Skip to content

Commit

Permalink
fix: convert cuda popcll ret type to i32
Browse files Browse the repository at this point in the history
  • Loading branch information
lgyStoic committed Apr 15, 2023
1 parent 6b348fd commit c2f7207
Showing 1 changed file with 4 additions and 6 deletions.
10 changes: 4 additions & 6 deletions taichi/codegen/cuda/codegen_cuda.cpp
Original file line number | Diff line number | Diff line change
Expand Up @@ -268,15 +268,13 @@ class TaskCodeGenCUDA : public TaskCodeGenLLVM {
  builder->CreateStore(frac, frac_ptr);
  llvm_val[stmt] = res;
  } else if (op == UnaryOpType::popcnt) {
- if (input_taichi_type->is_primitive(PrimitiveTypeID::u64)) {
- auto i64_input = builder->CreateBitCast(
- input, llvm::Type::getInt64Ty(*tlctx->get_this_thread_context()));
- llvm_val[stmt] = call("__nv_popcll", i64_input);
+ if (input_taichi_type->is_primitive(PrimitiveTypeID::u64) ||
+ input_taichi_type->is_primitive(PrimitiveTypeID::i64)) {
+ stmt->ret_type = PrimitiveType::i32;
+ llvm_val[stmt] = call("__nv_popcll", input);
  } else if (input_taichi_type->is_primitive(PrimitiveTypeID::i32) ||
  input_taichi_type->is_primitive(PrimitiveTypeID::u32)) {
  llvm_val[stmt] = call("__nv_popc", input);
- } else if (input_taichi_type->is_primitive(PrimitiveTypeID::i64)) {
- llvm_val[stmt] = call("__nv_popcll", input);
  } else {
  TI_NOT_IMPLEMENTED
  }
Expand Down

0 comments on commit c2f7207

Please sign in to comment.