Skip to content

Commit

Permalink
We still support CUDA down to 10 so maybe wrap this code if we have the right compute capability to use it.
Browse files Browse the repository at this point in the history
  • Loading branch information
jaycedowell committed Jul 17, 2024
1 parent 8243297 commit 0b219b6
Showing 1 changed file with 11 additions and 0 deletions.
11 changes: 11 additions & 0 deletions src/transpose.cu
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,17 @@ BFstatus transpose(int ndim,
sizes[0]*out_strides[0] <
(long)std::numeric_limits<int>::max());
#if BF_CUDA_ENABLED
// Compiled only when the minimum targeted GPU architecture is below 40
// (BF_GPU_MIN_ARCH < 40); other builds skip the bank-size selection entirely.
#if BF_GPU_MIN_ARCH < 40
// Pick the shared-memory bank width from the element size: 6-, 8- and
// 16-byte elements request eight-byte banks, every other size four-byte banks.
if( ELEMENT_SIZE == 6 ||
ELEMENT_SIZE == 8 ||
ELEMENT_SIZE == 16 ) {
// TODO: Doing this here might be a bad idea
// NOTE(review): the value returned by cudaDeviceSetSharedMemConfig is
// discarded in both branches, so a failure here goes unnoticed.
// NOTE(review): presumably this setting applies to the whole device rather
// than just this launch -- confirm against the CUDA runtime documentation.
cudaDeviceSetSharedMemConfig(cudaSharedMemBankSizeEightByte);
}
else {
cudaDeviceSetSharedMemConfig(cudaSharedMemBankSizeFourByte);
}
#endif
if( can_use_int ) {
kernel::transpose
<TILE_DIM,BLOCK_ROWS,CONDITIONAL_WRITE,
Expand Down

0 comments on commit 0b219b6

Please sign in to comment.