Skip to content

Commit

Permalink
kernel working and tested
Browse files (browse the repository at this point in the history)
  • Loading branch information
PABannier committed Nov 20, 2024
1 parent 9cf977d commit d782d29
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 5 deletions.
3 changes: 2 additions & 1 deletion src/ggml-metal/ggml-metal.m
Original file line number Diff line number Diff line change
Expand Up @@ -3485,7 +3485,8 @@ static void ggml_metal_encode_node(
[encoder setComputePipelineState:pipeline];
[encoder setBuffer:id_src0 offset:offs_src0 atIndex:0];
[encoder setBuffer:id_dst offset:offs_dst atIndex:1];
[encoder setBytes:&nb01 length:sizeof( int64_t) atIndex:2];
[encoder setBytes:&ne00 length:sizeof( int64_t) atIndex:2];
[encoder setBytes:&nb01 length:sizeof(uint64_t) atIndex:3];

[encoder dispatchThreadgroups:MTLSizeMake(nrows, 1, 1) threadsPerThreadgroup:MTLSizeMake(1, 1, 1)];
} break;
Expand Down
9 changes: 5 additions & 4 deletions src/ggml-metal/ggml-metal.metal
Original file line number Diff line number Diff line change
Expand Up @@ -1347,15 +1347,16 @@ kernel void kernel_ssm_scan_f32(
// Row-wise argmax over f32 data.
//
// Each threadgroup handles one row: the row is selected by the threadgroup's
// position in the grid, and the index of its largest element is written to
// dst at that same position. The host encoder in this commit binds the
// arguments as buffer 0 = x, buffer 1 = dst, index 2 = ne00 (int64_t) and
// index 3 = nb01 (uint64_t), and dispatches nrows threadgroups of one thread.
//
//   x     - source data, addressed in bytes; row r starts at x + r * nb01
//   dst   - one int32 result per row
//   ncols - number of float elements scanned in each row
//   nb01  - stride between consecutive rows, in bytes
//   tgpig - threadgroup position in the grid, used as the row index
//
// Ties resolve to the lowest index because the comparison is strict. The
// result is 0 when no element compares greater than element 0, which includes
// the case where ncols <= 0 (no element is read in that case).
kernel void kernel_argmax(
        device const void * x,
        device   int32_t  * dst,
        constant int64_t  & ncols,
        constant uint64_t & nb01,
        uint tgpig[[threadgroup_position_in_grid]]) {
    device const float * x_row = (device const float *) ((device const char *) x + tgpig * nb01);

    // Track the running maximum in thread-local variables so dst is written
    // exactly once instead of being read and rewritten on every iteration.
    int32_t best_idx = 0;

    if (ncols > 0) {
        float best_val = x_row[0];

        // Start at 1: element 0 is the initial candidate and comparing it
        // against itself can never succeed. The 64-bit index matches ncols.
        for (int64_t i = 1; i < ncols; i++) {
            const float v = x_row[i];
            if (v > best_val) {
                best_val = v;
                best_idx = (int32_t) i;
            }
        }
    }

    dst[tgpig] = best_idx;
}
Expand Down

0 comments on commit d782d29

Please sign in to comment.