Skip to content

Commit

Permalink
fix multi_gpu_float_sum calc bug
Browse files Browse the repository at this point in the history
  • Loading branch information
Chinthaka Gamanayakege committed Jun 12, 2024
1 parent aedf89f commit e88f901
Showing 1 changed file with 1 addition and 0 deletions.
1 change: 1 addition & 0 deletions train_gpt2.cu
Original file line number Diff line number Diff line change
Expand Up @@ -1067,6 +1067,7 @@ float multi_gpu_float_sum(float value, float *unified_buffer, const MultiGpuConf
cudaCheck(cudaMemPrefetchAsync(unified_buffer, sizeof(float), multi_gpu_config->local_device_idx, 0));
ncclCheck(ncclAllReduce(unified_buffer, unified_buffer, sizeof(float), ncclFloat, ncclSum, multi_gpu_config->nccl_comm, 0));
cudaCheck(cudaMemPrefetchAsync(unified_buffer, sizeof(float), cudaCpuDeviceId, 0));
cudaCheck(cudaDeviceSynchronize());
return *unified_buffer;
}
// Note: MPI does not support all-reduce with a mean operation, only sum.
Expand Down

0 comments on commit e88f901

Please sign in to comment.