[Fix] compile errors on new image
jiashuy committed Feb 8, 2025
1 parent c866f49 commit c0799b0
Showing 4 changed files with 43 additions and 29 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/docs-build.yaml
@@ -25,7 +25,7 @@ jobs:
run: |
make -C docs html
- name: Upload HTML
-uses: actions/upload-artifact@v3
+uses: actions/upload-artifact@v4
with:
name: html-build-artifact
path: docs/build/html
@@ -38,7 +38,7 @@ jobs:
echo ${{ github.event.pull_request.merged }} > ./pr/merged.txt
echo ${{ github.event.action }} > ./pr/action.txt
- name: Upload PR information
-uses: actions/upload-artifact@v3
+uses: actions/upload-artifact@v4
with:
name: pr
path: pr/
2 changes: 1 addition & 1 deletion .github/workflows/docs-sched-rebuild.yaml
@@ -38,7 +38,7 @@ jobs:
find docs/build -name .doctrees -prune -exec rm -rf {} \;
find docs/build -name .buildinfo -exec rm {} \;
- name: Upload HTML
-uses: actions/upload-artifact@v3
+uses: actions/upload-artifact@v4
with:
name: html-build-artifact
path: docs/build/html
65 changes: 39 additions & 26 deletions include/merlin/core_kernels/group_lock_kernels.cuh
@@ -15,23 +15,29 @@
*/

#pragma once
+#include <cuda/atomic>
#include <cuda/std/semaphore>

namespace nv {
namespace merlin {
namespace group_lock {

-static __global__ void init_kernel(
-cuda::atomic<int, cuda::thread_scope_device>* update_count,
-cuda::atomic<int, cuda::thread_scope_device>* read_count,
+template<typename T>
+__global__ void init_kernel(
+cuda::atomic<T, cuda::thread_scope_device>* update_count,
+cuda::atomic<T, cuda::thread_scope_device>* read_count,
cuda::atomic<bool, cuda::thread_scope_device>* unique_flag) {
-new (update_count) cuda::atomic<int, cuda::thread_scope_device>{0};
-new (read_count) cuda::atomic<int, cuda::thread_scope_device>{0};
-new (unique_flag) cuda::atomic<bool, cuda::thread_scope_device>{false};
+if (blockIdx.x == 0 && threadIdx.x == 0) {
+new (update_count) cuda::atomic<T, cuda::thread_scope_device>{0};
+new (read_count) cuda::atomic<T, cuda::thread_scope_device>{0};
+new (unique_flag) cuda::atomic<bool, cuda::thread_scope_device>{false};
+}
}
-static __global__ void lock_read_kernel(
-cuda::atomic<int, cuda::thread_scope_device>* update_count,
-cuda::atomic<int, cuda::thread_scope_device>* read_count) {
+
+template<typename T>
+__global__ void lock_read_kernel(
+cuda::atomic<T, cuda::thread_scope_device>* update_count,
+cuda::atomic<T, cuda::thread_scope_device>* read_count) {
for (;;) {
while (update_count->load(cuda::std::memory_order_relaxed)) {
}
@@ -43,14 +49,16 @@ static __global__ void lock_read_kernel(
}
}

-static __global__ void unlock_read_kernel(
-cuda::atomic<int, cuda::thread_scope_device>* read_count) {
+template<typename T>
+__global__ void unlock_read_kernel(
+cuda::atomic<T, cuda::thread_scope_device>* read_count) {
read_count->fetch_sub(1, cuda::std::memory_order_relaxed);
}

-static __global__ void lock_update_kernel(
-cuda::atomic<int, cuda::thread_scope_device>* update_count,
-cuda::atomic<int, cuda::thread_scope_device>* read_count) {
+template<typename T>
+__global__ void lock_update_kernel(
+cuda::atomic<T, cuda::thread_scope_device>* update_count,
+cuda::atomic<T, cuda::thread_scope_device>* read_count) {
for (;;) {
while (read_count->load(cuda::std::memory_order_relaxed)) {
}
@@ -62,14 +70,16 @@ static __global__ void lock_update_kernel(
}
}

-static __global__ void unlock_update_kernel(
-cuda::atomic<int, cuda::thread_scope_device>* update_count) {
+template<typename T>
+__global__ void unlock_update_kernel(
+cuda::atomic<T, cuda::thread_scope_device>* update_count) {
update_count->fetch_sub(1, cuda::std::memory_order_relaxed);
}

-static __global__ void lock_update_read_kernel(
-cuda::atomic<int, cuda::thread_scope_device>* update_count,
-cuda::atomic<int, cuda::thread_scope_device>* read_count,
+template<typename T>
+__global__ void lock_update_read_kernel(
+cuda::atomic<T, cuda::thread_scope_device>* update_count,
+cuda::atomic<T, cuda::thread_scope_device>* read_count,
cuda::atomic<bool, cuda::thread_scope_device>* unique_flag) {
/* Lock unique flag */
bool expected = false;
@@ -101,22 +111,25 @@ static __global__ void lock_update_read_kernel(
}
}

-static __global__ void unlock_update_read_kernel(
-cuda::atomic<int, cuda::thread_scope_device>* update_count,
-cuda::atomic<int, cuda::thread_scope_device>* read_count,
+template<typename T>
+__global__ void unlock_update_read_kernel(
+cuda::atomic<T, cuda::thread_scope_device>* update_count,
+cuda::atomic<T, cuda::thread_scope_device>* read_count,
cuda::atomic<bool, cuda::thread_scope_device>* unique_flag) {
read_count->fetch_sub(1, cuda::std::memory_order_relaxed);
update_count->fetch_sub(1, cuda::std::memory_order_relaxed);
unique_flag->store(false, cuda::std::memory_order_relaxed);
}

-static __global__ void update_count_kernel(
-int* counter, cuda::atomic<int, cuda::thread_scope_device>* update_count) {
+template<typename T>
+__global__ void update_count_kernel(
+T* counter, cuda::atomic<T, cuda::thread_scope_device>* update_count) {
*counter = update_count->load(cuda::std::memory_order_relaxed);
}

-static __global__ void read_count_kernel(
-int* counter, cuda::atomic<int, cuda::thread_scope_device>* read_count) {
+template<typename T>
+__global__ void read_count_kernel(
+T* counter, cuda::atomic<T, cuda::thread_scope_device>* read_count) {
*counter = read_count->load(cuda::std::memory_order_relaxed);
}

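For context on how the newly templated kernels might be driven, here is a minimal host-side sketch. It is not part of this commit: the T = int instantiation, the single-thread launch configuration, the stream setup, and the include path are illustrative assumptions only.

// Hypothetical usage sketch (not from this commit): instantiate the group-lock
// kernels with T = int and enqueue them on a single stream.
#include <cuda_runtime.h>
#include <cuda/atomic>
#include "merlin/core_kernels/group_lock_kernels.cuh"  // assumed include path

using dev_atomic_int = cuda::atomic<int, cuda::thread_scope_device>;
using dev_atomic_bool = cuda::atomic<bool, cuda::thread_scope_device>;

int main() {
  // Raw device storage for the two counters and the unique flag.
  dev_atomic_int* update_count = nullptr;
  dev_atomic_int* read_count = nullptr;
  dev_atomic_bool* unique_flag = nullptr;
  cudaMalloc(&update_count, sizeof(dev_atomic_int));
  cudaMalloc(&read_count, sizeof(dev_atomic_int));
  cudaMalloc(&unique_flag, sizeof(dev_atomic_bool));

  cudaStream_t stream;
  cudaStreamCreate(&stream);

  // Construct the atomics in device memory; only thread (0, 0) runs the guarded body.
  nv::merlin::group_lock::init_kernel<int><<<1, 1, 0, stream>>>(
      update_count, read_count, unique_flag);

  // Acquire and release the reader side of the lock around stream-ordered read-only work.
  nv::merlin::group_lock::lock_read_kernel<int><<<1, 1, 0, stream>>>(
      update_count, read_count);
  // ... enqueue read-only kernels on `stream` here ...
  nv::merlin::group_lock::unlock_read_kernel<int><<<1, 1, 0, stream>>>(read_count);

  // Copy the current reader count back to the host.
  int* d_counter = nullptr;
  int h_counter = 0;
  cudaMalloc(&d_counter, sizeof(int));
  nv::merlin::group_lock::read_count_kernel<int><<<1, 1, 0, stream>>>(d_counter, read_count);
  cudaMemcpyAsync(&h_counter, d_counter, sizeof(int), cudaMemcpyDeviceToHost, stream);
  cudaStreamSynchronize(stream);

  cudaFree(d_counter);
  cudaFree(update_count);
  cudaFree(read_count);
  cudaFree(unique_flag);
  cudaStreamDestroy(stream);
  return 0;
}

The single-thread guard added to init_kernel in this commit ensures the placement-new construction of the counters happens exactly once per launch, regardless of the launch configuration used by the caller.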
1 change: 1 addition & 0 deletions include/merlin/types.cuh
@@ -18,6 +18,7 @@

#include <stddef.h>
#include <cstdint>
+#include <cuda/atomic>
#include <cuda/std/semaphore>
#include "debug.hpp"

