From b7503c92386854f117411d3430e5c0c8cffd681b Mon Sep 17 00:00:00 2001 From: Rickard Date: Sun, 4 Feb 2024 11:29:52 +0100 Subject: [PATCH] Ran format on save from VScode --- .github/dependabot.yml | 2 +- .github/workflows/python-package.yml | 4 +- csrc/mps_kernels.metal | 60 ++++++++++++++-------------- csrc/mps_ops.mm | 12 +++--- 4 files changed, 39 insertions(+), 39 deletions(-) diff --git a/.github/dependabot.yml b/.github/dependabot.yml index 616a1f98e..8a36c3689 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -8,4 +8,4 @@ updates: major: update-types: [major] minor-patch: - update-types: [minor, patch] \ No newline at end of file + update-types: [minor, patch] diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 223f29ec2..4a34389af 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -8,7 +8,7 @@ on: types: [ published ] jobs: - + ## # This job matrix builds the non-CUDA versions of the libraries for all supported platforms. ## @@ -120,7 +120,7 @@ jobs: build_os=${{ matrix.os }} build_arch=${{ matrix.arch }} for NO_CUBLASLT in ON OFF; do - if [ ${build_os:0:6} == ubuntu ]; then + if [ ${build_os:0:6} == ubuntu ]; then image=nvidia/cuda:${{ matrix.cuda_version }}-devel-ubuntu22.04 echo "Using image $image" docker run --platform linux/$build_arch -i -w /src -v $PWD:/src $image sh -c \ diff --git a/csrc/mps_kernels.metal b/csrc/mps_kernels.metal index a5c8e35b2..63b3bf78c 100644 --- a/csrc/mps_kernels.metal +++ b/csrc/mps_kernels.metal @@ -83,35 +83,35 @@ static unsigned char quantize_scalar( } } -kernel void quantize(device float* code [[buffer(0)]], - device float* A [[buffer(1)]], - device uchar* out [[buffer(2)]], - constant uint& n [[buffer(3)]], - uint id [[thread_position_in_grid]]) { - const uint n_full = (NUM_BLOCK * (n / NUM_BLOCK)) + (n % NUM_BLOCK == 0 ? 0 : NUM_BLOCK); - uint valid_items = (id / NUM_BLOCK + 1 == (n + NUM_BLOCK - 1) / NUM_BLOCK) ? n - (id / NUM_BLOCK * NUM_BLOCK) : NUM_BLOCK; - const uint base_idx = (id / NUM_BLOCK * NUM_BLOCK); - - float vals[NUM]; - uchar qvals[NUM]; - - for (uint i = base_idx; i < n_full; i += ((n + NUM_BLOCK - 1) / NUM_BLOCK) * NUM_BLOCK) { - valid_items = n - i > NUM_BLOCK ? NUM_BLOCK : n - i; - - threadgroup_barrier(mem_flags::mem_threadgroup); - - for (uint j = 0; j < valid_items; j++) { - vals[j] = A[i + j]; - } - - for (uint j = 0; j < valid_items; j++) { +kernel void quantize(device float* code [[buffer(0)]], + device float* A [[buffer(1)]], + device uchar* out [[buffer(2)]], + constant uint& n [[buffer(3)]], + uint id [[thread_position_in_grid]]) { + const uint n_full = (NUM_BLOCK * (n / NUM_BLOCK)) + (n % NUM_BLOCK == 0 ? 0 : NUM_BLOCK); + uint valid_items = (id / NUM_BLOCK + 1 == (n + NUM_BLOCK - 1) / NUM_BLOCK) ? n - (id / NUM_BLOCK * NUM_BLOCK) : NUM_BLOCK; + const uint base_idx = (id / NUM_BLOCK * NUM_BLOCK); + + float vals[NUM]; + uchar qvals[NUM]; + + for (uint i = base_idx; i < n_full; i += ((n + NUM_BLOCK - 1) / NUM_BLOCK) * NUM_BLOCK) { + valid_items = n - i > NUM_BLOCK ? NUM_BLOCK : n - i; + + threadgroup_barrier(mem_flags::mem_threadgroup); + + for (uint j = 0; j < valid_items; j++) { + vals[j] = A[i + j]; + } + + for (uint j = 0; j < valid_items; j++) { qvals[j] = quantize_scalar(0.0f, code, vals[j]); - } - - threadgroup_barrier(mem_flags::mem_threadgroup); - - for (uint j = 0; j < valid_items; j++) { - out[i + j] = qvals[j]; - } - } + } + + threadgroup_barrier(mem_flags::mem_threadgroup); + + for (uint j = 0; j < valid_items; j++) { + out[i + j] = qvals[j]; + } + } } diff --git a/csrc/mps_ops.mm b/csrc/mps_ops.mm index 5e3adeebe..d198b3552 100644 --- a/csrc/mps_ops.mm +++ b/csrc/mps_ops.mm @@ -16,10 +16,10 @@ static inline id get_device() { - NSError *error = nil; + NSError *error = nil; static id device = nil; if(!device) { - device = MTLCreateSystemDefaultDevice(); + device = MTLCreateSystemDefaultDevice(); } if(!device) { NSLog(@"Failed to get MPS device"); @@ -30,7 +30,7 @@ static inline id get_library() { - NSError *error = nil; + NSError *error = nil; static id library = nil; if(!library) { library = [get_device() newLibraryWithURL:[NSURL fileURLWithPath:@"bitsandbytes.metallib"] error:&error]; @@ -40,7 +40,7 @@ abort(); } return library; -} +} /*MPSGraphTensor* dequantize_mps(MPSGraphTensor* code, MPSGraphTensor* A, int n) { @@ -49,7 +49,7 @@ }*/ -// MPSGraph function for quantize +// MPSGraph function for quantize extern "C" MPSGraphTensor* quantize_mps(MPSGraph* graph, MPSGraphTensor* code, MPSGraphTensor* A, int n) { id device = get_device(); @@ -64,4 +64,4 @@ } NSLog(@"Not implemented"); return nil; -} \ No newline at end of file +}