From b7503c92386854f117411d3430e5c0c8cffd681b Mon Sep 17 00:00:00 2001
From: Rickard <rly@combination.se>
Date: Sun, 4 Feb 2024 11:29:52 +0100
Subject: [PATCH] Apply VS Code format-on-save: strip trailing whitespace, add final newlines

---
 .github/dependabot.yml               |  2 +-
 .github/workflows/python-package.yml |  4 +-
 csrc/mps_kernels.metal               | 60 ++++++++++++++--------------
 csrc/mps_ops.mm                      | 12 +++---
 4 files changed, 39 insertions(+), 39 deletions(-)
diff --git a/.github/dependabot.yml b/.github/dependabot.yml
index 616a1f98e..8a36c3689 100644
--- a/.github/dependabot.yml
+++ b/.github/dependabot.yml
@@ -8,4 +8,4 @@ updates:
       major:
         update-types: [major]
       minor-patch:
-        update-types: [minor, patch]
\ No newline at end of file
+        update-types: [minor, patch]
diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml
index 223f29ec2..4a34389af 100644
--- a/.github/workflows/python-package.yml
+++ b/.github/workflows/python-package.yml
@@ -8,7 +8,7 @@ on:
     types: [ published ]
 
 jobs:
-  
+
   ##
   # This job matrix builds the non-CUDA versions of the libraries for all supported platforms.
   ##
@@ -120,7 +120,7 @@ jobs:
         build_os=${{ matrix.os }}
         build_arch=${{ matrix.arch }}
         for NO_CUBLASLT in ON OFF; do
-          if [ ${build_os:0:6} == ubuntu ]; then 
+          if [ ${build_os:0:6} == ubuntu ]; then
             image=nvidia/cuda:${{ matrix.cuda_version }}-devel-ubuntu22.04
             echo "Using image $image"
             docker run --platform linux/$build_arch -i -w /src -v $PWD:/src $image sh -c \
diff --git a/csrc/mps_kernels.metal b/csrc/mps_kernels.metal
index a5c8e35b2..63b3bf78c 100644
--- a/csrc/mps_kernels.metal
+++ b/csrc/mps_kernels.metal
@@ -83,35 +83,35 @@ static unsigned char quantize_scalar(
     }
 }
 
-kernel void quantize(device float* code [[buffer(0)]],  
-                      device float* A [[buffer(1)]],  
-                      device uchar* out [[buffer(2)]],  
-                      constant uint& n [[buffer(3)]],  
-                      uint id [[thread_position_in_grid]]) {  
-  const uint n_full = (NUM_BLOCK * (n / NUM_BLOCK)) + (n % NUM_BLOCK == 0 ? 0 : NUM_BLOCK);  
-  uint valid_items = (id / NUM_BLOCK + 1 == (n + NUM_BLOCK - 1) / NUM_BLOCK) ? n - (id / NUM_BLOCK * NUM_BLOCK) : NUM_BLOCK;  
-  const uint base_idx = (id / NUM_BLOCK * NUM_BLOCK);  
-  
-  float vals[NUM];  
-  uchar qvals[NUM];  
-  
-  for (uint i = base_idx; i < n_full; i += ((n + NUM_BLOCK - 1) / NUM_BLOCK) * NUM_BLOCK) {  
-    valid_items = n - i > NUM_BLOCK ? NUM_BLOCK : n - i;  
-  
-    threadgroup_barrier(mem_flags::mem_threadgroup);  
-  
-    for (uint j = 0; j < valid_items; j++) {  
-      vals[j] = A[i + j];  
-    }  
-  
-    for (uint j = 0; j < valid_items; j++) {  
+kernel void quantize(device float* code [[buffer(0)]],
+                      device float* A [[buffer(1)]],
+                      device uchar* out [[buffer(2)]],
+                      constant uint& n [[buffer(3)]],
+                      uint id [[thread_position_in_grid]]) {
+  const uint n_full = (NUM_BLOCK * (n / NUM_BLOCK)) + (n % NUM_BLOCK == 0 ? 0 : NUM_BLOCK);
+  uint valid_items = (id / NUM_BLOCK + 1 == (n + NUM_BLOCK - 1) / NUM_BLOCK) ? n - (id / NUM_BLOCK * NUM_BLOCK) : NUM_BLOCK;
+  const uint base_idx = (id / NUM_BLOCK * NUM_BLOCK);
+
+  float vals[NUM];
+  uchar qvals[NUM];
+
+  for (uint i = base_idx; i < n_full; i += ((n + NUM_BLOCK - 1) / NUM_BLOCK) * NUM_BLOCK) {
+    valid_items = n - i > NUM_BLOCK ? NUM_BLOCK : n - i;
+
+    threadgroup_barrier(mem_flags::mem_threadgroup);
+
+    for (uint j = 0; j < valid_items; j++) {
+      vals[j] = A[i + j];
+    }
+
+    for (uint j = 0; j < valid_items; j++) {
       qvals[j] = quantize_scalar<false>(0.0f, code, vals[j]);
-    }  
-  
-    threadgroup_barrier(mem_flags::mem_threadgroup);  
-  
-    for (uint j = 0; j < valid_items; j++) {  
-      out[i + j] = qvals[j];  
-    }  
-  }  
+    }
+
+    threadgroup_barrier(mem_flags::mem_threadgroup);
+
+    for (uint j = 0; j < valid_items; j++) {
+      out[i + j] = qvals[j];
+    }
+  }
 }
diff --git a/csrc/mps_ops.mm b/csrc/mps_ops.mm
index 5e3adeebe..d198b3552 100644
--- a/csrc/mps_ops.mm
+++ b/csrc/mps_ops.mm
@@ -16,10 +16,10 @@
 
 static inline id<MTLDevice> get_device()
 {
-  NSError *error = nil;  
+  NSError *error = nil;
   static id<MTLDevice> device = nil;
   if(!device) {
-    device = MTLCreateSystemDefaultDevice();  
+    device = MTLCreateSystemDefaultDevice();
   }
   if(!device) {
     NSLog(@"Failed to get MPS device");
@@ -30,7 +30,7 @@
 
 static inline id<MTLLibrary> get_library()
 {
-  NSError *error = nil;  
+  NSError *error = nil;
   static id<MTLLibrary> library = nil;
   if(!library) {
     library = [get_device() newLibraryWithURL:[NSURL fileURLWithPath:@"bitsandbytes.metallib"] error:&error];
@@ -40,7 +40,7 @@
     abort();
   }
   return library;
-}  
+}
 
 /*MPSGraphTensor* dequantize_mps(MPSGraphTensor* code, MPSGraphTensor* A, int n)
 {
@@ -49,7 +49,7 @@
 }*/
 
 
-// MPSGraph function for quantize  
+// MPSGraph function for quantize
 extern "C" MPSGraphTensor* quantize_mps(MPSGraph* graph, MPSGraphTensor* code, MPSGraphTensor* A, int n)
 {
   id<MTLDevice> device = get_device();
@@ -64,4 +64,4 @@
   }
   NSLog(@"Not implemented");
   return nil;
-}
\ No newline at end of file
+}