Wenxh/fp8 on a100 v5 (#1)

Group Gemm Version
ykim362 · Jun 14, 2024 · aca4a33 · aca4a33
1 parent 03e3bda
commit aca4a33
Show file tree

Hide file tree

Showing 11 changed files with 997 additions and 446 deletions.
diff --git a/requirements-cuda.txt b/requirements-cuda.txt
@@ -8,4 +8,10 @@ vllm-nccl-cu12>=2.18,<2.19  # for downloading nccl library
 torch == 2.2.1
 xformers == 0.0.25  # Requires PyTorch 2.2.1
 
-cupy-cuda12x
+# Dependencies for pycublas-moe-group-gemm
+gitpython
+pytest
+loguru
+# If the URL below is invalid or unreachable, install from this bundled file instead:
+#   pip install gitpython pytest loguru vllm/model_executor/layers/fused_moe/pycublas.zip
+git+https://github.com/wenxcs/pycublas.git@moe-group-gemm