updates for nm-magic-wand, nightly or release

neuralmagic · May 16, 2024 · 1c97968 · 1c97968 · github-actions · May 16, 2024
1 parent 2fcfced
commit 1c97968
Show file tree

Hide file tree

Showing 4 changed files with 4 additions and 56 deletions.
diff --git a/.github/actions/nm-build-vllm/action.yml b/.github/actions/nm-build-vllm/action.yml
@@ -32,7 +32,6 @@ runs:
       VENV="${{ inputs.venv }}-${COMMIT:0:7}"
       source $(pyenv root)/versions/${{ inputs.python }}/envs/${VENV}/bin/activate
       # TODO: adjust when we need a proper release. use nightly now.
-      pip3 install nm-magic-wand-nightly
       pip3 install -r requirements-cuda.txt -r requirements-build.txt
       # build
       SUCCESS=0

diff --git a/.github/actions/nm-install-test-whl/action.yml b/.github/actions/nm-install-test-whl/action.yml
@@ -52,6 +52,9 @@ runs:
         pip3 install ${WHL}[sparse]
         # report magic_wand version
         MAGIC_WAND=$(pip3 show nm-magic-wand-nightly | grep "Version" | cut -d' ' -f2)
+        if [ -z "${MAGIC_WAND}" ]; then
+          MAGIC_WAND=$(pip3 show nm-magic-wand | grep "Version" | cut -d' ' -f2)
+        fi
         echo "magic_wand=${MAGIC_WAND}" >> "$GITHUB_OUTPUT"
         # test and collect code coverage
         SUCCESS=0

diff --git a/.github/actions/nm-test-vllm/action.yml b/.github/actions/nm-test-vllm/action.yml
diff --git a/.github/scripts/build b/.github/scripts/build
@@ -6,7 +6,6 @@ usage() {
     echo
     echo "usage: ${0} <options>"
     echo
-    echo "  -a    - pypi server address"
     echo "  -p    - python version"
     echo "  -v    - name for virtualenv"
     echo "  -h    - this list of options"
@@ -17,15 +16,12 @@ PYPI_IP=
 PYTHON=
 VENV=
 
-while getopts "ha:p:v:" OPT; do
+while getopts "hp:v:" OPT; do
     case "${OPT}" in
         h)
             usage
             exit 1
             ;;
-        a)
-            PYPI_IP="${OPTARG}"
-            ;;
         p)
             PYTHON="${OPTARG}"
             ;;
@@ -36,12 +32,6 @@ while getopts "ha:p:v:" OPT; do
 done
 
 # check if variables are valid
-if [ -z "${PYPI_IP}" ]; then
-    echo "please provide 'pypi' server address"
-    usage
-    exit 1
-fi
-
 if [ -z "${PYTHON}" ]; then
     echo "please provide python version, e.g. 3.10.12"
     usage
@@ -55,6 +45,5 @@ if [ -z "${VENV}" ]; then
 fi
 
 source $(pyenv root)/versions/${PYTHON}/envs/${VENV}/bin/activate
-pip3 install --index-url http://${PYPI_IP}:8080/ --trusted-host ${PYPI_IP} nm-magic-wand-nightly
 pip3 install -r requirements-cuda.txt -r requirements-build.txt
 pip3 install -e .
Benchmark suite	Current: `1c97968`	Previous: `6334dd3`	Ratio
`{"name": "request_throughput", "description": "VLLM Engine throughput - synthetic\nmodel - NousResearch/Llama-2-7b-chat-hf\nmax_model_len - 4096\nbenchmark_throughput {\n \"use-all-available-gpus_\": \"\",\n \"input-len\": 256,\n \"output-len\": 128,\n \"num-prompts\": 1000\n}", "gpu_description": "NVIDIA A10G x 1", "vllm_version": "0.3.0", "python_version": "3.10.12 (main, May 10 2024, 13:42:25) [GCC 9.4.0]", "torch_version": "2.3.0+cu121"}`	`3.8374945566455483` prompts/s	`3.8418198063652103` prompts/s	`1.00`
`{"name": "token_throughput", "description": "VLLM Engine throughput - synthetic\nmodel - NousResearch/Llama-2-7b-chat-hf\nmax_model_len - 4096\nbenchmark_throughput {\n \"use-all-available-gpus_\": \"\",\n \"input-len\": 256,\n \"output-len\": 128,\n \"num-prompts\": 1000\n}", "gpu_description": "NVIDIA A10G x 1", "vllm_version": "0.3.0", "python_version": "3.10.12 (main, May 10 2024, 13:42:25) [GCC 9.4.0]", "torch_version": "2.3.0+cu121"}`	`1473.5979097518905` tokens/s	`1475.2588056442407` tokens/s	`1.00`