From 4bf19d2a671afd65b37dcf0ceaf41db7d7298159 Mon Sep 17 00:00:00 2001 From: Woosuk Kwon Date: Thu, 4 Apr 2024 03:24:33 +0000 Subject: [PATCH 01/12] [Misc] Define common requirements --- requirements.txt => requirements-common.txt | 7 +------ requirements-cpu.txt | 15 +-------------- requirements-cuda.txt | 5 +++++ requirements-neuron.txt | 9 --------- requirements-rocm.txt | 17 ----------------- setup.py | 5 ++++- 6 files changed, 11 insertions(+), 47 deletions(-) rename requirements.txt => requirements-common.txt (65%) create mode 100644 requirements-cuda.txt diff --git a/requirements.txt b/requirements-common.txt similarity index 65% rename from requirements.txt rename to requirements-common.txt index df0f6dd1ee3ca..3cfc60b9421b6 100644 --- a/requirements.txt +++ b/requirements-common.txt @@ -1,19 +1,14 @@ cmake>=3.21 ninja # For faster builds. psutil -ray >= 2.9 sentencepiece # Required for LLaMA tokenizer. numpy -torch == 2.1.2 requests py-cpuinfo transformers >= 4.39.1 # Required for StarCoder2 & Llava. -xformers == 0.0.23.post1 # Required for CUDA 12.1. fastapi uvicorn[standard] pydantic >= 2.0 # Required for OpenAI server. prometheus_client >= 0.18.0 -pynvml == 11.5.0 -triton >= 2.1.0 outlines == 0.0.34 -tiktoken == 0.6.0 # Required for DBRX tokenizer +tiktoken == 0.6.0 # Required for DBRX tokenizer diff --git a/requirements-cpu.txt b/requirements-cpu.txt index 580bffea5a018..99a9449dd0cb1 100644 --- a/requirements-cpu.txt +++ b/requirements-cpu.txt @@ -1,15 +1,2 @@ -cmake>=3.21 -ninja # For faster builds. -psutil -ray >= 2.9 -sentencepiece # Required for LLaMA tokenizer. -numpy -transformers >= 4.38.0 # Required for Gemma. -fastapi -uvicorn[standard] -pydantic >= 2.0 # Required for OpenAI server. -prometheus_client >= 0.18.0 torch == 2.1.2+cpu -triton >= 2.1.0 -filelock == 3.13.3 -py-cpuinfo \ No newline at end of file +triton >= 2.1.0 # FIXME(woosuk): This is a hack to avoid import error. diff --git a/requirements-cuda.txt b/requirements-cuda.txt new file mode 100644 index 0000000000000..b38c1425d8e4a --- /dev/null +++ b/requirements-cuda.txt @@ -0,0 +1,5 @@ +ray >= 2.9 +torch == 2.1.2 +xformers == 0.0.23.post1 # Required for CUDA 12.1. +pynvml == 11.5.0 +triton >= 2.1.0 diff --git a/requirements-neuron.txt b/requirements-neuron.txt index 6828bd4fd1fce..0dba43e822629 100644 --- a/requirements-neuron.txt +++ b/requirements-neuron.txt @@ -1,12 +1,3 @@ -sentencepiece # Required for LLaMA tokenizer. -numpy transformers-neuronx >= 0.9.0 torch-neuronx >= 2.1.0 neuronx-cc -fastapi -uvicorn[standard] -pydantic >= 2.0 # Required for OpenAI server. -prometheus_client >= 0.18.0 -requests -psutil -py-cpuinfo \ No newline at end of file diff --git a/requirements-rocm.txt b/requirements-rocm.txt index 4e9f598551fee..7ba914810cce9 100644 --- a/requirements-rocm.txt +++ b/requirements-rocm.txt @@ -1,18 +1 @@ -cmake>=3.21 -ninja # For faster builds. -typing-extensions>=4.8.0 -starlette -requests -py-cpuinfo -psutil ray == 2.9.3 -sentencepiece # Required for LLaMA tokenizer. -numpy -tokenizers>=0.15.0 -transformers >= 4.39.1 # Required for StarCoder2 & Llava. -fastapi -uvicorn[standard] -pydantic >= 2.0 # Required for OpenAI server. -prometheus_client >= 0.18.0 -outlines == 0.0.34 -tiktoken == 0.6.0 # Required for DBRX tokenizer diff --git a/setup.py b/setup.py index e80226faa4807..c73a40252379a 100644 --- a/setup.py +++ b/setup.py @@ -325,6 +325,9 @@ def read_readme() -> str: def get_requirements() -> List[str]: """Get Python package dependencies from requirements.txt.""" + with open(get_path("requirements-common.txt")) as f: + common = f.read().strip().split("\n") + if _is_cuda(): with open(get_path("requirements.txt")) as f: requirements = f.read().strip().split("\n") @@ -341,7 +344,7 @@ def get_requirements() -> List[str]: raise ValueError( "Unsupported platform, please use CUDA, ROCM or Neuron.") - return requirements + return common + requirements ext_modules = [] From 181e66e8945e9f7fa4848b46325fd5b519726eeb Mon Sep 17 00:00:00 2001 From: Woosuk Kwon Date: Thu, 4 Apr 2024 04:14:36 +0000 Subject: [PATCH 02/12] Fix docker files & docs --- .github/workflows/publish.yml | 2 +- .github/workflows/scripts/build.sh | 3 ++- CONTRIBUTING.md | 1 - Dockerfile | 17 ++++++++++++----- Dockerfile.cpu | 1 + Dockerfile.rocm | 1 + MANIFEST.in | 3 ++- .../source/getting_started/amd-installation.rst | 2 ++ .../source/getting_started/cpu-installation.rst | 1 + .../getting_started/neuron-installation.rst | 1 + setup.py | 2 +- 11 files changed, 24 insertions(+), 10 deletions(-) diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 5211dc180798e..47ee026416d31 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -49,7 +49,7 @@ jobs: matrix: os: ['ubuntu-20.04'] python-version: ['3.8', '3.9', '3.10', '3.11'] - pytorch-version: ['2.1.2'] # Must be the most recent version that meets requirements.txt. + pytorch-version: ['2.1.2'] # Must be the most recent version that meets requirements-cuda.txt. cuda-version: ['11.8', '12.1'] steps: diff --git a/.github/workflows/scripts/build.sh b/.github/workflows/scripts/build.sh index ed200fe724d3e..226afaa11e1f0 100644 --- a/.github/workflows/scripts/build.sh +++ b/.github/workflows/scripts/build.sh @@ -9,7 +9,8 @@ LD_LIBRARY_PATH=${cuda_home}/lib64:$LD_LIBRARY_PATH # Install requirements $python_executable -m pip install wheel packaging -$python_executable -m pip install -r requirements.txt +$python_executable -m pip install -r requirements-common.txt +$python_executable -m pip install -r requirements-cuda.txt # Limit the number of parallel jobs to avoid OOM export MAX_JOBS=1 diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index befd61ff516e0..81a8db2b268b0 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -21,7 +21,6 @@ Express your support on Twitter if vLLM aids you, or simply offer your appreciat ### Build from source ```bash -pip install -r requirements.txt pip install -e . # This may take several minutes. ``` diff --git a/Dockerfile b/Dockerfile index f975530e09407..c7a92e6f85c1a 100644 --- a/Dockerfile +++ b/Dockerfile @@ -16,9 +16,12 @@ RUN ldconfig /usr/local/cuda-12.1/compat/ WORKDIR /workspace # install build and runtime dependencies -COPY requirements.txt requirements.txt +COPY requirements-common.txt requirements-common.txt RUN --mount=type=cache,target=/root/.cache/pip \ - pip install -r requirements.txt + pip install -r requirements-common.txt +COPY requirements-cuda.txt requirements-cuda.txt +RUN --mount=type=cache,target=/root/.cache/pip \ + pip install -r requirements-cuda.txt # install development dependencies COPY requirements-dev.txt requirements-dev.txt @@ -43,7 +46,8 @@ COPY csrc csrc COPY setup.py setup.py COPY cmake cmake COPY CMakeLists.txt CMakeLists.txt -COPY requirements.txt requirements.txt +COPY requirements-common.txt requirements-common.txt +COPY requirements-cuda.txt requirements-cuda.txt COPY pyproject.toml pyproject.toml COPY vllm/__init__.py vllm/__init__.py @@ -111,9 +115,12 @@ RUN apt-get update -y \ && apt-get install -y python3-pip WORKDIR /workspace -COPY requirements.txt requirements.txt +COPY requirements-common.txt requirements-common.txt +RUN --mount=type=cache,target=/root/.cache/pip \ + pip install -r requirements-common.txt +COPY requirements-cuda.txt requirements-cuda.txt RUN --mount=type=cache,target=/root/.cache/pip \ - pip install -r requirements.txt + pip install -r requirements-cuda.txt # Install flash attention (from pre-built wheel) RUN --mount=type=bind,from=flash-attn-builder,src=/usr/src/flash-attention-v2,target=/usr/src/flash-attention-v2 \ diff --git a/Dockerfile.cpu b/Dockerfile.cpu index 4251fddd6cc3b..db3dc05ebac8e 100644 --- a/Dockerfile.cpu +++ b/Dockerfile.cpu @@ -13,6 +13,7 @@ COPY ./ /workspace/vllm WORKDIR /workspace/vllm +RUN pip install -v -r requirements-common.txt --extra-index-url https://download.pytorch.org/whl/cpu RUN pip install -v -r requirements-cpu.txt --extra-index-url https://download.pytorch.org/whl/cpu RUN VLLM_TARGET_DEVICE=cpu python3 setup.py install diff --git a/Dockerfile.rocm b/Dockerfile.rocm index 65a367994f960..1b2b774fbabbf 100644 --- a/Dockerfile.rocm +++ b/Dockerfile.rocm @@ -82,6 +82,7 @@ RUN python3 -m pip install xformers==0.0.23 --no-deps RUN cd /app \ && cd vllm \ + && pip install -U -r requirements-common.txt \ && pip install -U -r requirements-rocm.txt \ && if [ "$BUILD_FA" = "1" ]; then \ bash patch_xformers.rocm.sh; fi \ diff --git a/MANIFEST.in b/MANIFEST.in index aa16da6500e6c..d385f194c6c0f 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,5 +1,6 @@ include LICENSE -include requirements.txt +include requirements-common.txt +include requirements-cuda.txt include CMakeLists.txt recursive-include cmake * diff --git a/docs/source/getting_started/amd-installation.rst b/docs/source/getting_started/amd-installation.rst index 3d736bf7120ec..1779795b03eb2 100644 --- a/docs/source/getting_started/amd-installation.rst +++ b/docs/source/getting_started/amd-installation.rst @@ -87,6 +87,7 @@ At the time of this documentation update, PyTorch on ROCm 6.0 wheel is not yet a .. code-block:: console $ cd vllm + $ pip install -U -r requirements-common.txt $ pip install -U -r requirements-rocm.txt $ python setup.py install # This may take 5-10 minutes. Currently, `pip install .`` does not work for ROCm installation @@ -163,6 +164,7 @@ Alternatively, if you plan to install vLLM-ROCm on a local machine or start from .. code-block:: console $ cd vllm + $ pip install -U -r requirements-common.txt $ pip install -U -r requirements-rocm.txt $ python setup.py install # This may take 5-10 minutes. diff --git a/docs/source/getting_started/cpu-installation.rst b/docs/source/getting_started/cpu-installation.rst index ba8b0645adcdf..60be90b140002 100644 --- a/docs/source/getting_started/cpu-installation.rst +++ b/docs/source/getting_started/cpu-installation.rst @@ -55,6 +55,7 @@ Build from source $ pip install --upgrade pip $ pip install wheel packaging ninja setuptools>=49.4.0 numpy + $ pip install -v -r requirements-common.txt $ pip install -v -r requirements-cpu.txt --extra-index-url https://download.pytorch.org/whl/cpu - Finally, build and install vLLM CPU backend: diff --git a/docs/source/getting_started/neuron-installation.rst b/docs/source/getting_started/neuron-installation.rst index 62bf779c339d5..a9c24852bb92b 100644 --- a/docs/source/getting_started/neuron-installation.rst +++ b/docs/source/getting_started/neuron-installation.rst @@ -130,6 +130,7 @@ Once neuronx-cc and transformers-neuronx packages are installed, we will be able $ git clone https://github.com/vllm-project/vllm.git $ cd vllm + $ pip install -U -r requirements-common.txt $ pip install -U -r requirements-neuron.txt $ pip install . diff --git a/setup.py b/setup.py index c73a40252379a..f72b45609328a 100644 --- a/setup.py +++ b/setup.py @@ -329,7 +329,7 @@ def get_requirements() -> List[str]: common = f.read().strip().split("\n") if _is_cuda(): - with open(get_path("requirements.txt")) as f: + with open(get_path("requirements-cuda.txt")) as f: requirements = f.read().strip().split("\n") elif _is_hip(): with open(get_path("requirements-rocm.txt")) as f: From e83e3b475c7724d8723424473f95c02e6c5c1511 Mon Sep 17 00:00:00 2001 From: Woosuk Kwon Date: Thu, 4 Apr 2024 04:44:50 +0000 Subject: [PATCH 03/12] Minor fix --- Dockerfile.cpu | 2 +- requirements-common.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Dockerfile.cpu b/Dockerfile.cpu index db3dc05ebac8e..7740fd271bbd7 100644 --- a/Dockerfile.cpu +++ b/Dockerfile.cpu @@ -13,7 +13,7 @@ COPY ./ /workspace/vllm WORKDIR /workspace/vllm -RUN pip install -v -r requirements-common.txt --extra-index-url https://download.pytorch.org/whl/cpu +RUN pip install -v -r requirements-common.txt RUN pip install -v -r requirements-cpu.txt --extra-index-url https://download.pytorch.org/whl/cpu RUN VLLM_TARGET_DEVICE=cpu python3 setup.py install diff --git a/requirements-common.txt b/requirements-common.txt index 3cfc60b9421b6..cb00609bd3593 100644 --- a/requirements-common.txt +++ b/requirements-common.txt @@ -1,4 +1,4 @@ -cmake>=3.21 +cmake >= 3.21 ninja # For faster builds. psutil sentencepiece # Required for LLaMA tokenizer. From a810524fda39ab05b6fbbfb219683cc05514c613 Mon Sep 17 00:00:00 2001 From: Woosuk Kwon Date: Thu, 4 Apr 2024 06:52:47 +0000 Subject: [PATCH 04/12] Move outlines to requirements-cuda --- requirements-common.txt | 1 - requirements-cuda.txt | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements-common.txt b/requirements-common.txt index cb00609bd3593..dc331c11de14f 100644 --- a/requirements-common.txt +++ b/requirements-common.txt @@ -10,5 +10,4 @@ fastapi uvicorn[standard] pydantic >= 2.0 # Required for OpenAI server. prometheus_client >= 0.18.0 -outlines == 0.0.34 tiktoken == 0.6.0 # Required for DBRX tokenizer diff --git a/requirements-cuda.txt b/requirements-cuda.txt index b38c1425d8e4a..d01402106b210 100644 --- a/requirements-cuda.txt +++ b/requirements-cuda.txt @@ -3,3 +3,4 @@ torch == 2.1.2 xformers == 0.0.23.post1 # Required for CUDA 12.1. pynvml == 11.5.0 triton >= 2.1.0 +outlines == 0.0.34 # Requires torch >= 2.1.0 From ce74ff10ed627a778dbbf20de4a3f47adce11179 Mon Sep 17 00:00:00 2001 From: Woosuk Kwon Date: Thu, 4 Apr 2024 15:58:29 +0000 Subject: [PATCH 05/12] Add -r requirements-common.txt to all requirements --- .github/workflows/scripts/build.sh | 1 - Dockerfile | 4 --- Dockerfile.cpu | 1 - Dockerfile.rocm | 1 - .../getting_started/amd-installation.rst | 2 -- .../getting_started/cpu-installation.rst | 1 - .../getting_started/neuron-installation.rst | 1 - requirements-cpu.txt | 4 +++ requirements-cuda.txt | 4 +++ requirements-neuron.txt | 4 +++ requirements-rocm.txt | 4 +++ setup.py | 28 ++++++++++--------- 12 files changed, 31 insertions(+), 24 deletions(-) diff --git a/.github/workflows/scripts/build.sh b/.github/workflows/scripts/build.sh index 226afaa11e1f0..60a3978f9abd7 100644 --- a/.github/workflows/scripts/build.sh +++ b/.github/workflows/scripts/build.sh @@ -9,7 +9,6 @@ LD_LIBRARY_PATH=${cuda_home}/lib64:$LD_LIBRARY_PATH # Install requirements $python_executable -m pip install wheel packaging -$python_executable -m pip install -r requirements-common.txt $python_executable -m pip install -r requirements-cuda.txt # Limit the number of parallel jobs to avoid OOM diff --git a/Dockerfile b/Dockerfile index c7a92e6f85c1a..12c490bd61bf4 100644 --- a/Dockerfile +++ b/Dockerfile @@ -17,8 +17,6 @@ WORKDIR /workspace # install build and runtime dependencies COPY requirements-common.txt requirements-common.txt -RUN --mount=type=cache,target=/root/.cache/pip \ - pip install -r requirements-common.txt COPY requirements-cuda.txt requirements-cuda.txt RUN --mount=type=cache,target=/root/.cache/pip \ pip install -r requirements-cuda.txt @@ -116,8 +114,6 @@ RUN apt-get update -y \ WORKDIR /workspace COPY requirements-common.txt requirements-common.txt -RUN --mount=type=cache,target=/root/.cache/pip \ - pip install -r requirements-common.txt COPY requirements-cuda.txt requirements-cuda.txt RUN --mount=type=cache,target=/root/.cache/pip \ pip install -r requirements-cuda.txt diff --git a/Dockerfile.cpu b/Dockerfile.cpu index 7740fd271bbd7..4251fddd6cc3b 100644 --- a/Dockerfile.cpu +++ b/Dockerfile.cpu @@ -13,7 +13,6 @@ COPY ./ /workspace/vllm WORKDIR /workspace/vllm -RUN pip install -v -r requirements-common.txt RUN pip install -v -r requirements-cpu.txt --extra-index-url https://download.pytorch.org/whl/cpu RUN VLLM_TARGET_DEVICE=cpu python3 setup.py install diff --git a/Dockerfile.rocm b/Dockerfile.rocm index 1b2b774fbabbf..65a367994f960 100644 --- a/Dockerfile.rocm +++ b/Dockerfile.rocm @@ -82,7 +82,6 @@ RUN python3 -m pip install xformers==0.0.23 --no-deps RUN cd /app \ && cd vllm \ - && pip install -U -r requirements-common.txt \ && pip install -U -r requirements-rocm.txt \ && if [ "$BUILD_FA" = "1" ]; then \ bash patch_xformers.rocm.sh; fi \ diff --git a/docs/source/getting_started/amd-installation.rst b/docs/source/getting_started/amd-installation.rst index 1779795b03eb2..3d736bf7120ec 100644 --- a/docs/source/getting_started/amd-installation.rst +++ b/docs/source/getting_started/amd-installation.rst @@ -87,7 +87,6 @@ At the time of this documentation update, PyTorch on ROCm 6.0 wheel is not yet a .. code-block:: console $ cd vllm - $ pip install -U -r requirements-common.txt $ pip install -U -r requirements-rocm.txt $ python setup.py install # This may take 5-10 minutes. Currently, `pip install .`` does not work for ROCm installation @@ -164,7 +163,6 @@ Alternatively, if you plan to install vLLM-ROCm on a local machine or start from .. code-block:: console $ cd vllm - $ pip install -U -r requirements-common.txt $ pip install -U -r requirements-rocm.txt $ python setup.py install # This may take 5-10 minutes. diff --git a/docs/source/getting_started/cpu-installation.rst b/docs/source/getting_started/cpu-installation.rst index 60be90b140002..ba8b0645adcdf 100644 --- a/docs/source/getting_started/cpu-installation.rst +++ b/docs/source/getting_started/cpu-installation.rst @@ -55,7 +55,6 @@ Build from source $ pip install --upgrade pip $ pip install wheel packaging ninja setuptools>=49.4.0 numpy - $ pip install -v -r requirements-common.txt $ pip install -v -r requirements-cpu.txt --extra-index-url https://download.pytorch.org/whl/cpu - Finally, build and install vLLM CPU backend: diff --git a/docs/source/getting_started/neuron-installation.rst b/docs/source/getting_started/neuron-installation.rst index a9c24852bb92b..62bf779c339d5 100644 --- a/docs/source/getting_started/neuron-installation.rst +++ b/docs/source/getting_started/neuron-installation.rst @@ -130,7 +130,6 @@ Once neuronx-cc and transformers-neuronx packages are installed, we will be able $ git clone https://github.com/vllm-project/vllm.git $ cd vllm - $ pip install -U -r requirements-common.txt $ pip install -U -r requirements-neuron.txt $ pip install . diff --git a/requirements-cpu.txt b/requirements-cpu.txt index 99a9449dd0cb1..5ca1264bd3944 100644 --- a/requirements-cpu.txt +++ b/requirements-cpu.txt @@ -1,2 +1,6 @@ +# Common dependencies +-r requirements-common.txt + +# Dependencies for x86_64 CPUs torch == 2.1.2+cpu triton >= 2.1.0 # FIXME(woosuk): This is a hack to avoid import error. diff --git a/requirements-cuda.txt b/requirements-cuda.txt index d01402106b210..d9338f8cf9eac 100644 --- a/requirements-cuda.txt +++ b/requirements-cuda.txt @@ -1,3 +1,7 @@ +# Common dependencies +-r requirements-common.txt + +# Dependencies for NVIDIA GPUs ray >= 2.9 torch == 2.1.2 xformers == 0.0.23.post1 # Required for CUDA 12.1. diff --git a/requirements-neuron.txt b/requirements-neuron.txt index 0dba43e822629..92b705b4b2d67 100644 --- a/requirements-neuron.txt +++ b/requirements-neuron.txt @@ -1,3 +1,7 @@ +# Common dependencies +-r requirements-common.txt + +# Dependencies for Neuron devices transformers-neuronx >= 0.9.0 torch-neuronx >= 2.1.0 neuronx-cc diff --git a/requirements-rocm.txt b/requirements-rocm.txt index 7ba914810cce9..903845b64d98f 100644 --- a/requirements-rocm.txt +++ b/requirements-rocm.txt @@ -1 +1,5 @@ +# Common dependencies +-r requirements-common.txt + +# Dependencies for AMD GPUs ray == 2.9.3 diff --git a/setup.py b/setup.py index f72b45609328a..95ed4f8813623 100644 --- a/setup.py +++ b/setup.py @@ -325,26 +325,28 @@ def read_readme() -> str: def get_requirements() -> List[str]: """Get Python package dependencies from requirements.txt.""" - with open(get_path("requirements-common.txt")) as f: - common = f.read().strip().split("\n") - if _is_cuda(): - with open(get_path("requirements-cuda.txt")) as f: + def _read_requirements(filename: str) -> List[str]: + with open(get_path(filename)) as f: requirements = f.read().strip().split("\n") + for line in requirements: + if line.startswith("-r "): + requirements.remove(line) + requirements += _read_requirements(line.split()[1]) + return requirements + + if _is_cuda(): + requirements = _read_requirements("requirements-cuda.txt") elif _is_hip(): - with open(get_path("requirements-rocm.txt")) as f: - requirements = f.read().strip().split("\n") + requirements = _read_requirements("requirements-rocm.txt") elif _is_neuron(): - with open(get_path("requirements-neuron.txt")) as f: - requirements = f.read().strip().split("\n") + requirements = _read_requirements("requirements-neuron.txt") elif _is_cpu(): - with open(get_path("requirements-cpu.txt")) as f: - requirements = f.read().strip().split("\n") + requirements = _read_requirements("requirements-cpu.txt") else: raise ValueError( - "Unsupported platform, please use CUDA, ROCM or Neuron.") - - return common + requirements + "Unsupported platform, please use CUDA, ROCm, Neuron, or CPU.") + return requirements ext_modules = [] From 9622c9de0e567694d3879c8ca63e290da6c4d013 Mon Sep 17 00:00:00 2001 From: Woosuk Kwon Date: Thu, 4 Apr 2024 17:06:27 +0000 Subject: [PATCH 06/12] Address comment --- setup.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/setup.py b/setup.py index 95ed4f8813623..add5151a22c3c 100644 --- a/setup.py +++ b/setup.py @@ -329,11 +329,13 @@ def get_requirements() -> List[str]: def _read_requirements(filename: str) -> List[str]: with open(get_path(filename)) as f: requirements = f.read().strip().split("\n") + resolved_requirements = [] for line in requirements: if line.startswith("-r "): - requirements.remove(line) - requirements += _read_requirements(line.split()[1]) - return requirements + resolved_requirements += _read_requirements(line.split()[1]) + else: + resolved_requirements.append(line) + return resolved_requirements if _is_cuda(): requirements = _read_requirements("requirements-cuda.txt") From de27f043b7eab9a810be0ecbb677d5be3f72c014 Mon Sep 17 00:00:00 2001 From: Woosuk Kwon Date: Thu, 4 Apr 2024 17:35:22 +0000 Subject: [PATCH 07/12] torch 2.2.1 --- requirements-cuda.txt | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/requirements-cuda.txt b/requirements-cuda.txt index d9338f8cf9eac..8e537a0ce5ca9 100644 --- a/requirements-cuda.txt +++ b/requirements-cuda.txt @@ -3,8 +3,9 @@ # Dependencies for NVIDIA GPUs ray >= 2.9 -torch == 2.1.2 -xformers == 0.0.23.post1 # Required for CUDA 12.1. pynvml == 11.5.0 +vllm-nccl-cu12>=2.18<2.19 # for downloading nccl library +torch == 2.2.1 +xformers == 0.0.25 # Requires PyTorch 2.2.1 triton >= 2.1.0 outlines == 0.0.34 # Requires torch >= 2.1.0 From 335f64b0fc805d1527ee2c02038e727ff7fe05d7 Mon Sep 17 00:00:00 2001 From: Woosuk Kwon Date: Thu, 4 Apr 2024 17:37:48 +0000 Subject: [PATCH 08/12] yapf --- requirements-cuda.txt | 2 +- setup.py | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/requirements-cuda.txt b/requirements-cuda.txt index 8e537a0ce5ca9..36c2643b7da78 100644 --- a/requirements-cuda.txt +++ b/requirements-cuda.txt @@ -4,7 +4,7 @@ # Dependencies for NVIDIA GPUs ray >= 2.9 pynvml == 11.5.0 -vllm-nccl-cu12>=2.18<2.19 # for downloading nccl library +vllm-nccl-cu12>=2.18<2.19 # for downloading nccl library torch == 2.2.1 xformers == 0.0.25 # Requires PyTorch 2.2.1 triton >= 2.1.0 diff --git a/setup.py b/setup.py index 6065bc702586b..98c92f9196e7e 100644 --- a/setup.py +++ b/setup.py @@ -344,8 +344,7 @@ def _read_requirements(filename: str) -> List[str]: for req in requirements: if "vllm-nccl-cu12" in req: modified_requirements.append( - req.replace("vllm-nccl-cu12", - f"vllm-nccl-cu{cuda_major}")) + req.replace("vllm-nccl-cu12", f"vllm-nccl-cu{cuda_major}")) else: modified_requirements.append(req) requirements = modified_requirements From c5b61727d78248247f16a85f55b759eb116b3192 Mon Sep 17 00:00:00 2001 From: Woosuk Kwon Date: Fri, 5 Apr 2024 06:18:42 +0000 Subject: [PATCH 09/12] Fix torch CPU version --- requirements-cpu.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements-cpu.txt b/requirements-cpu.txt index 5ca1264bd3944..98fa4d965fd52 100644 --- a/requirements-cpu.txt +++ b/requirements-cpu.txt @@ -2,5 +2,5 @@ -r requirements-common.txt # Dependencies for x86_64 CPUs -torch == 2.1.2+cpu +torch == 2.2.1+cpu triton >= 2.1.0 # FIXME(woosuk): This is a hack to avoid import error. From f9ea815ef78aee84325face1d19783974202119a Mon Sep 17 00:00:00 2001 From: Woosuk Kwon Date: Fri, 5 Apr 2024 06:19:01 +0000 Subject: [PATCH 10/12] Minor --- requirements-cpu.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements-cpu.txt b/requirements-cpu.txt index 98fa4d965fd52..36d20bc9473ea 100644 --- a/requirements-cpu.txt +++ b/requirements-cpu.txt @@ -2,5 +2,5 @@ -r requirements-common.txt # Dependencies for x86_64 CPUs -torch == 2.2.1+cpu +torch == 2.2.1+cpu triton >= 2.1.0 # FIXME(woosuk): This is a hack to avoid import error. From 54ce085e3458476c970cba812df1110dae0cbd3e Mon Sep 17 00:00:00 2001 From: Woosuk Kwon Date: Fri, 5 Apr 2024 06:22:11 +0000 Subject: [PATCH 11/12] Move outlines to common --- requirements-common.txt | 1 + requirements-cuda.txt | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements-common.txt b/requirements-common.txt index dc331c11de14f..9a75cec18bb66 100644 --- a/requirements-common.txt +++ b/requirements-common.txt @@ -11,3 +11,4 @@ uvicorn[standard] pydantic >= 2.0 # Required for OpenAI server. prometheus_client >= 0.18.0 tiktoken == 0.6.0 # Required for DBRX tokenizer +outlines == 0.0.34 # Requires torch >= 2.1.0 diff --git a/requirements-cuda.txt b/requirements-cuda.txt index 36c2643b7da78..4ced2ed3127ed 100644 --- a/requirements-cuda.txt +++ b/requirements-cuda.txt @@ -8,4 +8,3 @@ vllm-nccl-cu12>=2.18<2.19 # for downloading nccl library torch == 2.2.1 xformers == 0.0.25 # Requires PyTorch 2.2.1 triton >= 2.1.0 -outlines == 0.0.34 # Requires torch >= 2.1.0 From ab8c84f4d975813eb83de857913386bc521d6e88 Mon Sep 17 00:00:00 2001 From: Woosuk Kwon Date: Fri, 5 Apr 2024 06:43:32 +0000 Subject: [PATCH 12/12] Add comma --- requirements-cuda.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements-cuda.txt b/requirements-cuda.txt index 4ced2ed3127ed..6ee75e8139c04 100644 --- a/requirements-cuda.txt +++ b/requirements-cuda.txt @@ -4,7 +4,7 @@ # Dependencies for NVIDIA GPUs ray >= 2.9 pynvml == 11.5.0 -vllm-nccl-cu12>=2.18<2.19 # for downloading nccl library +vllm-nccl-cu12>=2.18,<2.19 # for downloading nccl library torch == 2.2.1 xformers == 0.0.25 # Requires PyTorch 2.2.1 triton >= 2.1.0