Skip to content

Commit

Permalink
fix: remove pipeline param
Browse files Browse the repository at this point in the history
  • Loading branch information
ishaansehgal99 committed Jan 12, 2025
1 parent b10cfc5 commit 6d5e0f2
Show file tree
Hide file tree
Showing 23 changed files with 17 additions and 27 deletions.
5 changes: 2 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -146,18 +146,17 @@ Within the deployment specification, locate and modify the command field.
#### Original

```sh
accelerate launch --num_processes 1 --num_machines 1 --machine_rank 0 --gpu_ids all inference_api.py --pipeline text-generation --torch_dtype bfloat16
accelerate launch --num_processes 1 --num_machines 1 --machine_rank 0 --gpu_ids all inference_api.py --torch_dtype bfloat16
```

#### Modify to enable 4-bit Quantization

```sh
accelerate launch --num_processes 1 --num_machines 1 --machine_rank 0 --gpu_ids all inference_api.py --pipeline text-generation --torch_dtype bfloat16 --load_in_4bit
accelerate launch --num_processes 1 --num_machines 1 --machine_rank 0 --gpu_ids all inference_api.py --torch_dtype bfloat16 --load_in_4bit
```

Currently, we allow users to change the following parameters manually:

- `pipeline`: For text-generation models this can be either `text-generation` or `conversational`.
- `load_in_4bit` or `load_in_8bit`: Model quantization resolution.

Should you need to customize other parameters, kindly file an issue for potential future inclusion.
Expand Down
2 changes: 0 additions & 2 deletions docs/custom-model-integration/custom-deployment-template.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,6 @@ inference:
- "--gpu_ids"
- "all"
- "tfs/inference_api.py"
- "--pipeline"
- "text-generation"
- "--torch_dtype"
- "float16" # Set to "float16" for compatibility with V100 GPUs; use "bfloat16" for A100, H100 or newer GPUs
volumeMounts:
Expand Down
2 changes: 0 additions & 2 deletions docs/custom-model-integration/reference-image-deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,6 @@ inference:
- "--gpu_ids"
- "all"
- "inference_api.py"
- "--pipeline"
- "text-generation"
- "--trust_remote_code"
- "--allow_remote_files"
- "--pretrained_model_name_or_path"
Expand Down
1 change: 0 additions & 1 deletion presets/workspace/models/falcon/model.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,6 @@ var (
baseCommandPresetFalconTuning = "cd /workspace/tfs/ && python3 metrics_server.py & accelerate launch"
falconRunParams = map[string]string{
"torch_dtype": "bfloat16",
"pipeline": "text-generation",
"chat_template": "/workspace/chat_templates/falcon-instruct.jinja",
}
falconRunParamsVLLM = map[string]string{
Expand Down
1 change: 0 additions & 1 deletion presets/workspace/models/mistral/model.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@ var (
baseCommandPresetMistralTuning = "cd /workspace/tfs/ && python3 metrics_server.py & accelerate launch"
mistralRunParams = map[string]string{
"torch_dtype": "bfloat16",
"pipeline": "text-generation",
"chat_template": "/workspace/chat_templates/mistral-instruct.jinja",
}
mistralRunParamsVLLM = map[string]string{
Expand Down
1 change: 0 additions & 1 deletion presets/workspace/models/phi2/model.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@ var (
baseCommandPresetPhiTuning = "cd /workspace/tfs/ && python3 metrics_server.py & accelerate launch"
phiRunParams = map[string]string{
"torch_dtype": "float16",
"pipeline": "text-generation",
}
phiRunParamsVLLM = map[string]string{
"dtype": "float16",
Expand Down
1 change: 0 additions & 1 deletion presets/workspace/models/phi3/model.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,6 @@ var (
baseCommandPresetPhiTuning = "cd /workspace/tfs/ && python3 metrics_server.py & accelerate launch"
phiRunParams = map[string]string{
"torch_dtype": "auto",
"pipeline": "text-generation",
"trust_remote_code": "",
}
phiRunParamsVLLM = map[string]string{
Expand Down
1 change: 0 additions & 1 deletion presets/workspace/models/qwen/model.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@ var (
baseCommandPresetQwenTuning = "cd /workspace/tfs/ && python3 metrics_server.py & accelerate launch"
qwenRunParams = map[string]string{
"torch_dtype": "bfloat16",
"pipeline": "text-generation",
}
qwenRunParamsVLLM = map[string]string{
"dtype": "float16",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ spec:
command:
- /bin/sh
- -c
- accelerate launch --num_processes 1 --num_machines 1 --machine_rank 0 --gpu_ids all /workspace/tfs/inference_api.py --pipeline text-generation --torch_dtype bfloat16
- accelerate launch --num_processes 1 --num_machines 1 --machine_rank 0 --gpu_ids all /workspace/tfs/inference_api.py --torch_dtype bfloat16
resources:
requests:
nvidia.com/gpu: 2
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ spec:
command:
- /bin/sh
- -c
- accelerate launch --num_processes 1 --num_machines 1 --machine_rank 0 --gpu_ids all /workspace/tfs/inference_api.py --pipeline text-generation --torch_dtype bfloat16
- accelerate launch --num_processes 1 --num_machines 1 --machine_rank 0 --gpu_ids all /workspace/tfs/inference_api.py --torch_dtype bfloat16
resources:
requests:
nvidia.com/gpu: 2
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ spec:
command:
- /bin/sh
- -c
- accelerate launch --num_processes 1 --num_machines 1 --machine_rank 0 --gpu_ids all /workspace/tfs/inference_api.py --pipeline text-generation --torch_dtype bfloat16
- accelerate launch --num_processes 1 --num_machines 1 --machine_rank 0 --gpu_ids all /workspace/tfs/inference_api.py --torch_dtype bfloat16
resources:
requests:
nvidia.com/gpu: 2
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ spec:
command:
- /bin/sh
- -c
- accelerate launch --num_processes 1 --num_machines 1 --machine_rank 0 --gpu_ids all /workspace/tfs/inference_api.py --pipeline text-generation --torch_dtype bfloat16
- accelerate launch --num_processes 1 --num_machines 1 --machine_rank 0 --gpu_ids all /workspace/tfs/inference_api.py --torch_dtype bfloat16
resources:
requests:
nvidia.com/gpu: 1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ spec:
command:
- /bin/sh
- -c
- accelerate launch --num_processes 1 --num_machines 1 --machine_rank 0 --gpu_ids all /workspace/tfs/inference_api.py --pipeline text-generation --torch_dtype bfloat16
- accelerate launch --num_processes 1 --num_machines 1 --machine_rank 0 --gpu_ids all /workspace/tfs/inference_api.py --torch_dtype bfloat16
resources:
requests:
nvidia.com/gpu: 1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ spec:
command:
- /bin/sh
- -c
- accelerate launch --num_processes 1 --num_machines 1 --machine_rank 0 --gpu_ids all /workspace/tfs/inference_api.py --pipeline text-generation --torch_dtype bfloat16
- accelerate launch --num_processes 1 --num_machines 1 --machine_rank 0 --gpu_ids all /workspace/tfs/inference_api.py --torch_dtype bfloat16
resources:
requests:
nvidia.com/gpu: 1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ spec:
command:
- /bin/sh
- -c
- accelerate launch --num_processes 1 --num_machines 1 --machine_rank 0 --gpu_ids all /workspace/tfs/inference_api.py --pipeline text-generation --torch_dtype bfloat16
- accelerate launch --num_processes 1 --num_machines 1 --machine_rank 0 --gpu_ids all /workspace/tfs/inference_api.py --torch_dtype bfloat16
resources:
requests:
nvidia.com/gpu: 1
Expand Down
2 changes: 1 addition & 1 deletion presets/workspace/test/manifests/phi-2/phi-2_hf.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ spec:
command:
- /bin/sh
- -c
- accelerate launch --num_processes 1 --num_machines 1 --machine_rank 0 --gpu_ids all /workspace/tfs/inference_api.py --pipeline text-generation --torch_dtype bfloat16
- accelerate launch --num_processes 1 --num_machines 1 --machine_rank 0 --gpu_ids all /workspace/tfs/inference_api.py --torch_dtype bfloat16
resources:
requests:
nvidia.com/gpu: 1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ spec:
command:
- /bin/sh
- -c
- accelerate launch --num_processes 1 --num_machines 1 --machine_rank 0 --gpu_ids all /workspace/tfs/inference_api.py --pipeline text-generation --torch_dtype auto --trust_remote_code
- accelerate launch --num_processes 1 --num_machines 1 --machine_rank 0 --gpu_ids all /workspace/tfs/inference_api.py --torch_dtype auto --trust_remote_code
resources:
requests:
nvidia.com/gpu: 1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ spec:
command:
- /bin/sh
- -c
- accelerate launch --num_processes 1 --num_machines 1 --machine_rank 0 --gpu_ids all /workspace/tfs/inference_api.py --pipeline text-generation --torch_dtype auto --trust_remote_code
- accelerate launch --num_processes 1 --num_machines 1 --machine_rank 0 --gpu_ids all /workspace/tfs/inference_api.py --torch_dtype auto --trust_remote_code
resources:
requests:
nvidia.com/gpu: 1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ spec:
command:
- /bin/sh
- -c
- accelerate launch --num_processes 1 --num_machines 1 --machine_rank 0 --gpu_ids all /workspace/tfs/inference_api.py --pipeline text-generation --torch_dtype auto --trust_remote_code
- accelerate launch --num_processes 1 --num_machines 1 --machine_rank 0 --gpu_ids all /workspace/tfs/inference_api.py --torch_dtype auto --trust_remote_code
resources:
requests:
nvidia.com/gpu: 1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ spec:
command:
- /bin/sh
- -c
- accelerate launch --num_processes 1 --num_machines 1 --machine_rank 0 --gpu_ids all /workspace/tfs/inference_api.py --pipeline text-generation --torch_dtype auto --trust_remote_code
- accelerate launch --num_processes 1 --num_machines 1 --machine_rank 0 --gpu_ids all /workspace/tfs/inference_api.py --torch_dtype auto --trust_remote_code
resources:
requests:
nvidia.com/gpu: 1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ spec:
command:
- /bin/sh
- -c
- accelerate launch --num_processes 1 --num_machines 1 --machine_rank 0 --gpu_ids all /workspace/tfs/inference_api.py --pipeline text-generation --torch_dtype auto --trust_remote_code
- accelerate launch --num_processes 1 --num_machines 1 --machine_rank 0 --gpu_ids all /workspace/tfs/inference_api.py --torch_dtype auto --trust_remote_code
resources:
requests:
nvidia.com/gpu: 1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ spec:
command:
- /bin/sh
- -c
- accelerate launch --num_processes 1 --num_machines 1 --machine_rank 0 --gpu_ids all /workspace/tfs/inference_api.py --pipeline text-generation --torch_dtype auto --trust_remote_code
- accelerate launch --num_processes 1 --num_machines 1 --machine_rank 0 --gpu_ids all /workspace/tfs/inference_api.py --torch_dtype auto --trust_remote_code
resources:
requests:
nvidia.com/gpu: 1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ spec:
command:
- /bin/sh
- -c
- accelerate launch --num_processes 1 --num_machines 1 --machine_rank 0 --gpu_ids all /workspace/tfs/inference_api.py --pipeline text-generation --torch_dtype auto --trust_remote_code
- accelerate launch --num_processes 1 --num_machines 1 --machine_rank 0 --gpu_ids all /workspace/tfs/inference_api.py --torch_dtype auto --trust_remote_code
resources:
requests:
nvidia.com/gpu: 2
Expand Down

0 comments on commit 6d5e0f2

Please sign in to comment.