diff --git a/README.md b/README.md
index 54d208fab..d97a14c0e 100644
--- a/README.md
+++ b/README.md
@@ -146,18 +146,17 @@ Within the deployment specification, locate and modify the command field.
 
 #### Original
 ```sh
-accelerate launch --num_processes 1 --num_machines 1 --machine_rank 0 --gpu_ids all inference_api.py --pipeline text-generation --torch_dtype bfloat16
+accelerate launch --num_processes 1 --num_machines 1 --machine_rank 0 --gpu_ids all inference_api.py --torch_dtype bfloat16
 ```
 
 #### Modify to enable 4-bit Quantization
 ```sh
-accelerate launch --num_processes 1 --num_machines 1 --machine_rank 0 --gpu_ids all inference_api.py --pipeline text-generation --torch_dtype bfloat16 --load_in_4bit
+accelerate launch --num_processes 1 --num_machines 1 --machine_rank 0 --gpu_ids all inference_api.py --torch_dtype bfloat16 --load_in_4bit
 ```
 
 Currently, we allow users to change the following parameters manually:
 
-- `pipeline`: For text-generation models this can be either `text-generation` or `conversational`.
 - `load_in_4bit` or `load_in_8bit`: Model quantization resolution.
 
 Should you need to customize other parameters, kindly file an issue for potential future inclusion.
 
diff --git a/docs/custom-model-integration/custom-deployment-template.yaml b/docs/custom-model-integration/custom-deployment-template.yaml
index fe9c2c4ad..c369c9cdb 100644
--- a/docs/custom-model-integration/custom-deployment-template.yaml
+++ b/docs/custom-model-integration/custom-deployment-template.yaml
@@ -23,8 +23,6 @@ inference:
         - "--gpu_ids"
         - "all"
         - "tfs/inference_api.py"
-        - "--pipeline"
-        - "text-generation"
         - "--torch_dtype"
         - "float16" # Set to "float16" for compatibility with V100 GPUs; use "bfloat16" for A100, H100 or newer GPUs
       volumeMounts:
diff --git a/docs/custom-model-integration/reference-image-deployment.yaml b/docs/custom-model-integration/reference-image-deployment.yaml
index 3a77dba08..36d518638 100644
--- a/docs/custom-model-integration/reference-image-deployment.yaml
+++ b/docs/custom-model-integration/reference-image-deployment.yaml
@@ -23,8 +23,6 @@ inference:
         - "--gpu_ids"
         - "all"
         - "inference_api.py"
-        - "--pipeline"
-        - "text-generation"
         - "--trust_remote_code"
         - "--allow_remote_files"
         - "--pretrained_model_name_or_path"
diff --git a/presets/workspace/models/falcon/model.go b/presets/workspace/models/falcon/model.go
index 34aac7824..5cf07e221 100644
--- a/presets/workspace/models/falcon/model.go
+++ b/presets/workspace/models/falcon/model.go
@@ -48,7 +48,6 @@ var (
     baseCommandPresetFalconTuning = "cd /workspace/tfs/ && python3 metrics_server.py & accelerate launch"
     falconRunParams = map[string]string{
         "torch_dtype": "bfloat16",
-        "pipeline": "text-generation",
         "chat_template": "/workspace/chat_templates/falcon-instruct.jinja",
     }
     falconRunParamsVLLM = map[string]string{
diff --git a/presets/workspace/models/mistral/model.go b/presets/workspace/models/mistral/model.go
index b3b8497f0..54b2604e6 100644
--- a/presets/workspace/models/mistral/model.go
+++ b/presets/workspace/models/mistral/model.go
@@ -35,7 +35,6 @@ var (
     baseCommandPresetMistralTuning = "cd /workspace/tfs/ && python3 metrics_server.py & accelerate launch"
     mistralRunParams = map[string]string{
         "torch_dtype": "bfloat16",
-        "pipeline": "text-generation",
         "chat_template": "/workspace/chat_templates/mistral-instruct.jinja",
     }
     mistralRunParamsVLLM = map[string]string{
diff --git a/presets/workspace/models/phi2/model.go b/presets/workspace/models/phi2/model.go
index bb7989df9..8afa66f19 100644
--- a/presets/workspace/models/phi2/model.go
+++ b/presets/workspace/models/phi2/model.go
@@ -29,7 +29,6 @@ var (
     baseCommandPresetPhiTuning = "cd /workspace/tfs/ && python3 metrics_server.py & accelerate launch"
     phiRunParams = map[string]string{
         "torch_dtype": "float16",
-        "pipeline": "text-generation",
     }
     phiRunParamsVLLM = map[string]string{
         "dtype": "float16",
diff --git a/presets/workspace/models/phi3/model.go b/presets/workspace/models/phi3/model.go
index c8c40e4d1..84eb4d544 100644
--- a/presets/workspace/models/phi3/model.go
+++ b/presets/workspace/models/phi3/model.go
@@ -53,7 +53,6 @@ var (
     baseCommandPresetPhiTuning = "cd /workspace/tfs/ && python3 metrics_server.py & accelerate launch"
     phiRunParams = map[string]string{
         "torch_dtype": "auto",
-        "pipeline": "text-generation",
         "trust_remote_code": "",
     }
     phiRunParamsVLLM = map[string]string{
diff --git a/presets/workspace/models/qwen/model.go b/presets/workspace/models/qwen/model.go
index 20a09df74..f03dfdde0 100644
--- a/presets/workspace/models/qwen/model.go
+++ b/presets/workspace/models/qwen/model.go
@@ -29,7 +29,6 @@ var (
    baseCommandPresetQwenTuning = "cd /workspace/tfs/ && python3 metrics_server.py & accelerate launch"
    qwenRunParams = map[string]string{
        "torch_dtype": "bfloat16",
-       "pipeline": "text-generation",
    }
    qwenRunParamsVLLM = map[string]string{
        "dtype": "float16",
diff --git a/presets/workspace/test/manifests/falcon-40b-instruct/falcon-40b-instruct_hf.yaml b/presets/workspace/test/manifests/falcon-40b-instruct/falcon-40b-instruct_hf.yaml
index a44043894..25d8cd96a 100644
--- a/presets/workspace/test/manifests/falcon-40b-instruct/falcon-40b-instruct_hf.yaml
+++ b/presets/workspace/test/manifests/falcon-40b-instruct/falcon-40b-instruct_hf.yaml
@@ -19,7 +19,7 @@ spec:
       command:
         - /bin/sh
         - -c
-        - accelerate launch --num_processes 1 --num_machines 1 --machine_rank 0 --gpu_ids all /workspace/tfs/inference_api.py --pipeline text-generation --torch_dtype bfloat16
+        - accelerate launch --num_processes 1 --num_machines 1 --machine_rank 0 --gpu_ids all /workspace/tfs/inference_api.py --torch_dtype bfloat16
       resources:
         requests:
           nvidia.com/gpu: 2
diff --git a/presets/workspace/test/manifests/falcon-40b/falcon-40b_hf.yaml b/presets/workspace/test/manifests/falcon-40b/falcon-40b_hf.yaml
index 514d12e60..446d0b00c 100644
--- a/presets/workspace/test/manifests/falcon-40b/falcon-40b_hf.yaml
+++ b/presets/workspace/test/manifests/falcon-40b/falcon-40b_hf.yaml
@@ -19,7 +19,7 @@ spec:
       command:
         - /bin/sh
        - -c
-        - accelerate launch --num_processes 1 --num_machines 1 --machine_rank 0 --gpu_ids all /workspace/tfs/inference_api.py --pipeline text-generation --torch_dtype bfloat16
+        - accelerate launch --num_processes 1 --num_machines 1 --machine_rank 0 --gpu_ids all /workspace/tfs/inference_api.py --torch_dtype bfloat16
       resources:
         requests:
           nvidia.com/gpu: 2
diff --git a/presets/workspace/test/manifests/falcon-7b-adapter/falcon-7b-adapter.yaml b/presets/workspace/test/manifests/falcon-7b-adapter/falcon-7b-adapter.yaml
index c48a1c2cf..7a02f0633 100644
--- a/presets/workspace/test/manifests/falcon-7b-adapter/falcon-7b-adapter.yaml
+++ b/presets/workspace/test/manifests/falcon-7b-adapter/falcon-7b-adapter.yaml
@@ -30,7 +30,7 @@ spec:
       command:
         - /bin/sh
         - -c
-        - accelerate launch --num_processes 1 --num_machines 1 --machine_rank 0 --gpu_ids all /workspace/tfs/inference_api.py --pipeline text-generation --torch_dtype bfloat16
+        - accelerate launch --num_processes 1 --num_machines 1 --machine_rank 0 --gpu_ids all /workspace/tfs/inference_api.py --torch_dtype bfloat16
       resources:
         requests:
           nvidia.com/gpu: 2
diff --git a/presets/workspace/test/manifests/falcon-7b-instruct/falcon-7b-instruct_hf.yaml b/presets/workspace/test/manifests/falcon-7b-instruct/falcon-7b-instruct_hf.yaml
index 1b2092b36..02b3bbb86 100644
--- a/presets/workspace/test/manifests/falcon-7b-instruct/falcon-7b-instruct_hf.yaml
+++ b/presets/workspace/test/manifests/falcon-7b-instruct/falcon-7b-instruct_hf.yaml
@@ -19,7 +19,7 @@ spec:
       command:
         - /bin/sh
         - -c
-        - accelerate launch --num_processes 1 --num_machines 1 --machine_rank 0 --gpu_ids all /workspace/tfs/inference_api.py --pipeline text-generation --torch_dtype bfloat16
+        - accelerate launch --num_processes 1 --num_machines 1 --machine_rank 0 --gpu_ids all /workspace/tfs/inference_api.py --torch_dtype bfloat16
       resources:
         requests:
           nvidia.com/gpu: 1
diff --git a/presets/workspace/test/manifests/falcon-7b/falcon-7b_hf.yaml b/presets/workspace/test/manifests/falcon-7b/falcon-7b_hf.yaml
index 56a775fff..36a97b2a8 100644
--- a/presets/workspace/test/manifests/falcon-7b/falcon-7b_hf.yaml
+++ b/presets/workspace/test/manifests/falcon-7b/falcon-7b_hf.yaml
@@ -19,7 +19,7 @@ spec:
       command:
         - /bin/sh
         - -c
-        - accelerate launch --num_processes 1 --num_machines 1 --machine_rank 0 --gpu_ids all /workspace/tfs/inference_api.py --pipeline text-generation --torch_dtype bfloat16
+        - accelerate launch --num_processes 1 --num_machines 1 --machine_rank 0 --gpu_ids all /workspace/tfs/inference_api.py --torch_dtype bfloat16
       resources:
         requests:
           nvidia.com/gpu: 1
diff --git a/presets/workspace/test/manifests/mistral-7b-instruct/mistral-7b-instruct_hf.yaml b/presets/workspace/test/manifests/mistral-7b-instruct/mistral-7b-instruct_hf.yaml
index 75179683f..9980fcf42 100644
--- a/presets/workspace/test/manifests/mistral-7b-instruct/mistral-7b-instruct_hf.yaml
+++ b/presets/workspace/test/manifests/mistral-7b-instruct/mistral-7b-instruct_hf.yaml
@@ -18,7 +18,7 @@ spec:
       command:
         - /bin/sh
         - -c
-        - accelerate launch --num_processes 1 --num_machines 1 --machine_rank 0 --gpu_ids all /workspace/tfs/inference_api.py --pipeline text-generation --torch_dtype bfloat16
+        - accelerate launch --num_processes 1 --num_machines 1 --machine_rank 0 --gpu_ids all /workspace/tfs/inference_api.py --torch_dtype bfloat16
       resources:
         requests:
           nvidia.com/gpu: 1
diff --git a/presets/workspace/test/manifests/mistral-7b/mistral-7b_hf.yaml b/presets/workspace/test/manifests/mistral-7b/mistral-7b_hf.yaml
index 3eff5594f..7b810a353 100644
--- a/presets/workspace/test/manifests/mistral-7b/mistral-7b_hf.yaml
+++ b/presets/workspace/test/manifests/mistral-7b/mistral-7b_hf.yaml
@@ -18,7 +18,7 @@ spec:
       command:
         - /bin/sh
         - -c
-        - accelerate launch --num_processes 1 --num_machines 1 --machine_rank 0 --gpu_ids all /workspace/tfs/inference_api.py --pipeline text-generation --torch_dtype bfloat16
+        - accelerate launch --num_processes 1 --num_machines 1 --machine_rank 0 --gpu_ids all /workspace/tfs/inference_api.py --torch_dtype bfloat16
       resources:
         requests:
           nvidia.com/gpu: 1
diff --git a/presets/workspace/test/manifests/phi-2/phi-2_hf.yaml b/presets/workspace/test/manifests/phi-2/phi-2_hf.yaml
index cbc6f94e7..9d382d96d 100644
--- a/presets/workspace/test/manifests/phi-2/phi-2_hf.yaml
+++ b/presets/workspace/test/manifests/phi-2/phi-2_hf.yaml
@@ -18,7 +18,7 @@ spec:
       command:
         - /bin/sh
         - -c
-        - accelerate launch --num_processes 1 --num_machines 1 --machine_rank 0 --gpu_ids all /workspace/tfs/inference_api.py --pipeline text-generation --torch_dtype bfloat16
+        - accelerate launch --num_processes 1 --num_machines 1 --machine_rank 0 --gpu_ids all /workspace/tfs/inference_api.py --torch_dtype bfloat16
       resources:
         requests:
           nvidia.com/gpu: 1
diff --git a/presets/workspace/test/manifests/phi-3-medium-128k-instruct/phi-3-medium-128k-instruct_hf.yaml b/presets/workspace/test/manifests/phi-3-medium-128k-instruct/phi-3-medium-128k-instruct_hf.yaml
index 0adb122e4..5be9d124f 100644
--- a/presets/workspace/test/manifests/phi-3-medium-128k-instruct/phi-3-medium-128k-instruct_hf.yaml
+++ b/presets/workspace/test/manifests/phi-3-medium-128k-instruct/phi-3-medium-128k-instruct_hf.yaml
@@ -18,7 +18,7 @@ spec:
       command:
         - /bin/sh
         - -c
-        - accelerate launch --num_processes 1 --num_machines 1 --machine_rank 0 --gpu_ids all /workspace/tfs/inference_api.py --pipeline text-generation --torch_dtype auto --trust_remote_code
+        - accelerate launch --num_processes 1 --num_machines 1 --machine_rank 0 --gpu_ids all /workspace/tfs/inference_api.py --torch_dtype auto --trust_remote_code
       resources:
         requests:
           nvidia.com/gpu: 1
diff --git a/presets/workspace/test/manifests/phi-3-medium-4k-instruct/phi-3-medium-4k-instruct_hf.yaml b/presets/workspace/test/manifests/phi-3-medium-4k-instruct/phi-3-medium-4k-instruct_hf.yaml
index 1d0d64e47..800b80886 100644
--- a/presets/workspace/test/manifests/phi-3-medium-4k-instruct/phi-3-medium-4k-instruct_hf.yaml
+++ b/presets/workspace/test/manifests/phi-3-medium-4k-instruct/phi-3-medium-4k-instruct_hf.yaml
@@ -18,7 +18,7 @@ spec:
       command:
         - /bin/sh
         - -c
-        - accelerate launch --num_processes 1 --num_machines 1 --machine_rank 0 --gpu_ids all /workspace/tfs/inference_api.py --pipeline text-generation --torch_dtype auto --trust_remote_code
+        - accelerate launch --num_processes 1 --num_machines 1 --machine_rank 0 --gpu_ids all /workspace/tfs/inference_api.py --torch_dtype auto --trust_remote_code
       resources:
         requests:
           nvidia.com/gpu: 1
diff --git a/presets/workspace/test/manifests/phi-3-mini-128k-instruct/phi-3-mini-128k-instruct_hf.yaml b/presets/workspace/test/manifests/phi-3-mini-128k-instruct/phi-3-mini-128k-instruct_hf.yaml
index cf8898015..5f3759534 100644
--- a/presets/workspace/test/manifests/phi-3-mini-128k-instruct/phi-3-mini-128k-instruct_hf.yaml
+++ b/presets/workspace/test/manifests/phi-3-mini-128k-instruct/phi-3-mini-128k-instruct_hf.yaml
@@ -18,7 +18,7 @@ spec:
       command:
         - /bin/sh
         - -c
-        - accelerate launch --num_processes 1 --num_machines 1 --machine_rank 0 --gpu_ids all /workspace/tfs/inference_api.py --pipeline text-generation --torch_dtype auto --trust_remote_code
+        - accelerate launch --num_processes 1 --num_machines 1 --machine_rank 0 --gpu_ids all /workspace/tfs/inference_api.py --torch_dtype auto --trust_remote_code
       resources:
         requests:
           nvidia.com/gpu: 1
diff --git a/presets/workspace/test/manifests/phi-3-mini-4k-instruct/phi-3-mini-4k-instruct_hf.yaml b/presets/workspace/test/manifests/phi-3-mini-4k-instruct/phi-3-mini-4k-instruct_hf.yaml
index 1d7069a38..fb7619d75 100644
--- a/presets/workspace/test/manifests/phi-3-mini-4k-instruct/phi-3-mini-4k-instruct_hf.yaml
+++ b/presets/workspace/test/manifests/phi-3-mini-4k-instruct/phi-3-mini-4k-instruct_hf.yaml
@@ -18,7 +18,7 @@ spec:
       command:
         - /bin/sh
         - -c
-        - accelerate launch --num_processes 1 --num_machines 1 --machine_rank 0 --gpu_ids all /workspace/tfs/inference_api.py --pipeline text-generation --torch_dtype auto --trust_remote_code
+        - accelerate launch --num_processes 1 --num_machines 1 --machine_rank 0 --gpu_ids all /workspace/tfs/inference_api.py --torch_dtype auto --trust_remote_code
       resources:
         requests:
           nvidia.com/gpu: 1
diff --git a/presets/workspace/test/manifests/phi-3-small-128k-instruct/phi-3-small-128k-instruct.yaml b/presets/workspace/test/manifests/phi-3-small-128k-instruct/phi-3-small-128k-instruct.yaml
index 1827155f4..680d4efbe 100644
--- a/presets/workspace/test/manifests/phi-3-small-128k-instruct/phi-3-small-128k-instruct.yaml
+++ b/presets/workspace/test/manifests/phi-3-small-128k-instruct/phi-3-small-128k-instruct.yaml
@@ -18,7 +18,7 @@ spec:
       command:
         - /bin/sh
         - -c
-        - accelerate launch --num_processes 1 --num_machines 1 --machine_rank 0 --gpu_ids all /workspace/tfs/inference_api.py --pipeline text-generation --torch_dtype auto --trust_remote_code
+        - accelerate launch --num_processes 1 --num_machines 1 --machine_rank 0 --gpu_ids all /workspace/tfs/inference_api.py --torch_dtype auto --trust_remote_code
       resources:
         requests:
           nvidia.com/gpu: 1
diff --git a/presets/workspace/test/manifests/phi-3-small-8k-instruct/phi-3-small-8k-instruct.yaml b/presets/workspace/test/manifests/phi-3-small-8k-instruct/phi-3-small-8k-instruct.yaml
index 1f515cc6a..c693b97af 100644
--- a/presets/workspace/test/manifests/phi-3-small-8k-instruct/phi-3-small-8k-instruct.yaml
+++ b/presets/workspace/test/manifests/phi-3-small-8k-instruct/phi-3-small-8k-instruct.yaml
@@ -18,7 +18,7 @@ spec:
       command:
         - /bin/sh
         - -c
-        - accelerate launch --num_processes 1 --num_machines 1 --machine_rank 0 --gpu_ids all /workspace/tfs/inference_api.py --pipeline text-generation --torch_dtype auto --trust_remote_code
+        - accelerate launch --num_processes 1 --num_machines 1 --machine_rank 0 --gpu_ids all /workspace/tfs/inference_api.py --torch_dtype auto --trust_remote_code
       resources:
         requests:
           nvidia.com/gpu: 1
diff --git a/presets/workspace/test/manifests/qwen2-5-coder-7b-instruct/qwen2-5-coder-7b-instruct_hf.yaml b/presets/workspace/test/manifests/qwen2-5-coder-7b-instruct/qwen2-5-coder-7b-instruct_hf.yaml
index e92d906d7..81f096b05 100644
--- a/presets/workspace/test/manifests/qwen2-5-coder-7b-instruct/qwen2-5-coder-7b-instruct_hf.yaml
+++ b/presets/workspace/test/manifests/qwen2-5-coder-7b-instruct/qwen2-5-coder-7b-instruct_hf.yaml
@@ -18,7 +18,7 @@ spec:
       command:
         - /bin/sh
         - -c
-        - accelerate launch --num_processes 1 --num_machines 1 --machine_rank 0 --gpu_ids all /workspace/tfs/inference_api.py --pipeline text-generation --torch_dtype auto --trust_remote_code
+        - accelerate launch --num_processes 1 --num_machines 1 --machine_rank 0 --gpu_ids all /workspace/tfs/inference_api.py --torch_dtype auto --trust_remote_code
       resources:
         requests:
           nvidia.com/gpu: 2
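
For context on the flags this patch leaves in place: below is a minimal, hypothetical sketch of how `--torch_dtype` and the `--load_in_4bit`/`--load_in_8bit` quantization flags are typically consumed on the Python side. It is not the repository's actual `inference_api.py`; only the flag names come from the commands above, and the default model path is an assumption. It illustrates why `--pipeline` can be dropped uniformly: once the serving mode is fixed, no pipeline argument needs parsing at all.

```python
# Hypothetical sketch, NOT the repo's inference_api.py: shows the usual way
# --torch_dtype and --load_in_4bit/--load_in_8bit map onto Hugging Face
# transformers loading options.
import argparse

import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

parser = argparse.ArgumentParser()
# Assumed default path; the real script's default may differ.
parser.add_argument("--pretrained_model_name_or_path", default="/workspace/tfs/weights")
parser.add_argument("--torch_dtype", default="bfloat16")  # e.g. float16, bfloat16, auto
parser.add_argument("--load_in_4bit", action="store_true")
parser.add_argument("--load_in_8bit", action="store_true")
args, _ = parser.parse_known_args()  # tolerate flags this sketch doesn't model

# Quantization is optional; only build a config when a flag is set.
quant_config = None
if args.load_in_4bit:
    quant_config = BitsAndBytesConfig(load_in_4bit=True)
elif args.load_in_8bit:
    quant_config = BitsAndBytesConfig(load_in_8bit=True)

model = AutoModelForCausalLM.from_pretrained(
    args.pretrained_model_name_or_path,
    torch_dtype="auto" if args.torch_dtype == "auto" else getattr(torch, args.torch_dtype),
    quantization_config=quant_config,
)
```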