Commit 1df44c3

Signed-off-by: Lucas Wilkinson <[email protected]>

1 parent: a6c0438
Showing 2 changed files with 100 additions and 4 deletions.
@@ -0,0 +1,85 @@
# SPDX-License-Identifier: Apache-2.0

from vllm import LLM, SamplingParams

llm = LLM(
    model="deepseek-ai/DeepSeek-V2-Lite",
    trust_remote_code=True,
)
sampling_params = SamplingParams(temperature=0.5)


def print_outputs(outputs):
    for output in outputs:
        prompt = output.prompt
        generated_text = output.outputs[0].text
        print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")
    print("-" * 80)


print("=" * 80)

# In this script, we demonstrate how to pass input to the chat method:

conversation = [
    {
        "role": "system",
        "content": "You are a helpful assistant"
    },
    {
        "role": "user",
        "content": "Hello"
    },
    {
        "role": "assistant",
        "content": "Hello! How can I assist you today?"
    },
    {
        "role": "user",
        "content": "Write an essay about the importance of higher education.",
    },
]
outputs = llm.chat(conversation,
                   sampling_params=sampling_params,
                   use_tqdm=False)
print_outputs(outputs)

# You can run batch inference with the llm.chat API
conversation = [
    {
        "role": "system",
        "content": "You are a helpful assistant"
    },
    {
        "role": "user",
        "content": "Hello"
    },
    {
        "role": "assistant",
        "content": "Hello! How can I assist you today?"
    },
    {
        "role": "user",
        "content": "Write an essay about the importance of higher education.",
    },
]
conversations = [conversation for _ in range(10)]

# We turn on the tqdm progress bar to verify that batch inference is indeed running
outputs = llm.chat(messages=conversations,
                   sampling_params=sampling_params,
                   use_tqdm=True)
print_outputs(outputs)

# A chat template can optionally be supplied.
# If not, the model will use its default chat template.

# with open("template_falcon_180b.jinja") as f:
#     chat_template = f.read()

# outputs = llm.chat(
#     conversations,
#     sampling_params=sampling_params,
#     use_tqdm=False,
#     chat_template=chat_template,
# )
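As a rough usage sketch, the commented-out block above could also be exercised with an inline Jinja string instead of a template file on disk. Only the chat_template parameter comes from the code above; the template string below is a made-up illustration, not any model's official chat template.

# A minimal sketch: pass an inline Jinja chat template to llm.chat.
# The template string is hypothetical, shown only to illustrate the shape
# of the chat_template argument used in the commented-out block above.
inline_template = (
    "{% for message in messages %}"
    "{{ message['role'] }}: {{ message['content'] }}\n"
    "{% endfor %}"
    "assistant:"
)

outputs = llm.chat(
    conversation,
    sampling_params=sampling_params,
    use_tqdm=False,
    chat_template=inline_template,
)
print_outputs(outputs)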