Features/vllm-base-pipeline #138

Merged
merged 32 commits into from
Mar 18, 2024

Changes from 1 commit

32 commits
a6d2185
isolates llms into specific module
steffencruz Feb 28, 2024
76284e0
implement base and hf pipeline
p-ferreira Feb 28, 2024
21fa3ce
adds base llm class
p-ferreira Feb 28, 2024
b28ad5f
adds vllm model wrapper and pipeline + overall improvements
p-ferreira Feb 29, 2024
f9de780
drop-in replacement for hf zephyr
p-ferreira Feb 29, 2024
e76136e
Merge branch 'main' into features/vllm-test
p-ferreira Mar 1, 2024
76aedc2
update pipeline param type on tasks
steffencruz Mar 1, 2024
145b3cb
improving variable name, removes unused commented code
p-ferreira Mar 4, 2024
5aaccd5
fix broken tests
p-ferreira Mar 4, 2024
27c02bf
code declaration improvement on base_llm
p-ferreira Mar 4, 2024
7e035bf
unit test + overall adjustments
p-ferreira Mar 5, 2024
195aa33
runs black on new changes
p-ferreira Mar 5, 2024
ac3dee5
adds gpu calc utils functions + update vllm_llm
p-ferreira Mar 5, 2024
40aac18
update readme with vllm gpu device limitation
p-ferreira Mar 6, 2024
4f4e9ac
updates unit test
p-ferreira Mar 6, 2024
5cd27ca
upgrade load_vllm to handle max-len vs kv cache exceptional scenarios
p-ferreira Mar 6, 2024
f3245cc
runs black on vllm file
p-ferreira Mar 6, 2024
d70dc3a
refactor vllm load second attempt approach
p-ferreira Mar 6, 2024
7895c88
adds extra cuda sync before mem_get_info
p-ferreira Mar 6, 2024
b58ee03
fix imports of vllm_llm
p-ferreira Mar 6, 2024
9bc6060
adds tests for new vllm functionality
p-ferreira Mar 6, 2024
75c8a4e
Update neurons/miners/zephyr/miner.py
p-ferreira Mar 7, 2024
c8b9e28
pr adjustments: file renaming + vllm requirements
p-ferreira Mar 7, 2024
7244789
fix renaming imports
p-ferreira Mar 7, 2024
fb3eb98
rollback on requirements
p-ferreira Mar 7, 2024
1a4c69d
Merge pull request #149 from opentensor/staging
p-ferreira Mar 11, 2024
3a0517b
Merge branch 'main' into features/vllm-test
p-ferreira Mar 12, 2024
f5e241a
fix merging issues
p-ferreira Mar 12, 2024
31dbfb9
adjust cuda mock on unit test
p-ferreira Mar 13, 2024
1cb5a1e
Merge branch 'features/vllm-test' of https://github.com/opentensor/pr…
p-ferreira Mar 13, 2024
12769d9
mock cuda device call on unit test
p-ferreira Mar 13, 2024
e3545ac
fix mock target on patch
p-ferreira Mar 13, 2024
unit test + overall adjustments
p-ferreira committed Mar 5, 2024
commit 7e035bf5e0894731a09e1b66159354261d081ba0
1 change: 1 addition & 0 deletions prompting/cleaners/__init__.py
@@ -0,0 +1 @@
from .cleaner import CleanerPipeline
12 changes: 8 additions & 4 deletions prompting/llms/hf_llm.py
@@ -68,14 +68,19 @@ def __init__(
self.model = model_id
self.device = device
self.torch_dtype = torch_dtype

self.mock = mock
self.pipeline = load_hf_pipeline(
model_id, device, torch_dtype, mock, model_kwargs
)
self.tokenizer = self.pipeline.tokenizer

def __call__(self, composed_prompt: str, **kwargs: dict) -> str:
return self.pipeline(composed_prompt, **kwargs)
if self.mock:
return self.pipeline(composed_prompt, **kwargs)

# Extract the generated text from the pipeline output
outputs = self.pipeline(composed_prompt, **kwargs)
return outputs[0]["generated_text"]


class HuggingFaceLLM(BaseLLM):
@@ -135,10 +140,9 @@ def _make_prompt(self, messages: List[Dict[str, str]]):
def forward(self, messages: List[Dict[str, str]]):
composed_prompt = self._make_prompt(messages)
# System prompt is composed in the prompt
outputs = self.llm_pipeline(
response = self.llm_pipeline(
composed_prompt=composed_prompt, **self.model_kwargs
)
response = outputs[0]["generated_text"]

response = response.replace(composed_prompt, "").strip()

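The hunks above make the Hugging Face pipeline wrapper return a plain string in both mock and real modes, which is why `HuggingFaceLLM.forward` no longer indexes into `outputs[0]["generated_text"]` itself. A minimal, runnable sketch of that pattern; the names `_fake_hf_pipeline` and `call_pipeline` are illustrative stand-ins, not identifiers from the repository:

```python
from typing import Any, Dict, List


def _fake_hf_pipeline(prompt: str, **kwargs: Any) -> List[Dict[str, str]]:
    # Stand-in for a transformers text-generation pipeline: it returns a list
    # of dicts, each carrying the prompt plus the generated continuation.
    return [{"generated_text": prompt + " Austin."}]


def call_pipeline(pipeline, composed_prompt: str, mock: bool, **kwargs: Any) -> str:
    """Mirrors the mock-aware __call__ added in the diff above."""
    if mock:
        # The mock pipeline already returns a plain string.
        return pipeline(composed_prompt, **kwargs)
    # A real text-generation pipeline returns [{"generated_text": ...}],
    # so the string is extracted before returning.
    outputs = pipeline(composed_prompt, **kwargs)
    return outputs[0]["generated_text"]


print(call_pipeline(_fake_hf_pipeline, "What is the capital of Texas?", mock=False))
```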
7 changes: 4 additions & 3 deletions prompting/llms/vllm_llm.py
@@ -20,11 +20,12 @@
from vllm import LLM, SamplingParams
from prompting.cleaners.cleaner import CleanerPipeline
from prompting.llms import BasePipeline, BaseLLM
from prompting.mock import MockPipeline

def load_vllm_pipeline(model_id, mock=False):
"""Loads the VLLM pipeline for the LLM, or a mock pipeline if mock=True"""
if mock or model_id == "mock":
return None
return MockPipeline(model_id)

return LLM(model=model_id)

@@ -37,7 +38,7 @@ def __init__(self, model_id, device=None, mock=False):

def __call__(self, composed_prompt: str, **model_kwargs: Dict) -> str:
if self.mock:
return composed_prompt
return self.llm(composed_prompt, **model_kwargs)

# Compose sampling params
temperature = model_kwargs.get("temperature", 0.8)
@@ -120,7 +121,7 @@ def forward(self, messages: List[Dict[str, str]]):

if __name__ == "__main__":
# Example usage
llm_pipeline = vLLMPipeline(model_id="HuggingFaceH4/zephyr-7b-beta", mock=False)
llm_pipeline = vLLMPipeline(model_id="HuggingFaceH4/zephyr-7b-beta", mock=True)
llm = vLLM_LLM(llm_pipeline, system_prompt="You are a helpful AI assistant")

message = "What is the capital of Texas?"
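With this change, `load_vllm_pipeline` now hands back a `MockPipeline` (instead of `None`) when `mock=True`, so unit tests never need a GPU or the vllm package, while the real path composes `SamplingParams` from loosely typed kwargs. A hedged sketch of that shape, assuming standard vLLM APIs (`LLM`, `SamplingParams`, `generate`); only the temperature default of 0.8 appears in the diff, the other defaults here are illustrative:

```python
from typing import Any


class FakeVLLMPipeline:
    """Stand-in for prompting.mock.MockPipeline: always returns a canned reply."""

    def __init__(self, phrase: str):
        self.phrase = phrase

    def __call__(self, composed_prompt: str, **kwargs: Any) -> str:
        return self.phrase


def load_pipeline(model_id: str, mock: bool = False):
    """Mirrors load_vllm_pipeline: return a mock unless a real model is requested."""
    if mock or model_id == "mock":
        return FakeVLLMPipeline(model_id)
    # Real path: requires the vllm package and a GPU, so tests never take it.
    from vllm import LLM
    return LLM(model=model_id)


def generate(pipeline, composed_prompt: str, mock: bool, **model_kwargs: Any) -> str:
    if mock:
        return pipeline(composed_prompt, **model_kwargs)
    # Compose sampling params from kwargs, as in the diff above.
    from vllm import SamplingParams
    params = SamplingParams(
        temperature=model_kwargs.get("temperature", 0.8),
        top_p=model_kwargs.get("top_p", 0.95),
        max_tokens=model_kwargs.get("max_tokens", 256),
    )
    return pipeline.generate(composed_prompt, params)[0].outputs[0].text


print(generate(load_pipeline("mock", mock=True), "What is the capital of Texas?", mock=True))
```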
4 changes: 2 additions & 2 deletions prompting/mock.py
@@ -56,8 +56,8 @@ def __init__(
def __repr__(self):
return f"{self.__class__.__name__}(phrase={self.model.phrase})"

def __call__(self, messages, **kwargs):
return self.forward(messages, **kwargs)
def __call__(self, composed_prompt, **kwargs):
return self.forward(composed_prompt, **kwargs)

def forward(self, messages, **kwargs):
output = self.model(messages)
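Renaming the `MockPipeline.__call__` parameter from `messages` to `composed_prompt` keeps the mock call-compatible with the real pipelines, since the LLM wrappers pass the prompt by keyword (`self.llm_pipeline(composed_prompt=...)`). A tiny illustration of why the parameter name matters; both classes below are throwaway examples, not repository code:

```python
class RealishPipeline:
    def __call__(self, composed_prompt: str, **kwargs) -> str:
        return composed_prompt + " -> generated"


class MockishPipeline:
    # Before this commit the parameter was named `messages`, so calling it
    # with composed_prompt=... as the wrappers do would raise a TypeError.
    def __call__(self, composed_prompt: str, **kwargs) -> str:
        return "mock reply"


for pipe in (RealishPipeline(), MockishPipeline()):
    # The wrappers pass the prompt by keyword, so the name must match.
    print(pipe(composed_prompt="What is the capital of Texas?"))
```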
7 changes: 7 additions & 0 deletions tests/fixtures/cleaner.py
@@ -0,0 +1,7 @@
from prompting.cleaners import CleanerPipeline

DEFAULT_CLEANER_PIPELINE = CleanerPipeline([
dict(name="remove_quotes"),
dict(name="prune_ending"),
dict(name="remove_roles"),
])
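The new `DEFAULT_CLEANER_PIPELINE` fixture chains the remove_quotes, prune_ending and remove_roles cleaners. A usage sketch mirroring tests/test_llm.py below, assuming the tests package is importable from the repository root:

```python
from tests.fixtures.llm import llms
from tests.fixtures.cleaner import DEFAULT_CLEANER_PIPELINE

raw = '"I am a quote. User: I know you are. I am asking a question. What is th"'

for llm in llms():
    cleaned = llm.clean_response(cleaner=DEFAULT_CLEANER_PIPELINE, response=raw)
    # Expected per tests/test_llm.py:
    # 'I am a quote. I know you are. I am asking a question.'
    print(cleaned)
```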
19 changes: 18 additions & 1 deletion tests/fixtures/llm.py
@@ -1,3 +1,20 @@
from prompting.mock import MockPipeline
from prompting.llms import vLLM_LLM, HuggingFaceLLM, HuggingFacePipeline, vLLMPipeline

LLM_PIPELINE = MockPipeline("This is just another test.")
def mock_llm_pipeline():
return MockPipeline("This is just another test.")

def llms():
pipeline = MockPipeline("This is just another test.")
llms = [
vLLM_LLM(pipeline, ''),
HuggingFaceLLM(pipeline, '')
]
return llms

def pipelines():
# Return pipeline types to be instantiated downstream
return [
HuggingFacePipeline,
vLLMPipeline
]
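The module-level `LLM_PIPELINE` constant is replaced by `mock_llm_pipeline()`, presumably so that every parametrized test builds a fresh mock pipeline instead of sharing one instance across the session. A short sketch of that design choice, using only names that appear in the diff:

```python
from prompting.mock import MockPipeline

# Old pattern: one shared instance for every test in the session.
LLM_PIPELINE = MockPipeline("This is just another test.")

# New pattern: a factory, so each call (and therefore each test) gets a
# fresh, isolated pipeline.
def mock_llm_pipeline() -> MockPipeline:
    return MockPipeline("This is just another test.")

assert mock_llm_pipeline() is not mock_llm_pipeline()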
46 changes: 23 additions & 23 deletions tests/test_agent.py
@@ -2,7 +2,7 @@
from prompting.tasks import Task
from prompting.agent import HumanAgent, create_persona

from .fixtures.llm import LLM_PIPELINE
from .fixtures.llm import mock_llm_pipeline
from .fixtures.task import CONTEXTS, TASKS

"""
@@ -33,55 +33,55 @@
@pytest.mark.parametrize('task', TASKS)
def test_agent_creation_with_dataset_context(task: Task):
context = CONTEXTS[task]
task = task(llm_pipeline=LLM_PIPELINE, context=context)
agent = HumanAgent(llm_pipeline=LLM_PIPELINE, task=task, begin_conversation=True)
task = task(llm_pipeline=mock_llm_pipeline(), context=context)
agent = HumanAgent(llm_pipeline=mock_llm_pipeline(), task=task, begin_conversation=True)
assert agent is not None

@pytest.mark.parametrize('task', TASKS)
def test_agent_contains_persona(task: Task):
context = CONTEXTS[task]
task = task(llm_pipeline=LLM_PIPELINE, context=context)
agent = HumanAgent(llm_pipeline=LLM_PIPELINE, task=task, begin_conversation=True)
task = task(llm_pipeline=mock_llm_pipeline(), context=context)
agent = HumanAgent(llm_pipeline=mock_llm_pipeline(), task=task, begin_conversation=True)
assert agent.persona is not None

@pytest.mark.parametrize('task', TASKS)
def test_user_can_set_agent_persona(task: Task):
context = CONTEXTS[task]
persona = create_persona()
task = task(llm_pipeline=LLM_PIPELINE, context=context)
agent = HumanAgent(llm_pipeline=LLM_PIPELINE, task=task, begin_conversation=True, persona=persona)
task = task(llm_pipeline=mock_llm_pipeline(), context=context)
agent = HumanAgent(llm_pipeline=mock_llm_pipeline(), task=task, begin_conversation=True, persona=persona)
assert agent.persona == persona

@pytest.mark.parametrize('task', TASKS)
def test_agent_contains_task(task: Task):
context = CONTEXTS[task]
task = task(llm_pipeline=LLM_PIPELINE, context=context)
agent = HumanAgent(llm_pipeline=LLM_PIPELINE, task=task, begin_conversation=True)
task = task(llm_pipeline=mock_llm_pipeline(), context=context)
agent = HumanAgent(llm_pipeline=mock_llm_pipeline(), task=task, begin_conversation=True)
assert agent.task is not None

@pytest.mark.parametrize('task', TASKS)
def test_agent_has_system_prompt(task: Task):
context = CONTEXTS[task]
task = task(llm_pipeline=LLM_PIPELINE, context=context)
agent = HumanAgent(llm_pipeline=LLM_PIPELINE, task=task, begin_conversation=True)
task = task(llm_pipeline=mock_llm_pipeline(), context=context)
agent = HumanAgent(llm_pipeline=mock_llm_pipeline(), task=task, begin_conversation=True)
assert agent.system_prompt is not None

@pytest.mark.parametrize('task', TASKS)
def test_user_can_set_agent_system_prompt_template(task: Task):
context = CONTEXTS[task]
system_template = "Today I am in a {mood} mood because i wanted {desc} related to {topic} ({subtopic}) in a {tone} tone. My intention is {goal}, but my problem is {query}"

task = task(llm_pipeline=LLM_PIPELINE, context=context)
agent = HumanAgent(llm_pipeline=LLM_PIPELINE, task=task, begin_conversation=True, system_template=system_template)
task = task(llm_pipeline=mock_llm_pipeline(), context=context)
agent = HumanAgent(llm_pipeline=mock_llm_pipeline(), task=task, begin_conversation=True, system_template=system_template)
assert agent.system_prompt_template


@pytest.mark.parametrize('task', TASKS)
@pytest.mark.parametrize('begin_conversation', [True, False])
def test_agent_can_make_challenges(task: Task, begin_conversation: bool):
context = CONTEXTS[task]
task = task(llm_pipeline=LLM_PIPELINE, context=context)
agent = HumanAgent(llm_pipeline=LLM_PIPELINE, task=task, begin_conversation=begin_conversation)
task = task(llm_pipeline=mock_llm_pipeline(), context=context)
agent = HumanAgent(llm_pipeline=mock_llm_pipeline(), task=task, begin_conversation=begin_conversation)
if begin_conversation:
assert agent.challenge is not None
else:
@@ -90,30 +90,30 @@ def test_agent_can_make_challenges(task: Task, begin_conversation: bool):
@pytest.mark.parametrize('task', TASKS)
def test_agent_progress_is_zero_on_init(task: Task):
context = CONTEXTS[task]
task = task(llm_pipeline=LLM_PIPELINE, context=context)
agent = HumanAgent(llm_pipeline=LLM_PIPELINE, task=task, begin_conversation=True)
task = task(llm_pipeline=mock_llm_pipeline(), context=context)
agent = HumanAgent(llm_pipeline=mock_llm_pipeline(), task=task, begin_conversation=True)
assert agent.progress == 0

@pytest.mark.parametrize('task', TASKS)
def test_agent_progress_is_one_when_task_is_complete(task: Task):
context = CONTEXTS[task]
task = task(llm_pipeline=LLM_PIPELINE, context=context)
task = task(llm_pipeline=mock_llm_pipeline(), context=context)
task.complete = True
agent = HumanAgent(llm_pipeline=LLM_PIPELINE, task=task, begin_conversation=True)
agent = HumanAgent(llm_pipeline=mock_llm_pipeline(), task=task, begin_conversation=True)
assert agent.progress == 1

@pytest.mark.parametrize('task', TASKS)
def test_agent_finished_is_true_when_task_is_complete(task: Task):
context = CONTEXTS[task]
task = task(llm_pipeline=LLM_PIPELINE, context=context)
task = task(llm_pipeline=mock_llm_pipeline(), context=context)
task.complete = True
agent = HumanAgent(llm_pipeline=LLM_PIPELINE, task=task, begin_conversation=True)
agent = HumanAgent(llm_pipeline=mock_llm_pipeline(), task=task, begin_conversation=True)
assert agent.finished == True

@pytest.mark.parametrize('task', TASKS)
def test_agent_finished_is_false_when_task_is_not_complete(task: Task):
context = CONTEXTS[task]
task = task(llm_pipeline=LLM_PIPELINE, context=context)
task = task(llm_pipeline=mock_llm_pipeline(), context=context)
task.complete = False
agent = HumanAgent(llm_pipeline=LLM_PIPELINE, task=task, begin_conversation=True)
agent = HumanAgent(llm_pipeline=mock_llm_pipeline(), task=task, begin_conversation=True)
assert agent.finished == False
8 changes: 4 additions & 4 deletions tests/test_dataset_task_integration.py
@@ -1,6 +1,6 @@
import pytest
from prompting.tasks import Task
from .fixtures.llm import LLM_PIPELINE
from .fixtures.llm import mock_llm_pipeline
from .fixtures.task import CONTEXTS, TASKS


@@ -15,18 +15,18 @@
@pytest.mark.parametrize('task', TASKS)
def test_task_creation_with_dataset_context(task: Task):
context = CONTEXTS[task]
task(llm_pipeline=LLM_PIPELINE, context=context)
task(llm_pipeline=mock_llm_pipeline(), context=context)
assert task is not None

@pytest.mark.parametrize('task', TASKS)
def test_task_contains_query(task: Task):
context = CONTEXTS[task]
task = task(llm_pipeline=LLM_PIPELINE, context=context)
task = task(llm_pipeline=mock_llm_pipeline(), context=context)
assert task.query is not None

@pytest.mark.parametrize('task', TASKS)
def test_task_contains_reference(task: Task):
context = CONTEXTS[task]
task = task(llm_pipeline=LLM_PIPELINE, context=context)
task = task(llm_pipeline=mock_llm_pipeline(), context=context)
assert task.reference is not None

59 changes: 59 additions & 0 deletions tests/test_llm.py
@@ -0,0 +1,59 @@
# llm, input, expected output, cleaner

##test_llm_forward
# test llm query (check messages, times)
# test llm query (calls forward, clean_response)

import pytest
from prompting.llms import BaseLLM, BasePipeline
from prompting.cleaners import CleanerPipeline
from prompting.mock import MockPipeline
from .fixtures.llm import llms, pipelines
from .fixtures.cleaner import DEFAULT_CLEANER_PIPELINE

@pytest.mark.parametrize('input, expected_result, cleaner',
[('"I am a quote. User: I know you are. I am asking a question. What is th"', '"I am a quote. User: I know you are. I am asking a question. What is th"', None),
('"I am a quote. User: I know you are. I am asking a question. What is th"', "I am a quote. I know you are. I am asking a question.", DEFAULT_CLEANER_PIPELINE)]
)
@pytest.mark.parametrize('llm', llms())
def test_llm_clean_response(input: str, expected_result: str, cleaner: CleanerPipeline, llm: BaseLLM):
result = llm.clean_response(cleaner=cleaner, response=input)
assert result == expected_result


@pytest.mark.parametrize('pipeline', pipelines())
def test_load_pipeline_mock(pipeline: BasePipeline):
# Note that the model_id will be used internally as static response for the mock pipeline
model_id = "gpt2"
pipeline_instance = pipeline(model_id=model_id, device='cpu', mock=True)
pipeline_message = pipeline_instance('')

mock_message = MockPipeline(model_id).forward(messages=[])
assert mock_message == pipeline_message


@pytest.mark.parametrize('llm', llms())
def test_llm_query(llm: BaseLLM):
message = 'test'
llm.query(message)

# Assert that stateful operation where 3 messages are saved:
# the system prompt (on llm init), the user message and the assistant reply
assert len(llm.messages) == 3
assert len(llm.times) == 3

assert llm.messages[0]['role'] == 'system'

assert llm.messages[1]['role'] == 'user'
assert llm.messages[1]['content'] == message

assert llm.messages[2]['role'] == 'assistant'

@pytest.mark.parametrize('llm', llms())
def test_llm_forward(llm: BaseLLM):
llm.forward(llm.messages)

# Assert stateless operation of the model with only history of system prompt
assert len(llm.messages) == 1
assert len(llm.times) == 1
assert llm.messages[0]['role'] == 'system'
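The assertions above encode the BaseLLM contract: `query` is stateful (it records the system prompt, the user turn and the assistant reply, plus a timing entry per message), while `forward` generates from a given history without mutating it. A toy model of that contract, not the repository implementation:

```python
import time
from typing import Dict, List


class TinyLLM:
    """Toy model of the query/forward behaviour that tests/test_llm.py checks."""

    def __init__(self, system_prompt: str):
        self.messages: List[Dict[str, str]] = [{"role": "system", "content": system_prompt}]
        self.times: List[float] = [0.0]

    def forward(self, messages: List[Dict[str, str]]) -> str:
        # Stateless: generate from the given history without appending to it.
        return "stub response"

    def query(self, message: str) -> str:
        # Stateful: record the user turn, generate, record the assistant turn.
        t0 = time.time()
        self.messages.append({"role": "user", "content": message})
        self.times.append(0.0)
        response = self.forward(self.messages)
        self.messages.append({"role": "assistant", "content": response})
        self.times.append(time.time() - t0)
        return response


llm = TinyLLM("You are a helpful AI assistant")
llm.query("test")
assert len(llm.messages) == 3 and llm.messages[2]["role"] == "assistant"
```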