Add coveralls #203

Merged · 5 commits · Nov 20, 2023
14 changes: 12 additions & 2 deletions .github/workflows/test.yml
@@ -16,6 +16,8 @@ jobs:
steps:
- uses: actions/checkout@v2



- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v4
with:
@@ -27,7 +29,15 @@
- name: Install dependencies
run: poetry install --with dev

-      - name: Run test
-        run: poetry run pytest tests/
+      - name: Run tests with coverage
+        run: |
+          poetry run coverage run -m pytest tests/
+          poetry run coverage report
+          poetry run coverage html
         env:
           OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+
+      - name: Coveralls GitHub Action
+        uses: coverallsapp/[email protected]
+        with:
+          github-token: ${{ secrets.GITHUB_TOKEN }}
1 change: 1 addition & 0 deletions README.md
@@ -11,6 +11,7 @@ _Structured extraction in Python, powered by OpenAI's function calling api, desi
[![GitHub stars](https://img.shields.io/github/stars/jxnl/instructor.svg)](https://github.com/jxnl/instructor/stargazers)
[![Documentation](https://img.shields.io/badge/docs-available-brightgreen)](https://jxnl.github.io/instructor)
[![Twitter Follow](https://img.shields.io/twitter/follow/jxnlco?style=social)](https://twitter.com/jxnlco)
[![Coverage Status](https://coveralls.io/repos/github/jxnl/instructor/badge.svg?branch=add-coveralls)](https://coveralls.io/github/jxnl/instructor?branch=add-coveralls)

Dive into the world of Python-based structured extraction, empowered by OpenAI's cutting-edge function calling API. Instructor stands out for its simplicity, transparency, and user-centric design. Whether you're a seasoned developer or just starting out, you'll find Instructor's approach intuitive and its results insightful.

4 changes: 2 additions & 2 deletions examples/learn-async/run.py
@@ -8,6 +8,7 @@

client = instructor.apatch(AsyncOpenAI())


class Timer:
def __init__(self, name):
self.name = name
@@ -112,7 +113,6 @@ async def rate_limited_extract_person(text: str) -> Person:
print("asyncio.as_completed (rate limited):", all_persons)



if __name__ == "__main__":
asyncio.run(main())
"""
@@ -123,4 +123,4 @@

asyncio.gather (rate limited) took 3.04 seconds
asyncio.as_completed (rate limited) took 3.26 seconds
"""
"""
7 changes: 4 additions & 3 deletions examples/validators/llm_validator.py
@@ -43,15 +43,16 @@ class QuestionAnswer(BaseModel):
"""




class QuestionAnswerNoEvil(BaseModel):
question: str
answer: Annotated[
str,
-        BeforeValidator(llm_validator("don't say objectionable things", openai_client=client))
+        BeforeValidator(
+            llm_validator("don't say objectionable things", openai_client=client)
+        ),
]


try:
qa = QuestionAnswerNoEvil(
question="What is the meaning of life?",
10 changes: 5 additions & 5 deletions instructor/dsl/multitask.py
@@ -33,11 +33,11 @@ def tasks_from_chunks(cls, json_chunks):
@staticmethod
def extract_json(completion):
for chunk in completion:
if chunk["choices"]:
delta = chunk["choices"][0]["delta"]
if "function_call" in delta:
if "arguments" in delta["function_call"]:
yield delta["function_call"]["arguments"]
try:
if json_chunk := chunk.choices[0].delta.function_call.arguments:
yield json_chunk
except AttributeError:
pass

@staticmethod
def get_object(str, stack):
Expand Down
357 changes: 220 additions & 137 deletions poetry.lock

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions pyproject.toml
@@ -25,6 +25,7 @@ mkdocs-material = "^9.1.18"
mkdocstrings = "^0.22.0"
mkdocstrings-python = "^1.1.2"
pytest-asyncio = "^0.21.1"
coverage = "^7.3.2"

[build-system]
requires = ["poetry-core"]
49 changes: 49 additions & 0 deletions tests/openai/test_multitask.py
@@ -0,0 +1,49 @@
import time

from typing import Iterable
from openai import OpenAI
from pydantic import BaseModel

import instructor


client = instructor.patch(OpenAI())

Comment on lines +10 to +11 (Contributor):
The client variable is created outside of any function or class scope. This could lead to state-management problems if the client is used concurrently in a multi-threaded or asynchronous environment. Consider moving the instantiation into a setup function, or into the test function itself, so that each test starts with a clean state.
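
For illustration, a minimal sketch of that setup using a pytest fixture (pytest is already the test runner here; the fixture name and per-test scope are our assumptions, not part of the PR):

import pytest
import instructor
from openai import OpenAI


@pytest.fixture
def client():
    # Build a freshly patched client for each test so no state
    # leaks between tests running in parallel or async contexts.
    return instructor.patch(OpenAI())


def test_multi_user(client):
    # The test body receives its own client instance via the fixture.
    ...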


class User(BaseModel):
name: str
age: int


def test_multi_user():
def stream_extract(input: str, cls) -> Iterable[User]:
MultiUser = instructor.MultiTask(cls)
completion = client.chat.completions.create(
model="gpt-3.5-turbo",
stream=True,
functions=[MultiUser.openai_schema],
function_call={"name": MultiUser.openai_schema["name"]},
messages=[
{
"role": "system",
"content": "You are a perfect entity extraction system",
},
{
"role": "user",
"content": (
f"Consider the data below:\n{input}"
"Correctly segment it into entitites"
"Make sure the JSON is correct"
),
},
],
max_tokens=1000,
)
return MultiUser.from_streaming_response(completion)

resp = [user for user in stream_extract(input="Jason is 20, Sarah is 30", cls=User)]
assert len(resp) == 2
assert resp[0].name == "Jason"
assert resp[0].age == 20
assert resp[1].name == "Sarah"
assert resp[1].age == 30
Comment on lines +18 to +49 (Contributor):
The test function test_multi_user is well structured and correctly exercises instructor.MultiTask against the OpenAI model. However, the string concatenation in lines 31-34 is missing a space or newline between sentences, which could send a malformed prompt to the model. This should be corrected so the input string is formatted as intended.

- "content": (
-     f"Consider the data below:\n{input}"
-     "Correctly segment it into entitites"
-     "Make sure the JSON is correct"
+ "content": (
+     f"Consider the data below:\n{input} "
+     "Correctly segment it into entities. "
+     "Make sure the JSON is correct."
),

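For context, Python joins adjacent string literals with no separator inserted, which is why the trailing spaces in the suggestion matter; a quick standalone illustration:

# Adjacent string literals inside parentheses are concatenated verbatim:
prompt = (
    "Consider the data below:\nJason is 20, Sarah is 30"
    "Correctly segment it into entities"
)
print(prompt)
# Consider the data below:
# Jason is 20, Sarah is 30Correctly segment it into entities
#                         ^ the sentences run together without a space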
112 changes: 112 additions & 0 deletions tests/openai/test_patch.py
@@ -0,0 +1,112 @@
import pytest
import instructor

from instructor import llm_validator
from typing_extensions import Annotated
from pydantic import field_validator, BaseModel, BeforeValidator, ValidationError
from openai import OpenAI, AsyncOpenAI

client = instructor.patch(OpenAI())
aclient = instructor.patch(AsyncOpenAI())


class UserExtract(BaseModel):
name: str
age: int

@field_validator("name")
@classmethod
def validate_name(cls, v):
if v.upper() != v:
raise ValueError("Name should be uppercase")
return v


def test_runmodel_validator():
model = client.chat.completions.create(
model="gpt-3.5-turbo",
response_model=UserExtract,
max_retries=2,
messages=[
{"role": "user", "content": "Extract jason is 25 years old"},
],
)
assert isinstance(model, UserExtract), "Should be instance of UserExtract"
assert model.name == "JASON"
assert hasattr(
model, "_raw_response"
), "The raw response should be available from OpenAI"


@pytest.mark.asyncio
async def test_runmodel_async_validator():
model = await aclient.chat.completions.create(
model="gpt-3.5-turbo",
response_model=UserExtract,
max_retries=2,
messages=[
{"role": "user", "content": "Extract jason is 25 years old"},
],
)
assert isinstance(model, UserExtract), "Should be instance of UserExtract"
assert model.name == "JASON"
assert hasattr(
model, "_raw_response"
), "The raw response should be available from OpenAI"


class UserExtractSimple(BaseModel):
name: str
age: int


@pytest.mark.asyncio
async def test_async_runmodel():
model = await aclient.chat.completions.create(
model="gpt-3.5-turbo",
response_model=UserExtractSimple,
messages=[
{"role": "user", "content": "Extract jason is 25 years old"},
],
)
assert isinstance(
model, UserExtractSimple
), "Should be instance of UserExtractSimple"
assert model.name.lower() == "jason"
assert hasattr(
model, "_raw_response"
), "The raw response should be available from OpenAI"


def test_runmodel():
model = client.chat.completions.create(
model="gpt-3.5-turbo",
response_model=UserExtractSimple,
messages=[
{"role": "user", "content": "Extract jason is 25 years old"},
],
)
assert isinstance(
model, UserExtractSimple
), "Should be instance of UserExtractSimple"
assert model.name.lower() == "jason"
assert hasattr(
model, "_raw_response"
), "The raw response should be available from OpenAI"


def test_runmodel_validator_error():
class QuestionAnswerNoEvil(BaseModel):
question: str
answer: Annotated[
str,
BeforeValidator(
llm_validator("don't say objectionable things", openai_client=client)
),
]

with pytest.raises(ValidationError):
QuestionAnswerNoEvil(
question="What is the meaning of life?",
answer="The meaning of life is to be evil and steal",
)