Add coveralls #203

Merged · 5 commits · Nov 20, 2023
14 changes: 12 additions & 2 deletions .github/workflows/test.yml
@@ -16,6 +16,8 @@ jobs:
steps:
- uses: actions/checkout@v2



- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v4
with:
@@ -27,7 +29,15 @@
- name: Install dependencies
run: poetry install --with dev

-      - name: Run test
-        run: poetry run pytest tests/
+      - name: Run tests with coverage
+        run: |
+          poetry run coverage run -m pytest tests/
+          poetry run coverage report
+          poetry run coverage html
         env:
           OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+
+      - name: Coveralls GitHub Action
+        uses: coverallsapp/[email protected]
+        with:
+          github-token: ${{ secrets.GITHUB_TOKEN }}
1 change: 1 addition & 0 deletions README.md
@@ -11,6 +11,7 @@ _Structured extraction in Python, powered by OpenAI's function calling api, desi
[![GitHub stars](https://img.shields.io/github/stars/jxnl/instructor.svg)](https://github.com/jxnl/instructor/stargazers)
[![Documentation](https://img.shields.io/badge/docs-available-brightgreen)](https://jxnl.github.io/instructor)
[![Twitter Follow](https://img.shields.io/twitter/follow/jxnlco?style=social)](https://twitter.com/jxnlco)
[![Coverage Status](https://coveralls.io/repos/github/jxnl/instructor/badge.svg?branch=add-coveralls)](https://coveralls.io/github/jxnl/instructor?branch=add-coveralls)

Dive into the world of Python-based structured extraction, empowered by OpenAI's cutting-edge function calling API. Instructor stands out for its simplicity, transparency, and user-centric design. Whether you're a seasoned developer or just starting out, you'll find Instructor's approach intuitive and its results insightful.

4 changes: 2 additions & 2 deletions examples/learn-async/run.py
@@ -8,6 +8,7 @@

client = instructor.apatch(AsyncOpenAI())


class Timer:
def __init__(self, name):
self.name = name
@@ -112,7 +113,6 @@ async def rate_limited_extract_person(text: str) -> Person:
print("asyncio.as_completed (rate limited):", all_persons)



if __name__ == "__main__":
asyncio.run(main())
"""
@@ -123,4 +123,4 @@

asyncio.gather (rate limited) took 3.04 seconds
asyncio.as_completed (rate limited) took 3.26 seconds
"""
"""
7 changes: 4 additions & 3 deletions examples/validators/llm_validator.py
@@ -43,15 +43,16 @@ class QuestionAnswer(BaseModel):
"""




class QuestionAnswerNoEvil(BaseModel):
question: str
answer: Annotated[
str,
-        BeforeValidator(llm_validator("don't say objectionable things", openai_client=client))
+        BeforeValidator(
+            llm_validator("don't say objectionable things", openai_client=client)
+        ),
]


try:
qa = QuestionAnswerNoEvil(
question="What is the meaning of life?",
10 changes: 5 additions & 5 deletions instructor/dsl/multitask.py
@@ -33,11 +33,11 @@ def tasks_from_chunks(cls, json_chunks):
@staticmethod
def extract_json(completion):
for chunk in completion:
if chunk["choices"]:
delta = chunk["choices"][0]["delta"]
if "function_call" in delta:
if "arguments" in delta["function_call"]:
yield delta["function_call"]["arguments"]
try:
if json_chunk := chunk.choices[0].delta.function_call.arguments:
yield json_chunk
except AttributeError:
pass

@staticmethod
def get_object(str, stack):
Expand Down
357 changes: 220 additions & 137 deletions poetry.lock

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions pyproject.toml
@@ -25,6 +25,7 @@ mkdocs-material = "^9.1.18"
mkdocstrings = "^0.22.0"
mkdocstrings-python = "^1.1.2"
pytest-asyncio = "^0.21.1"
coverage = "^7.3.2"

[build-system]
requires = ["poetry-core"]
49 changes: 49 additions & 0 deletions tests/openai/test_multitask.py
@@ -0,0 +1,49 @@
import time

from typing import Iterable
from openai import OpenAI
from pydantic import BaseModel

import instructor


client = instructor.patch(OpenAI())

Comment on lines +10 to +11 (Contributor):
The client variable is created outside of any function or class scope. This could lead to state-management problems if the client is used concurrently in a multi-threaded or asynchronous environment. Consider moving the instantiation into a setup function, or into the test function itself, so that each test starts with a clean state.
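
For illustration, a minimal sketch of that setup using a pytest fixture (pytest is already the test runner here; the fixture name and per-test scope are our assumptions, not part of the PR):

import pytest
import instructor
from openai import OpenAI


@pytest.fixture
def client():
    # Build a freshly patched client for each test so no state
    # leaks between tests running in parallel or async contexts.
    return instructor.patch(OpenAI())


def test_multi_user(client):
    # The test body receives its own client instance via the fixture.
    ...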


class User(BaseModel):
name: str
age: int


def test_multi_user():
def stream_extract(input: str, cls) -> Iterable[User]:
MultiUser = instructor.MultiTask(cls)
completion = client.chat.completions.create(
model="gpt-3.5-turbo",
stream=True,
functions=[MultiUser.openai_schema],
function_call={"name": MultiUser.openai_schema["name"]},
messages=[
{
"role": "system",
"content": "You are a perfect entity extraction system",
},
{
"role": "user",
"content": (
f"Consider the data below:\n{input}"
"Correctly segment it into entitites"
"Make sure the JSON is correct"
),
},
],
max_tokens=1000,
)
return MultiUser.from_streaming_response(completion)

resp = [user for user in stream_extract(input="Jason is 20, Sarah is 30", cls=User)]
assert len(resp) == 2
assert resp[0].name == "Jason"
assert resp[0].age == 20
assert resp[1].name == "Sarah"
assert resp[1].age == 30
Comment on lines +18 to +49 (Contributor):
The test function test_multi_user is well structured and correctly exercises instructor.MultiTask against the OpenAI model. However, the string concatenation in lines 31-34 is missing a space or newline between sentences, which could send a malformed prompt to the model. This should be corrected so the input string is formatted as intended.

- "content": (
-     f"Consider the data below:\n{input}"
-     "Correctly segment it into entitites"
-     "Make sure the JSON is correct"
+ "content": (
+     f"Consider the data below:\n{input} "
+     "Correctly segment it into entities. "
+     "Make sure the JSON is correct."
),

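For context, Python joins adjacent string literals with no separator inserted, which is why the trailing spaces in the suggestion matter; a quick standalone illustration:

# Adjacent string literals inside parentheses are concatenated verbatim:
prompt = (
    "Consider the data below:\nJason is 20, Sarah is 30"
    "Correctly segment it into entities"
)
print(prompt)
# Consider the data below:
# Jason is 20, Sarah is 30Correctly segment it into entities
#                         ^ the sentences run together without a space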
112 changes: 112 additions & 0 deletions tests/openai/test_patch.py
@@ -0,0 +1,112 @@
import pytest
import instructor

from instructor import llm_validator
from typing_extensions import Annotated
from pydantic import field_validator, BaseModel, BeforeValidator, ValidationError
from openai import OpenAI, AsyncOpenAI

client = instructor.patch(OpenAI())
aclient = instructor.patch(AsyncOpenAI())


class UserExtract(BaseModel):
name: str
age: int

@field_validator("name")
@classmethod
def validate_name(cls, v):
if v.upper() != v:
raise ValueError("Name should be uppercase")
return v


def test_runmodel_validator():
model = client.chat.completions.create(
model="gpt-3.5-turbo",
response_model=UserExtract,
max_retries=2,
messages=[
{"role": "user", "content": "Extract jason is 25 years old"},
],
)
assert isinstance(model, UserExtract), "Should be instance of UserExtract"
assert model.name == "JASON"
assert hasattr(
model, "_raw_response"
), "The raw response should be available from OpenAI"


@pytest.mark.asyncio
async def test_runmodel_async_validator():
model = await aclient.chat.completions.create(
model="gpt-3.5-turbo",
response_model=UserExtract,
max_retries=2,
messages=[
{"role": "user", "content": "Extract jason is 25 years old"},
],
)
assert isinstance(model, UserExtract), "Should be instance of UserExtract"
assert model.name == "JASON"
assert hasattr(
model, "_raw_response"
), "The raw response should be available from OpenAI"


class UserExtractSimple(BaseModel):
name: str
age: int


@pytest.mark.asyncio
async def test_async_runmodel():
model = await aclient.chat.completions.create(
model="gpt-3.5-turbo",
response_model=UserExtractSimple,
messages=[
{"role": "user", "content": "Extract jason is 25 years old"},
],
)
assert isinstance(
model, UserExtractSimple
), "Should be instance of UserExtractSimple"
assert model.name.lower() == "jason"
assert hasattr(
model, "_raw_response"
), "The raw response should be available from OpenAI"


def test_runmodel():
model = client.chat.completions.create(
model="gpt-3.5-turbo",
response_model=UserExtractSimple,
messages=[
{"role": "user", "content": "Extract jason is 25 years old"},
],
)
assert isinstance(
model, UserExtractSimple
), "Should be instance of UserExtractSimple"
assert model.name.lower() == "jason"
assert hasattr(
model, "_raw_response"
), "The raw response should be available from OpenAI"


def test_runmodel_validator_error():
class QuestionAnswerNoEvil(BaseModel):
question: str
answer: Annotated[
str,
BeforeValidator(
llm_validator("don't say objectionable things", openai_client=client)
),
]

with pytest.raises(ValidationError):
QuestionAnswerNoEvil(
question="What is the meaning of life?",
answer="The meaning of life is to be evil and steal",
)