Skip to content

Commit

Permalink
Fixed ruff errors
Browse files Browse the repository at this point in the history
  • Loading branch information
ivanleomk committed Oct 15, 2024
1 parent 210836b commit d4ad77c
Show file tree
Hide file tree
Showing 5 changed files with 122 additions and 33 deletions.
112 changes: 96 additions & 16 deletions docs/examples/extracting_receipts.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@ This post demonstrates how to use Python's Pydantic library and OpenAI's GPT-4 m
First, we define two Pydantic models, `Item` and `Receipt`, to structure the extracted data. The `Item` class represents individual items on the receipt, with fields for name, price, and quantity. The `Receipt` class contains a list of `Item` objects and the total amount.

```python
from pydantic import BaseModel


class Item(BaseModel):
name: str
price: float
Expand All @@ -29,15 +32,15 @@ To ensure the accuracy of the extracted data, we use Pydantic's `model_validator

```python
@model_validator(mode="after")
def check_total(cls, values: "Receipt"):
items = values.items
total = values.total
def check_total(self):
items = self.items
total = self.total
calculated_total = sum(item.price * item.quantity for item in items)
if calculated_total != total:
raise ValueError(
f"Total {total} does not match the sum of item prices {calculated_total}"
)
return values
return self
```

## Extracting Receipt Data from Images
Expand All @@ -48,10 +51,35 @@ The `extract_receipt` function uses OpenAI's GPT-4 model to process an image URL
import instructor
from openai import OpenAI

client = instructor.from_openai(
client=OpenAI(),
mode=instructor.Mode.TOOLS,
)
# <%hide%>
from pydantic import BaseModel, model_validator


# One line item on a receipt: its name, unit price, and quantity.
class Item(BaseModel):
name: str
price: float  # multiplied by quantity when Receipt.check_total sums the items
quantity: int


class Receipt(BaseModel):
    # Structured result of a receipt extraction: the line items plus the
    # total amount reported for the receipt.
    items: list[Item]
    total: float

    @model_validator(mode="after")
    def check_total(self):
        """Reject a receipt whose total disagrees with its line items.

        Declared with ``mode="after"``, so it is written as an instance
        method that reads the populated fields from ``self``.

        Returns:
            This ``Receipt`` instance, unchanged.

        Raises:
            ValueError: If ``total`` differs from the sum of
                ``price * quantity`` over ``items``.
        """
        items = self.items
        total = self.total
        calculated_total = sum(item.price * item.quantity for item in items)
        # NOTE(review): this is an exact float comparison, so a rounding
        # difference alone fails validation; consider a tolerance.
        if calculated_total != total:
            raise ValueError(
                f"Total {total} does not match the sum of item prices {calculated_total}"
            )
        return self


# <%hide%>

client = instructor.from_openai(OpenAI())


def extract(url: str) -> Receipt:
Expand Down Expand Up @@ -82,14 +110,66 @@ def extract(url: str) -> Receipt:
In this example, we apply the method to extract receipt data from an image. The custom validation function ensures that the extracted total amount matches the sum of item prices.

```python
urls = [
"https://templates.mediamodifier.com/645124ff36ed2f5227cbf871/supermarket-receipt-template.jpg",
"https://ocr.space/Content/Images/receipt-ocr-original.jpg",
]

for url in urls:
receipt = extract(url)
print(receipt)
# <%hide%>
from pydantic import BaseModel, model_validator
import instructor
from openai import OpenAI


# One line item on a receipt: its name, unit price, and quantity.
class Item(BaseModel):
name: str
price: float  # multiplied by quantity when Receipt.check_total sums the items
quantity: int


class Receipt(BaseModel):
    # Structured result of a receipt extraction: the line items plus the
    # total amount reported for the receipt.
    items: list[Item]
    total: float

    @model_validator(mode="after")
    def check_total(self):
        """Reject a receipt whose total disagrees with its line items.

        Declared with ``mode="after"``, so it is written as an instance
        method that reads the populated fields from ``self``.

        Returns:
            This ``Receipt`` instance, unchanged.

        Raises:
            ValueError: If ``total`` differs from the sum of
                ``price * quantity`` over ``items``.
        """
        items = self.items
        total = self.total
        calculated_total = sum(item.price * item.quantity for item in items)
        # NOTE(review): this is an exact float comparison, so a rounding
        # difference alone fails validation; consider a tolerance.
        if calculated_total != total:
            raise ValueError(
                f"Total {total} does not match the sum of item prices {calculated_total}"
            )
        return self


client = instructor.from_openai(OpenAI())


def extract(url: str) -> Receipt:
    """Ask the model for the items and total shown in a receipt image.

    Args:
        url: Location of the receipt image, sent as an ``image_url`` part.

    Returns:
        The result of the completion call, requested as a ``Receipt`` via
        ``response_model``.
    """
    # The user turn carries two content parts: the image, then the instruction.
    image_part = {
        "type": "image_url",
        "image_url": {"url": url},
    }
    instruction_part = {
        "type": "text",
        "text": "Analyze the image and return the items in the receipt and the total amount.",
    }
    user_message = {
        "role": "user",
        "content": [image_part, instruction_part],
    }
    return client.chat.completions.create(
        model="gpt-4o",
        max_tokens=4000,
        response_model=Receipt,
        messages=[user_message],
    )


# <%hide%>
url = "https://templates.mediamodifier.com/645124ff36ed2f5227cbf871/supermarket-receipt-template.jpg"


receipt = extract(url)
print(receipt)
```

By combining the power of GPT-4 and Python's Pydantic library, we can accurately extract and validate receipt data from images, streamlining expense tracking and financial analysis tasks.
28 changes: 17 additions & 11 deletions docs/examples/local_classification.md
Original file line number Diff line number Diff line change
Expand Up @@ -39,37 +39,40 @@ CMAKE_ARGS="-DGGML_CUDA=on" pip install llama-cpp-python

Here's an example of how to implement a system for handling confidential document queries using local models:

```python hl_lines="7-12 14-16 40-46"
from llama_cpp import Llama
```python hl_lines="7-12 14-16 43-52"
from llama_cpp import Llama # type: ignore
import instructor
from pydantic import BaseModel
from enum import Enum
from typing import Optional

llm = Llama.from_pretrained(
repo_id="TheBloke/Mistral-7B-Instruct-v0.2-GGUF", # (1)!
llm = Llama.from_pretrained( # type: ignore
repo_id="TheBloke/Mistral-7B-Instruct-v0.2-GGUF", # (1)!
filename="*Q4_K_M.gguf",
verbose=False, # (2)!
n_gpu_layers=-1, # (3)!
verbose=False, # (2)!
n_gpu_layers=-1, # (3)!
)

create = instructor.patch(
create=llm.create_chat_completion_openai_v1, #(4)!
create=llm.create_chat_completion_openai_v1, # type: ignore # (4)!
)


# Define query types for document-related inquiries.
# The class mixes in `str`, so each member is also a string;
# QueryResponse.query_type is annotated with this enum.
class QueryType(str, Enum):
DOCUMENT_CONTENT = "document_content"
LAST_MODIFIED = "last_modified"
ACCESS_PERMISSIONS = "access_permissions"
RELATED_DOCUMENTS = "related_documents"


# Define the structure for query responses.
# Passed as `response_model` in process_confidential_query, which is
# annotated to return it.
class QueryResponse(BaseModel):
query_type: QueryType
response: str
additional_info: Optional[str] = None  # optional; defaults to None


def process_confidential_query(query: str) -> QueryResponse:
prompt = f"""Analyze the following confidential document query and provide an appropriate response:
Query: {query}
Expand All @@ -80,9 +83,12 @@ def process_confidential_query(query: str) -> QueryResponse:
"""

return create(
response_model=QueryResponse, #(5)!
response_model=QueryResponse, # (5)!
messages=[
{"role": "system", "content": "You are a secure AI assistant trained to handle confidential document queries."},
{
"role": "system",
"content": "You are a secure AI assistant trained to handle confidential document queries.",
},
{"role": "user", "content": prompt},
],
)
Expand All @@ -98,8 +104,8 @@ confidential_queries = [
]

# Process each query and print the results
for i, query in enumerate(confidential_queries, 1):
response:QueryResponse = process_confidential_query(query)
for query in confidential_queries:
response: QueryResponse = process_confidential_query(query)
print(f"{query} : {response.query_type}")
"""
#> What are the key findings in the Q4 financial report? : document_content
Expand Down
5 changes: 3 additions & 2 deletions docs/examples/mistral.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ An example:
```python
import os
from pydantic import BaseModel
from mistralai.client import MistralClient
from mistralai import Mistral
from instructor import from_mistral, Mode


Expand All @@ -37,7 +37,7 @@ class UserDetails(BaseModel):


# enables `response_model` in chat call
client = MistralClient(api_key=os.environ.get("MISTRAL_API_KEY"))
client = Mistral(api_key=os.environ.get("MISTRAL_API_KEY"))

instructor_client = from_mistral(
client=client,
Expand All @@ -53,6 +53,7 @@ resp = instructor_client.messages.create(
)

print(resp)
#> name='Jason' age=10

# output: UserDetails(name='Jason', age=10)
```
8 changes: 5 additions & 3 deletions docs/examples/sqlmodel.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@ class Hero(SQLModel, instructor.OpenAISchema, table=True):
The `create_hero` function will query `OpenAI` for a `Hero` record.

```python
import instructor

client = instructor.from_openai(OpenAI())


Expand All @@ -56,9 +58,9 @@ SQLModel.metadata.create_all(engine)

hero = create_hero()
print(hero.model_dump())
"""
{'name': 'SuperNova', 'secret_name': 'Mia Thompson', 'age': 28, 'id': None}
"""
"""
{'name': 'SuperNova', 'secret_name': 'Mia Thompson', 'age': 28, 'id': None}
"""

with Session(engine) as session:
session.add(hero)
Expand Down
2 changes: 1 addition & 1 deletion docs/hub/knowledge_graph.md
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ client = instructor.from_openai(OpenAI())
def generate_graph(input_text: str) -> KnowledgeGraph:
"""Generates a knowledge graph from the input text."""
return client.chat.completions.create(
model="gpt-3.5-turbo",
model="gpt-4o-mini",
messages=[
{
"role": "user",
Expand Down

0 comments on commit d4ad77c

Please sign in to comment.