Skip to content

Commit

Permalink
Example for working with Claude Sonnet vision
Browse files Browse the repository at this point in the history
  • Loading branch information
ginic committed Jul 8, 2024
1 parent 565a7bd commit b6b2ba6
Show file tree
Hide file tree
Showing 2 changed files with 207 additions and 1 deletion.
204 changes: 204 additions & 0 deletions notebooks/claude_example.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,204 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Table Extraction with Anthropic's Claude Sonnet Model \n",
"This notebook demonstrates how we might perform OCR table extraction with the Claude Sonnet model.\n",
"\n",
"Before starting, make sure your API key is set in the environment variable `ANTHROPIC_API_KEY`; if it is not, the notebook will prompt you to enter it. See [the Anthropic documentation](https://docs.anthropic.com/en/docs/quickstart) for details."
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"# Import all libraries used in this notebook\n",
"import base64\n",
"import getpass\n",
"import io\n",
"import os\n",
"\n",
"import anthropic\n",
"from PIL import Image\n"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"API key was not set. Enter it in the prompt.\n",
"API key should now be set.\n"
]
}
],
"source": [
"# Make sure an API key is available in the environment: reuse the variable when\n",
"# it already holds a value, otherwise ask for the key without echoing it.\n",
"API_KEY_VAR = \"ANTHROPIC_API_KEY\"\n",
"# An empty value is as unusable as a missing one, so both cases trigger the prompt.\n",
"if not os.environ.get(API_KEY_VAR):\n",
"    print(\"API key was not set. Enter it in the prompt.\")\n",
"    os.environ[API_KEY_VAR] = getpass.getpass(\"Submit your Claude API key\")\n",
"    print(\"API key should now be set.\")\n",
"else:\n",
"    print(\"API key was already set.\")\n"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# Choose the image you will use for testing\n",
"ORIGINAL_TEST_IMG_PATH = \"../data/MSF_data/Sheet 01 (1).jpg\"\n",
"\n",
"# A temporary file where a resized copy of the image is written if it has to be shrunk\n",
"TEMP_IMG_PATH = \"tmp.jpg\"\n",
"\n",
"# Model to call. The prices are in US dollars per million tokens and are only used\n",
"# for the cost estimates; check them against Anthropic's current pricing for this model.\n",
"ANTHROPIC_MODEL = \"claude-3-5-sonnet-20240620\"\n",
"COST_PER_MILL_INPUT_TOKS = 3.0\n",
"COST_PER_MILL_OUTPUT_TOKS = 15.0\n",
"\n",
"OUTPUT_SIZE_LIMIT = 1024  # Max desired output tokens\n",
"OUPUT_SIZE_LIMIT = OUTPUT_SIZE_LIMIT  # Original (misspelled) name, kept as an alias for cells that use it\n",
"MAXIMUM_PIXEL_LENGTH = 1568  # Longest edge in pixels that images are resized to; Claude downscales larger images\n",
"MAXIMUM_BYTES = 5 * 1024 * 1024  # Maximum image upload size allowed by Claude (5242880 bytes)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"def estimate_img_tokens(pixel_width, pixel_height):\n",
"    \"\"\"Estimate how many input tokens an image of the given pixel size uses.\n",
"\n",
"    Applies the rule of thumb tokens = (width * height) / 750 and returns a float.\n",
"    \"\"\"\n",
"    return (pixel_height * pixel_width) / 750\n",
"\n",
"\n",
"def estimate_total_cost_by_tokens(estimated_num_tokens, cost_per_million_tokens):\n",
"    \"\"\"Return the cost of `estimated_num_tokens` tokens.\n",
"\n",
"    `cost_per_million_tokens` is the price of one million tokens; the result is\n",
"    expressed in the same currency unit as that price.\n",
"    \"\"\"\n",
"    # One million is 1e6, so the per-token price is the per-million price times 1e-6.\n",
"    cost_per_token = cost_per_million_tokens * 1e-6\n",
"    return estimated_num_tokens * cost_per_token\n"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Image pixel size: (3000, 3869)\n",
"Image size in bytes: 246725\n",
"Estimated input cost for this image: 7.626752000000002e-06\n",
"Estimated output cost for this image: 1.5360000000000002e-05\n",
"Total estimated cost for 1000 images: 0.022986752000000003\n"
]
}
],
"source": [
"# This reads in the test image, resizes and encodes it appropriately for Claude.\n",
"# It also prints out Anthropic cost estimates for working with similar images.\n",
"with Image.open(ORIGINAL_TEST_IMG_PATH) as test_img:\n",
"    print(\"Image pixel size:\", test_img.size)\n",
"    # Work on a copy so the resize never touches the opened source image.\n",
"    img_to_send = test_img.copy()\n",
"    # thumbnail() shrinks in place and keeps the aspect ratio.\n",
"    img_to_send.thumbnail((MAXIMUM_PIXEL_LENGTH, MAXIMUM_PIXEL_LENGTH))\n",
"    img_as_bytes = io.BytesIO()\n",
"    img_to_send.save(img_as_bytes, \"jpeg\")\n",
"    encoded_img = base64.b64encode(img_as_bytes.getvalue()).decode(\"utf-8\")\n",
"\n",
"# Measure the base64 payload itself (one byte per character). This is the larger of\n",
"# the raw and encoded sizes, so checking it against the limit is the cautious choice.\n",
"# __sizeof__() would report the in-memory size of the Python str object instead.\n",
"byte_size = len(encoded_img)\n",
"\n",
"print(\"Image size in bytes:\", byte_size)\n",
"current_max_pixel_length = MAXIMUM_PIXEL_LENGTH\n",
"while byte_size > MAXIMUM_BYTES:\n",
"    # Fraction of the current size that is allowed; always below 1 inside the loop.\n",
"    quality_scale = MAXIMUM_BYTES / byte_size\n",
"    print(\"Resizing image at\", quality_scale * 100, \"percent\")\n",
"    current_max_pixel_length = int(quality_scale * current_max_pixel_length)\n",
"    if current_max_pixel_length < 1:\n",
"        # Nothing left to shrink; stop instead of asking for a zero-sized image.\n",
"        raise ValueError(\"Image cannot be shrunk below the maximum upload size\")\n",
"    img_to_send.thumbnail((current_max_pixel_length, current_max_pixel_length))\n",
"    img_to_send.save(TEMP_IMG_PATH)\n",
"    img_as_bytes = io.BytesIO()\n",
"    img_to_send.save(img_as_bytes, \"jpeg\")\n",
"    encoded_img = base64.b64encode(img_as_bytes.getvalue()).decode(\"utf-8\")\n",
"    byte_size = len(encoded_img)\n",
"    print(\"Resized Image pixel size:\", img_to_send.size)\n",
"    print(\"Resized Image size in bytes:\", byte_size)\n",
"\n",
"# Input cost is estimated from the pixel size of the image that will be sent;\n",
"# output cost assumes the response uses the full output token limit.\n",
"input_cost = estimate_total_cost_by_tokens(estimate_img_tokens(*img_to_send.size), COST_PER_MILL_INPUT_TOKS)\n",
"print(\"Estimated input cost for this image:\", input_cost)\n",
"output_cost = estimate_total_cost_by_tokens(OUPUT_SIZE_LIMIT, COST_PER_MILL_OUTPUT_TOKS)\n",
"print(\"Estimated output cost for this image:\", output_cost)\n",
"print(\"Total estimated cost for 1000 images:\", (input_cost + output_cost) * 1000)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"# Send one user turn to Claude: the encoded image first, then the extraction instruction.\n",
"anthropic_client = anthropic.Anthropic()\n",
"\n",
"image_block = {\n",
"    \"type\": \"image\",\n",
"    \"source\": {\n",
"        \"type\": \"base64\",\n",
"        \"media_type\": \"image/jpeg\",\n",
"        \"data\": encoded_img,\n",
"    },\n",
"}\n",
"instruction_block = {\n",
"    \"type\": \"text\",\n",
"    \"text\": \"Identify the tables in the image and return them as a json object, where headers and row labels are keys in the json. Return only correctly formatted json. \",\n",
"}\n",
"\n",
"message = anthropic_client.messages.create(\n",
"    model=ANTHROPIC_MODEL,\n",
"    max_tokens=OUPUT_SIZE_LIMIT,\n",
"    messages=[{\"role\": \"user\", \"content\": [image_block, instruction_block]}],\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Message(id='msg_01EV5AiHwwZYx5qbDsngUdsY', content=[TextBlock(text='Here\\'s the JSON representation of the tables in the image:\\n\\n{\\n \"Total consultations\": {\\n \"Total outpatient SRH consultations\": \"682+1\"\\n },\\n \"ANC - diagnostics\": {\\n \"Pop1: Resident\": {\\n \"First ANC visit this pregnancy\": \"65\",\\n \"Follow-up ANC visit\": \"178\",\\n \"Syphilis number tested\": \"65\",\\n \"Syphilis number positive\": \"\",\\n \"HIV number tested (First test in ANC)\": \"\",\\n \"HIV number positive (First test in ANC)\": \"\",\\n \"HIV number tested (later visit)\": \"\",\\n \"HIV number positive (later visit)\": \"\",\\n \"Hep B number tested\": \"66\",\\n \"Hep B number positive\": \"\",\\n \"Haemoglobin number tested\": \"65\",\\n \"Haemoglobin number anaemia\": \"18\",\\n \"Haemoglobin number severe anaemia\": \"\"\\n },\\n \"Pop2: Displaced\": {\\n \"First ANC visit this pregnancy\": \"05\",\\n \"Follow-up ANC visit\": \"08\",\\n \"Syphilis number tested\": \"05\",\\n \"Syphilis number positive\": \"\",\\n \"HIV number tested (First test in ANC)\": \"\",\\n \"HIV number positive (First test in ANC)\": \"\",\\n \"HIV number tested (later visit)\": \"\",\\n \"HIV number positive (later visit)\": \"\",\\n \"Hep B number tested\": \"05\",\\n \"Hep B number positive\": \"\",\\n \"Haemoglobin number tested\": \"05\",\\n \"Haemoglobin number anaemia\": \"\",\\n \"Haemoglobin number severe anaemia\": \"\"\\n }\\n }\\n}', type='text')], model='claude-3-5-sonnet-20240620', role='assistant', stop_reason='end_turn', stop_sequence=None, type='message', usage=Usage(input_tokens=1584, output_tokens=431))\n"
]
}
],
"source": [
"print(message)"
]
}
],
"metadata": {
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
4 changes: 3 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,9 @@ dev = [
"jupyter",
"matplotlib",
"seaborn",
"sphinx",
"anthropic",
"Pillow",
"dotenv"
]

# If your project contains scripts you'd like to be available command line, you can define them here.
Expand Down

0 comments on commit b6b2ba6

Please sign in to comment.