generated from UMassCDS/PythonProjectTemplate
-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Example for working with Claude Sonnet vision
- Loading branch information
Showing
2 changed files
with
207 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,204 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"# Table Extraction with Anthropic's Claude Sonnet Model \n", | ||
"This notebook demonstrates how we might perform OCR table extraction with the Claude Sonnet model.\n", | ||
"\n", | ||
"Before starting, check that your API key is set in the `ANTHROPIC_API_KEY` environment variable; if it is not, you will be prompted to enter it. See [the Anthropic documentation](https://docs.anthropic.com/en/docs/quickstart) for details." | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 1, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Import all libraries used in this notebook\n", | ||
"import base64\n", | ||
"import getpass\n", | ||
"import io\n", | ||
"import os\n", | ||
"\n", | ||
"import anthropic\n", | ||
"from PIL import Image\n" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 2, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"API key was not set. Enter it in the prompt.\n", | ||
"API key should now be set.\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"API_KEY_VAR = \"ANTHROPIC_API_KEY\"\n", | ||
"\n", | ||
"# An empty value is as unusable as a missing one, so treat both as \"not set\".\n", | ||
"if not os.environ.get(API_KEY_VAR):\n", | ||
"    print(\"API key was not set. Enter it in the prompt.\")\n", | ||
"    # getpass prompts without echoing what is typed.\n", | ||
"    os.environ[API_KEY_VAR] = getpass.getpass(\"Submit your Claude API key\")\n", | ||
"    print(\"API key should now be set.\")\n", | ||
"else:\n", | ||
"    print(\"API key was already set.\")\n" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 3, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Choose the image you will use for testing\n", | ||
"ORIGINAL_TEST_IMG_PATH = \"../data/MSF_data/Sheet 01 (1).jpg\"\n", | ||
"\n", | ||
"# A temporary file for the resized image that will be sent\n", | ||
"TEMP_IMG_PATH = \"tmp.jpg\"\n", | ||
"\n", | ||
"# Model to query. Check that the cost constants below are accurate for it.\n", | ||
"ANTHROPIC_MODEL = \"claude-3-5-sonnet-20240620\"\n", | ||
"\n", | ||
"# For estimating costs: the cost per million tokens, in dollars\n", | ||
"COST_PER_MILL_INPUT_TOKS = 3.0\n", | ||
"COST_PER_MILL_OUTPUT_TOKS = 15.0\n", | ||
"\n", | ||
"OUTPUT_SIZE_LIMIT = 1024  # Max desired output tokens\n", | ||
"# Original (misspelled) name, kept as an alias so cells that use it keep working\n", | ||
"OUPUT_SIZE_LIMIT = OUTPUT_SIZE_LIMIT\n", | ||
"\n", | ||
"MAXIMUM_PIXEL_LENGTH = 1568  # Maximum pixel edge length allowed by Claude\n", | ||
"MAXIMUM_BYTES = 5 * 1024 * 1024  # Maximum image upload size allowed by Claude (5242880 bytes)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 4, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"def estimate_img_tokens(pixel_width, pixel_height):\n", | ||
"    \"\"\"Estimate the token count of an image from its pixel dimensions.\n", | ||
"\n", | ||
"    The estimate is the image's pixel area divided by 750.\n", | ||
"    \"\"\"\n", | ||
"    pixel_area = pixel_height * pixel_width\n", | ||
"    return pixel_area / 750\n", | ||
"\n", | ||
"def estimate_total_cost_by_tokens(estimated_num_tokens, cost_per_million_tokens):\n", | ||
"    \"\"\"Return the estimated cost of processing a number of tokens.\n", | ||
"\n", | ||
"    Args:\n", | ||
"        estimated_num_tokens: Number of tokens to price.\n", | ||
"        cost_per_million_tokens: Price for one million tokens.\n", | ||
"\n", | ||
"    Returns:\n", | ||
"        The cost as a float, in the same currency unit as the price.\n", | ||
"    \"\"\"\n", | ||
"    # The price is quoted per million (1e6) tokens, so divide by 1e6 -- not 1e9.\n", | ||
"    return estimated_num_tokens * cost_per_million_tokens / 1_000_000\n" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 6, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"Image pixel size: (3000, 3869)\n", | ||
"Image size in bytes: 246725\n", | ||
"Estimated input cost for this image: 7.626752000000002e-06\n", | ||
"Estimated output cost for this image: 1.5360000000000002e-05\n", | ||
"Total estimated cost for 1000 images: 0.022986752000000003\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"def encode_jpeg_base64(image):\n", | ||
"    \"\"\"Serialize a PIL image as JPEG and return the bytes as a base64 string.\"\"\"\n", | ||
"    jpeg_buffer = io.BytesIO()\n", | ||
"    image.save(jpeg_buffer, \"jpeg\")\n", | ||
"    return base64.b64encode(jpeg_buffer.getvalue()).decode(\"utf-8\")\n", | ||
"\n", | ||
"\n", | ||
"# This reads in the test image, resizes and encodes it appropriately for Claude.\n", | ||
"# It also prints out Anthropic cost estimates for working with similar images.\n", | ||
"with Image.open(ORIGINAL_TEST_IMG_PATH) as test_img:\n", | ||
"    print(\"Image pixel size:\", test_img.size)\n", | ||
"    # Work on a copy so the resized image outlives the opened file.\n", | ||
"    img_to_send = test_img.copy()\n", | ||
"    # thumbnail() resizes in place so both edges fit within the given bounds.\n", | ||
"    img_to_send.thumbnail((MAXIMUM_PIXEL_LENGTH, MAXIMUM_PIXEL_LENGTH))\n", | ||
"    encoded_img = encode_jpeg_base64(img_to_send)\n", | ||
"\n", | ||
"# base64 text is ASCII, so its length equals the number of bytes in the payload.\n", | ||
"# (__sizeof__() would report the Python object's memory footprint instead.)\n", | ||
"byte_size = len(encoded_img)\n", | ||
"\n", | ||
"print(\"Image size in bytes:\", byte_size)\n", | ||
"current_max_pixel_length = MAXIMUM_PIXEL_LENGTH\n", | ||
"while byte_size > MAXIMUM_BYTES:\n", | ||
"    # Fraction (between 0 and 1) by which to shrink the longest edge on this pass.\n", | ||
"    quality_scale = MAXIMUM_BYTES / byte_size\n", | ||
"    print(f\"Resizing image to {quality_scale:.1%} of its current maximum edge length\")\n", | ||
"    # Never let the target edge length truncate to 0 pixels.\n", | ||
"    current_max_pixel_length = max(1, int(quality_scale * current_max_pixel_length))\n", | ||
"    img_to_send.thumbnail((current_max_pixel_length, current_max_pixel_length))\n", | ||
"    img_to_send.save(TEMP_IMG_PATH)\n", | ||
"    encoded_img = encode_jpeg_base64(img_to_send)\n", | ||
"    byte_size = len(encoded_img)\n", | ||
"    print(\"Resized Image pixel size:\", img_to_send.size)\n", | ||
"    print(\"Resized Image size in bytes:\", byte_size)\n", | ||
"\n", | ||
"input_cost = estimate_total_cost_by_tokens(estimate_img_tokens(*img_to_send.size), COST_PER_MILL_INPUT_TOKS)\n", | ||
"print(\"Estimated input cost for this image:\", input_cost)\n", | ||
"output_cost = estimate_total_cost_by_tokens(OUPUT_SIZE_LIMIT, COST_PER_MILL_OUTPUT_TOKS)\n", | ||
"print(\"Estimated output cost for this image:\", output_cost)\n", | ||
"print(\"Total estimated cost for 1000 images:\", (input_cost + output_cost)*1000)\n" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 7, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Message content: the encoded image first, then the extraction instructions.\n", | ||
"image_block = {\n", | ||
"    \"type\": \"image\",\n", | ||
"    \"source\": {\n", | ||
"        \"type\": \"base64\",\n", | ||
"        \"media_type\": \"image/jpeg\",\n", | ||
"        \"data\": encoded_img,\n", | ||
"    },\n", | ||
"}\n", | ||
"prompt_block = {\n", | ||
"    \"type\": \"text\",\n", | ||
"    \"text\": \"Identify the tables in the image and return them as a json object, where headers and row labels are keys in the json. Return only correctly formatted json. \",\n", | ||
"}\n", | ||
"\n", | ||
"# No key is passed here; the client is expected to read it from the environment.\n", | ||
"anthropic_client = anthropic.Anthropic()\n", | ||
"\n", | ||
"message = anthropic_client.messages.create(\n", | ||
"    model=ANTHROPIC_MODEL,\n", | ||
"    max_tokens=OUPUT_SIZE_LIMIT,\n", | ||
"    messages=[{\"role\": \"user\", \"content\": [image_block, prompt_block]}],\n", | ||
")" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 8, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"Message(id='msg_01EV5AiHwwZYx5qbDsngUdsY', content=[TextBlock(text='Here\\'s the JSON representation of the tables in the image:\\n\\n{\\n \"Total consultations\": {\\n \"Total outpatient SRH consultations\": \"682+1\"\\n },\\n \"ANC - diagnostics\": {\\n \"Pop1: Resident\": {\\n \"First ANC visit this pregnancy\": \"65\",\\n \"Follow-up ANC visit\": \"178\",\\n \"Syphilis number tested\": \"65\",\\n \"Syphilis number positive\": \"\",\\n \"HIV number tested (First test in ANC)\": \"\",\\n \"HIV number positive (First test in ANC)\": \"\",\\n \"HIV number tested (later visit)\": \"\",\\n \"HIV number positive (later visit)\": \"\",\\n \"Hep B number tested\": \"66\",\\n \"Hep B number positive\": \"\",\\n \"Haemoglobin number tested\": \"65\",\\n \"Haemoglobin number anaemia\": \"18\",\\n \"Haemoglobin number severe anaemia\": \"\"\\n },\\n \"Pop2: Displaced\": {\\n \"First ANC visit this pregnancy\": \"05\",\\n \"Follow-up ANC visit\": \"08\",\\n \"Syphilis number tested\": \"05\",\\n \"Syphilis number positive\": \"\",\\n \"HIV number tested (First test in ANC)\": \"\",\\n \"HIV number positive (First test in ANC)\": \"\",\\n \"HIV number tested (later visit)\": \"\",\\n \"HIV number positive (later visit)\": \"\",\\n \"Hep B number tested\": \"05\",\\n \"Hep B number positive\": \"\",\\n \"Haemoglobin number tested\": \"05\",\\n \"Haemoglobin number anaemia\": \"\",\\n \"Haemoglobin number severe anaemia\": \"\"\\n }\\n }\\n}', type='text')], model='claude-3-5-sonnet-20240620', role='assistant', stop_reason='end_turn', stop_sequence=None, type='message', usage=Usage(input_tokens=1584, output_tokens=431))\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"# Print the full response object held in `message`.\n", | ||
"print(message)" | ||
] | ||
} | ||
], | ||
"metadata": { | ||
"language_info": { | ||
"name": "python" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 2 | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters