From ca7b11500d4847c493522989d89b895304842d07 Mon Sep 17 00:00:00 2001 From: isaacbmiller Date: Mon, 3 Feb 2025 13:36:24 +0100 Subject: [PATCH 1/5] feat: start working on r1 impl --- docs/docs/tutorials/math-r1/index.ipynb | 2911 +++++++++++++++++++++++ dspy/adapters/chat_adapter.py | 3 + dspy/adapters/json_adapter.py | 3 + output.png | Bin 0 -> 16570 bytes 4 files changed, 2917 insertions(+) create mode 100644 docs/docs/tutorials/math-r1/index.ipynb create mode 100644 output.png diff --git a/docs/docs/tutorials/math-r1/index.ipynb b/docs/docs/tutorials/math-r1/index.ipynb new file mode 100644 index 000000000..9847e1ae4 --- /dev/null +++ b/docs/docs/tutorials/math-r1/index.ipynb @@ -0,0 +1,2911 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Tutorial: Math Reasoning\n", + "\n", + "Let's walk through a quick example of setting up a `dspy.ChainOfThought` module and optimizing it for answering algebra questions.\n", + "\n", + "Install the latest DSPy via `pip install -U dspy` and follow along." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's tell DSPy that we will use OpenAI's `gpt-4o-mini` in our modules. To authenticate, DSPy will read the `OPENAI_API_KEY` environment variable. You can easily swap this out for [other providers or local models](https://github.com/stanfordnlp/dspy/blob/main/examples/migration.ipynb)."
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/isaac/projects/dspy/.venv/lib/python3.12/site-packages/pydantic/_internal/_config.py:345: UserWarning: Valid config keys have changed in V2:\n", + "* 'fields' has been removed\n", + " warnings.warn(message, UserWarning)\n", + "/Users/isaac/projects/dspy/.venv/lib/python3.12/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + " from .autonotebook import tqdm as notebook_tqdm\n" + ] + } + ], + "source": [ + "import dspy\n", + "\n", + "\n", + "# dspy.configure(lm=gpt4o_mini) # we'll use gpt-4o-mini as the default LM, unless otherwise specified" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Next, let's load some data examples from the [MATH](https://arxiv.org/abs/2103.03874) benchmark. 
We'll use a training split for optimization and evaluate it on a held-out dev set.\n", + "\n", + "Please note that the following step requires an extra package, which you can install from a notebook cell with:\n", + "```python\n", + "%pip install git+https://github.com/hendrycks/math.git\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import dotenv\n", + "dotenv.load_dotenv()" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "350 350\n" + ] + } + ], + "source": [ + "from dspy.datasets import MATH\n", + "\n", + "dataset = MATH(subset='algebra')\n", + "print(len(dataset.train), len(dataset.dev))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's inspect one example from the training set." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Question: The doctor has told Cal O'Ree that during his ten weeks of working out at the gym, he can expect each week's weight loss to be $1\\%$ of his weight at the end of the previous week. His weight at the beginning of the workouts is $244$ pounds. How many pounds does he expect to weigh at the end of the ten weeks? Express your answer to the nearest whole number.\n", + "Answer: 221\n" + ] + } + ], + "source": [ + "example = dataset.train[0]\n", + "print(\"Question:\", example.question)\n", + "print(\"Answer:\", example.answer)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now let's define our module. It's extremely simple: just a chain-of-thought step that takes a `question` and produces an `answer`."
+ ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "# with dspy.settings.configure(lm=gpt4o_mini):\n", + "module = dspy.ChainOfThought(\"question -> answer\")\n", + "# module(question=example.question)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Next, let's set up an evaluator for the zero-shot module above, before prompt optimization." + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "ename": "NameError", + "evalue": "name 'module' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[10], line 10\u001b[0m\n\u001b[1;32m 7\u001b[0m r1 \u001b[38;5;241m=\u001b[39m dspy\u001b[38;5;241m.\u001b[39mLM(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mfireworks_ai/accounts/fireworks/models/deepseek-r1\u001b[39m\u001b[38;5;124m'\u001b[39m, max_tokens\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m4000\u001b[39m)\n\u001b[1;32m 9\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m dspy\u001b[38;5;241m.\u001b[39msettings\u001b[38;5;241m.\u001b[39mcontext(lm\u001b[38;5;241m=\u001b[39mgpt4o):\n\u001b[0;32m---> 10\u001b[0m gpt4o_uncompiled \u001b[38;5;241m=\u001b[39m evaluate(\u001b[43mmodule\u001b[49m)\n\u001b[1;32m 12\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m dspy\u001b[38;5;241m.\u001b[39msettings\u001b[38;5;241m.\u001b[39mcontext(lm\u001b[38;5;241m=\u001b[39mr1):\n\u001b[1;32m 13\u001b[0m r1_uncompiled \u001b[38;5;241m=\u001b[39m evaluate(module)\n", + "\u001b[0;31mNameError\u001b[0m: name 'module' is not defined" + ] + } + ], + "source": [ + "THREADS = 35\n", + "kwargs = dict(num_threads=THREADS, display_progress=True)\n", + "evaluate = dspy.Evaluate(devset=dataset.dev, metric=dataset.metric, **kwargs)\n", + "\n", + "gpt4o_mini = 
dspy.LM('openai/gpt-4o-mini', max_tokens=2000)\n", + "gpt4o = dspy.LM('openai/gpt-4o', max_tokens=2000)\n", + "r1 = dspy.LM('fireworks_ai/accounts/fireworks/models/deepseek-r1', max_tokens=4000)\n", + "\n", + "with dspy.settings.context(lm=gpt4o):\n", + " gpt4o_uncompiled = evaluate(module)\n", + "\n", + "with dspy.settings.context(lm=r1):\n", + " r1_uncompiled = evaluate(module)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'prompt': None,\n", + " 'messages': [{'role': 'system',\n", + " 'content': 'Your input fields are:\\n1. `question` (str)\\n\\nYour output fields are:\\n1. `reasoning` (str)\\n2. `answer` (str)\\n\\nAll interactions will be structured in the following way, with the appropriate values filled in.\\n\\nInputs will have the following structure:\\n\\n[[ ## question ## ]]\\n{question}\\n\\nOutputs will be a JSON object with the following fields.\\n\\n{\\n \"reasoning\": \"{reasoning}\",\\n \"answer\": \"{answer}\"\\n}\\n\\nIn adhering to this structure, your objective is: \\n Given the fields `question`, produce the fields `answer`.'},\n", + " {'role': 'user',\n", + " 'content': '[[ ## question ## ]]\\nTwo numbers are independently selected from the set of positive integers less than or equal to 5. What is the probability that the sum of the two numbers is greater than their product? Express your answer as a common fraction.\\n\\nRespond with a JSON object in the following order of fields: `reasoning`, then `answer`.'}],\n", + " 'kwargs': {'temperature': 0.0,\n", + " 'max_tokens': 1000,\n", + " 'response_format': dspy.adapters.json_adapter.DSPyProgramOutputs},\n", + " 'response': ModelResponse(id='bdd65b30-8b6e-4201-a0d6-567207a90a39', choices=[Choices(finish_reason='length', index=0, message=Message(content=\"\\nOkay, let's see. 
The problem is asking for the probability that the sum of two numbers selected independently from the set of positive integers up to 5 is greater than their product. Hmm. So first, I need to figure out all possible pairs of numbers, then count how many of those pairs satisfy the condition sum > product. Then divide that by the total number of pairs to get the probability.\\n\\nWait, the numbers are selected independently, so does that mean that they can be the same? Like, is (2,2) a valid pair? Since it's independent selection, I think yes. So the total number of possible pairs would be 5 options for the first number and 5 for the second, so 5*5=25 total possible ordered pairs. Right?\\n\\nNow, let's list all possible pairs and check the condition sum > product. Maybe that's the easiest way, even though it's a bit tedious. Let's start with the first number being 1.\\n\\nIf the first number is 1, then the second number can be 1,2,3,4,5.\\n\\nFor (1,1): sum is 2, product is 1. 2>1, so that's a success.\\n\\n(1,2): sum 3, product 2. 3>2, success.\\n\\n(1,3): sum 4, product 3. Success.\\n\\n(1,4): sum 5, product 4. Success.\\n\\n(1,5): sum 6, product 5. Success. So all pairs where the first number is 1 will satisfy the condition. That's 5 cases.\\n\\nNow, moving to the first number being 2.\\n\\nSecond number can be 1,2,3,4,5.\\n\\n(2,1): sum 3, product 2. 3>2, success.\\n\\n(2,2): sum 4, product 4. 4 is not greater than 4. So no.\\n\\n(2,3): sum 5, product 6. 5<6, no.\\n\\n(2,4): sum 6, product 8. No.\\n\\n(2,5): sum 7, product 10. No. So for first number 2, only (2,1) works. That's 1 case.\\n\\nNext, first number 3.\\n\\nSecond number possibilities:\\n\\n(3,1): sum 4, product 3. 4>3, success.\\n\\n(3,2): sum 5, product 6. 5<6, no.\\n\\n(3,3): sum 6, product 9. No.\\n\\n(3,4): sum 7, product 12. No.\\n\\n(3,5): sum 8, product 15. No. So only (3,1) works. 1 case.\\n\\nFirst number 4.\\n\\nSecond numbers:\\n\\n(4,1): sum 5, product 4. 
5>4, success.\\n\\n(4,2): sum 6, product 8. No.\\n\\n(4,3): sum 7, product 12. No.\\n\\n(4,4): sum 8, product 16. No.\\n\\n(4,5): sum 9, product 20. No. So only (4,1) works. 1 case.\\n\\nFirst number 5.\\n\\nSecond numbers:\\n\\n(5,1): sum 6, product 5. 6>5, success.\\n\\n(5,2): sum 7, product 10. No.\\n\\n(5,3): sum 8, product 15. No.\\n\\n(5,4): sum 9, product 20. No.\\n\\n(5,5): sum 10, product 25. No. So only (5,1) works. 1 case.\\n\\nNow, let's tally up all the successful cases. From first number 1: 5. Then first numbers 2,3,4,5 each contribute 1. So total successful cases: 5 +1+1+1+1=9.\\n\\nWait, wait. Wait, but when the first number is 2, the pair (2,1) is a success. Similarly, when the first number is 3, (3,1) is a success. But when the first number is 1, the second number can be 2,3,4,5, which are all successes. So total successful pairs:\\n\\nFrom first number 1: (1,1), (1,2), (1,3), (1,4), (1,5) → 5.\\n\\nFrom first number 2: (2,1) → 1.\\n\\nFrom first number 3: (3,1) →1.\\n\\nFrom first number 4: (4\", role='assistant', tool_calls=None, function_call=None))], created=1738448394, model='fireworks_ai/accounts/fireworks/models/deepseek-r1', object='chat.completion', system_fingerprint=None, usage=Usage(completion_tokens=1000, prompt_tokens=197, total_tokens=1197, completion_tokens_details=None, prompt_tokens_details=None), service_tier=None),\n", + " 'outputs': [\"\\nOkay, let's see. The problem is asking for the probability that the sum of two numbers selected independently from the set of positive integers up to 5 is greater than their product. Hmm. So first, I need to figure out all possible pairs of numbers, then count how many of those pairs satisfy the condition sum > product. Then divide that by the total number of pairs to get the probability.\\n\\nWait, the numbers are selected independently, so does that mean that they can be the same? Like, is (2,2) a valid pair? Since it's independent selection, I think yes. 
So the total number of possible pairs would be 5 options for the first number and 5 for the second, so 5*5=25 total possible ordered pairs. Right?\\n\\nNow, let's list all possible pairs and check the condition sum > product. Maybe that's the easiest way, even though it's a bit tedious. Let's start with the first number being 1.\\n\\nIf the first number is 1, then the second number can be 1,2,3,4,5.\\n\\nFor (1,1): sum is 2, product is 1. 2>1, so that's a success.\\n\\n(1,2): sum 3, product 2. 3>2, success.\\n\\n(1,3): sum 4, product 3. Success.\\n\\n(1,4): sum 5, product 4. Success.\\n\\n(1,5): sum 6, product 5. Success. So all pairs where the first number is 1 will satisfy the condition. That's 5 cases.\\n\\nNow, moving to the first number being 2.\\n\\nSecond number can be 1,2,3,4,5.\\n\\n(2,1): sum 3, product 2. 3>2, success.\\n\\n(2,2): sum 4, product 4. 4 is not greater than 4. So no.\\n\\n(2,3): sum 5, product 6. 5<6, no.\\n\\n(2,4): sum 6, product 8. No.\\n\\n(2,5): sum 7, product 10. No. So for first number 2, only (2,1) works. That's 1 case.\\n\\nNext, first number 3.\\n\\nSecond number possibilities:\\n\\n(3,1): sum 4, product 3. 4>3, success.\\n\\n(3,2): sum 5, product 6. 5<6, no.\\n\\n(3,3): sum 6, product 9. No.\\n\\n(3,4): sum 7, product 12. No.\\n\\n(3,5): sum 8, product 15. No. So only (3,1) works. 1 case.\\n\\nFirst number 4.\\n\\nSecond numbers:\\n\\n(4,1): sum 5, product 4. 5>4, success.\\n\\n(4,2): sum 6, product 8. No.\\n\\n(4,3): sum 7, product 12. No.\\n\\n(4,4): sum 8, product 16. No.\\n\\n(4,5): sum 9, product 20. No. So only (4,1) works. 1 case.\\n\\nFirst number 5.\\n\\nSecond numbers:\\n\\n(5,1): sum 6, product 5. 6>5, success.\\n\\n(5,2): sum 7, product 10. No.\\n\\n(5,3): sum 8, product 15. No.\\n\\n(5,4): sum 9, product 20. No.\\n\\n(5,5): sum 10, product 25. No. So only (5,1) works. 1 case.\\n\\nNow, let's tally up all the successful cases. From first number 1: 5. Then first numbers 2,3,4,5 each contribute 1. 
So total successful cases: 5 +1+1+1+1=9.\\n\\nWait, wait. Wait, but when the first number is 2, the pair (2,1) is a success. Similarly, when the first number is 3, (3,1) is a success. But when the first number is 1, the second number can be 2,3,4,5, which are all successes. So total successful pairs:\\n\\nFrom first number 1: (1,1), (1,2), (1,3), (1,4), (1,5) → 5.\\n\\nFrom first number 2: (2,1) → 1.\\n\\nFrom first number 3: (3,1) →1.\\n\\nFrom first number 4: (4\"],\n", + " 'usage': {'completion_tokens': 1000,\n", + " 'prompt_tokens': 197,\n", + " 'total_tokens': 1197,\n", + " 'completion_tokens_details': None,\n", + " 'prompt_tokens_details': None},\n", + " 'cost': 0.0,\n", + " 'timestamp': '2025-02-01T23:20:22.135236',\n", + " 'uuid': 'cbd815b9-f576-42c1-a948-c4428d02477f',\n", + " 'model': 'fireworks_ai/accounts/fireworks/models/deepseek-r1',\n", + " 'response_model': 'fireworks_ai/accounts/fireworks/models/deepseek-r1',\n", + " 'model_type': 'chat'}" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "r1.history[-1]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "And lastly let's optimize our module. Since we want strong reasoning, we'll use the large GPT-4o as the teacher model (used to bootstrap reasoning for the small LM at optimization time) but not as the prompt model (used to craft instructions) or the task model (trained).\n", + "\n", + "GPT-4o will be invoked only a small number of times. The model involved directly in optimization and in the resulting (optimized) program will be GPT-4o-mini.\n", + "\n", + "We will also specify `max_bootstrapped_demos=4` which means we want at most four bootstrapped examples in the prompt and `max_labeled_demos=4` which means that, in total between bootstrapped and pre-labeled examples, we want at most four." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2025/02/02 01:23:56 INFO dspy.teleprompt.mipro_optimizer_v2: \n", + "RUNNING WITH THE FOLLOWING MEDIUM AUTO RUN SETTINGS:\n", + "num_trials: 25\n", + "minibatch: True\n", + "num_candidates: 19\n", + "valset size: 280\n", + "\n", + "2025/02/02 01:23:56 INFO dspy.teleprompt.mipro_optimizer_v2: \n", + "==> STEP 1: BOOTSTRAP FEWSHOT EXAMPLES <==\n", + "2025/02/02 01:23:56 INFO dspy.teleprompt.mipro_optimizer_v2: These will be used as few-shot example candidates for our program and for creating instructions.\n", + "\n", + "2025/02/02 01:23:56 INFO dspy.teleprompt.mipro_optimizer_v2: Bootstrapping N=19 sets of demonstrations...\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Bootstrapping set 1/19\n", + "Bootstrapping set 2/19\n", + "Bootstrapping set 3/19\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 6%|▌ | 4/70 [00:00<00:00, 723.03it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Bootstrapped 4 full traces after 4 examples for up to 1 rounds, amounting to 4 attempts.\n", + "Bootstrapping set 4/19\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 6%|▌ | 4/70 [00:00<00:00, 731.26it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Bootstrapped 4 full traces after 4 examples for up to 1 rounds, amounting to 4 attempts.\n", + "Bootstrapping set 5/19\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 6%|▌ | 4/70 [00:00<00:00, 934.09it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Bootstrapped 3 full traces after 4 examples for up to 1 rounds, amounting to 4 attempts.\n", + "Bootstrapping set 6/19\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 6%|▌ | 4/70 [00:00<00:00, 
974.68it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Bootstrapped 4 full traces after 4 examples for up to 1 rounds, amounting to 4 attempts.\n", + "Bootstrapping set 7/19\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 7%|▋ | 5/70 [00:00<00:00, 1005.39it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Bootstrapped 4 full traces after 5 examples for up to 1 rounds, amounting to 5 attempts.\n", + "Bootstrapping set 8/19\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 1%|▏ | 1/70 [00:00<00:00, 819.68it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Bootstrapped 1 full traces after 1 examples for up to 1 rounds, amounting to 1 attempts.\n", + "Bootstrapping set 9/19\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 3%|▎ | 2/70 [00:00<00:00, 1050.55it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Bootstrapped 2 full traces after 2 examples for up to 1 rounds, amounting to 2 attempts.\n", + "Bootstrapping set 10/19\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 6%|▌ | 4/70 [00:00<00:00, 1108.36it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Bootstrapped 3 full traces after 4 examples for up to 1 rounds, amounting to 4 attempts.\n", + "Bootstrapping set 11/19\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 1%|▏ | 1/70 [00:00<00:00, 946.15it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Bootstrapped 1 full traces after 1 examples for up to 1 rounds, amounting to 1 attempts.\n", + "Bootstrapping set 12/19\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 3%|▎ | 2/70 [00:00<00:00, 1037.42it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Bootstrapped 2 
full traces after 2 examples for up to 1 rounds, amounting to 2 attempts.\n", + "Bootstrapping set 13/19\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 1%|▏ | 1/70 [00:00<00:00, 190.59it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Bootstrapped 1 full traces after 1 examples for up to 1 rounds, amounting to 1 attempts.\n", + "Bootstrapping set 14/19\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 7%|▋ | 5/70 [00:00<00:00, 500.25it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Bootstrapped 4 full traces after 5 examples for up to 1 rounds, amounting to 5 attempts.\n", + "Bootstrapping set 15/19\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 1%|▏ | 1/70 [00:00<00:00, 497.49it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Bootstrapped 1 full traces after 1 examples for up to 1 rounds, amounting to 1 attempts.\n", + "Bootstrapping set 16/19\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 3%|▎ | 2/70 [00:00<00:00, 990.51it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Bootstrapped 2 full traces after 2 examples for up to 1 rounds, amounting to 2 attempts.\n", + "Bootstrapping set 17/19\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 1%|▏ | 1/70 [00:00<00:00, 874.00it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Bootstrapped 1 full traces after 1 examples for up to 1 rounds, amounting to 1 attempts.\n", + "Bootstrapping set 18/19\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 1%|▏ | 1/70 [00:00<00:00, 447.87it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Bootstrapped 1 full traces after 1 examples for up to 1 rounds, amounting to 1 attempts.\n", + "Bootstrapping set 19/19\n" + 
] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 3%|▎ | 2/70 [00:00<00:00, 870.28it/s]\n", + "2025/02/02 01:23:56 INFO dspy.teleprompt.mipro_optimizer_v2: \n", + "==> STEP 2: PROPOSE INSTRUCTION CANDIDATES <==\n", + "2025/02/02 01:23:56 INFO dspy.teleprompt.mipro_optimizer_v2: We will use the few-shot examples from the previous step, a generated dataset summary, a summary of the program code, and a randomly selected prompting tip to propose instructions.\n", + "2025/02/02 01:23:56 INFO dspy.teleprompt.mipro_optimizer_v2: \n", + "Proposing instructions...\n", + "\n", + "2025/02/02 01:23:56 INFO dspy.teleprompt.mipro_optimizer_v2: Proposed Instructions for Predictor 0:\n", + "\n", + "2025/02/02 01:23:56 INFO dspy.teleprompt.mipro_optimizer_v2: 0: Given the fields `question`, produce the fields `answer`.\n", + "\n", + "2025/02/02 01:23:56 INFO dspy.teleprompt.mipro_optimizer_v2: 1: You are a skilled mathematician tasked with solving complex mathematical problems. For each given \"question\", produce a detailed \"reasoning\" that logically breaks down the problem step by step, and then provide the final \"answer\". Ensure that your reasoning is clear, structured, and demonstrates a thorough understanding of mathematical principles. Use the structured format where the question is prefixed with \"Question:\", the reasoning with \"Reasoning: Let's think step by step in order to\", and the answer with \"Answer:\" to maintain coherence and clarity in your response.\n", + "\n", + "2025/02/02 01:23:56 INFO dspy.teleprompt.mipro_optimizer_v2: 2: For each `question`, provide a comprehensive `reasoning` that breaks down the problem into clear, logical steps and applies appropriate mathematical concepts to reach a solution. Then, calculate and present the final `answer`, ensuring accuracy and clarity. 
Use structured reasoning to bridge the gap between theoretical understanding and practical application, fostering a deeper comprehension of the problem-solving process.\n", + "\n", + "2025/02/02 01:23:56 INFO dspy.teleprompt.mipro_optimizer_v2: 3: You are a mathematical problem solver. Given a `question` field that describes a mathematical word problem, your task is to generate a logical and detailed step-by-step reasoning process that leads to the solution, and then provide the final `answer`. Ensure that the reasoning is coherent, methodical, and clearly guides through each step of the problem-solving process to arrive at an accurate answer.\n", + "\n", + "2025/02/02 01:23:56 INFO dspy.teleprompt.mipro_optimizer_v2: 4: For each provided `question`, carefully analyze and apply mathematical principles to generate a detailed `reasoning` process that logically leads to the correct `answer`. Ensure the reasoning is clear, step-by-step, and demonstrates a thorough understanding of the problem-solving method. Finally, present the `answer` succinctly, reflecting the conclusion of the reasoning process.\n", + "\n", + "2025/02/02 01:23:56 INFO dspy.teleprompt.mipro_optimizer_v2: 5: Given a mathematical problem as a \"question\", generate the \"reasoning\" that logically explains the step-by-step process to solve the problem, and then provide the final \"answer\".\n", + "\n", + "2025/02/02 01:23:56 INFO dspy.teleprompt.mipro_optimizer_v2: 6: Imagine you are a contestant in a high-stakes international mathematics competition where each problem could determine your final standing. Your task is to take a given mathematical question, meticulously think through each step in the reasoning process, and derive the correct answer. Ensure your reasoning is clear and thorough, as you will need to justify your solution to a panel of expert judges. 
Produce a detailed explanation and the final answer for the question provided.\n", + "\n", + "2025/02/02 01:23:56 INFO dspy.teleprompt.mipro_optimizer_v2: 7: Given a mathematical `question`, generate a detailed step-by-step `reasoning` that logically leads to the `answer`. The reasoning should include the application of relevant mathematical concepts, formulas, or theorems, and clearly explain each step taken to solve the problem. Finally, provide the `answer` as a concise result of the computation. Ensure that the reasoning is comprehensive and easy to follow, supporting the learner's understanding of the problem-solving process.\n", + "\n", + "2025/02/02 01:23:56 INFO dspy.teleprompt.mipro_optimizer_v2: 8: For each mathematical question provided, generate a detailed step-by-step reasoning that leads to the final answer. Ensure that the reasoning is logical and clear, demonstrating the process of solving the problem methodically. Finally, present the solution as the answer in a concise format.\n", + "\n", + "2025/02/02 01:23:56 INFO dspy.teleprompt.mipro_optimizer_v2: 9: For each mathematical question provided, generate a detailed step-by-step reasoning that logically explains the solution process, and then provide the final answer. Ensure that the reasoning is clear, comprehensive, and educational, guiding the user through each calculation and logical deduction to arrive at the correct solution. This approach should not only provide the correct answer but also enhance understanding of the underlying mathematical concepts and problem-solving strategies.\n", + "\n", + "2025/02/02 01:23:56 INFO dspy.teleprompt.mipro_optimizer_v2: 10: Imagine you are participating in a prestigious mathematics competition where you must solve complex problems with precision and clarity. Given a mathematical `question`, generate a detailed `reasoning` process that outlines each step of your solution, and provide the final `answer`. 
Your goal is to demonstrate exceptional problem-solving skills and articulate your methodology clearly, as if explaining to a panel of judges. Ensure that your reasoning is thorough and your final answer is accurate to showcase your mathematical proficiency.\n", + "\n", + "2025/02/02 01:23:56 INFO dspy.teleprompt.mipro_optimizer_v2: 11: Imagine you are participating in a high-stakes mathematics competition where every second counts. You are presented with a challenging mathematical problem in the `question` field. Your task is to swiftly and accurately break down the problem using a step-by-step reasoning process, ensuring clarity and precision at every step. After thoroughly analyzing the problem, provide the final solution in the `answer` field. Your ability to logically and methodically derive the correct answer will determine your success in this intense competitive environment.\n", + "\n", + "2025/02/02 01:23:56 INFO dspy.teleprompt.mipro_optimizer_v2: 12: Given a mathematical `question`, generate a detailed `reasoning` that logically explains the steps to solve the problem, followed by the `answer`. Ensure the reasoning is clear and step-by-step to facilitate understanding of the problem-solving process.\n", + "\n", + "2025/02/02 01:23:56 INFO dspy.teleprompt.mipro_optimizer_v2: 13: Imagine you are participating in a high-stakes mathematics competition where every second counts. You are presented with a complex mathematical problem as the \"question\". Your task is to swiftly and accurately generate both the \"reasoning\" and the \"answer\". The reasoning should be a detailed, step-by-step guide that logically leads to the solution, ensuring clarity and precision. Once the reasoning is complete, provide the final \"answer\" to the problem. 
Remember, the quality of your reasoning can make the difference between winning and losing, so ensure each step is meticulously detailed and correct.\n", + "\n", + "2025/02/02 01:23:56 INFO dspy.teleprompt.mipro_optimizer_v2: 14: You are a mathematical problem-solving expert tasked with analyzing and solving complex mathematical questions. For each given `question`, generate a detailed step-by-step `reasoning` that explains the logical and mathematical processes used to arrive at the `answer`. Ensure that the explanation is clear, structured, and comprehensible, showcasing your expertise in applying mathematical principles and problem-solving skills. Provide the final `answer` as the last step in your response.\n", + "\n", + "2025/02/02 01:23:56 INFO dspy.teleprompt.mipro_optimizer_v2: 15: Given a mathematical word problem as `question`, generate a detailed step-by-step `reasoning` that leads to the solution, and then provide the `answer`.\n", + "\n", + "2025/02/02 01:23:56 INFO dspy.teleprompt.mipro_optimizer_v2: 16: Given a mathematical question, generate a detailed step-by-step reasoning process that leads to the solution, and provide the final answer.\n", + "\n", + "2025/02/02 01:23:56 INFO dspy.teleprompt.mipro_optimizer_v2: 17: You are a mathematical reasoning expert tasked with solving complex problems. For each given `question`, generate a detailed step-by-step `reasoning` that leads to the `answer`. Your explanation should be clear and methodical, demonstrating how you apply mathematical principles and logical deductions to arrive at the correct solution. Provide both the `reasoning` and the `answer` as outputs.\n", + "\n", + "2025/02/02 01:23:56 INFO dspy.teleprompt.mipro_optimizer_v2: 18: Given a mathematical word problem as the `question`, generate a detailed `reasoning` to solve the problem step-by-step, ensuring logical clarity and structured presentation. Then, compute and provide the final `answer` based on the derived reasoning. 
Focus on clear communication of mathematical concepts and calculations, demonstrating thorough understanding and accurate application of relevant principles.\n", + "\n", + "2025/02/02 01:23:56 INFO dspy.teleprompt.mipro_optimizer_v2: \n", + "\n", + "2025/02/02 01:23:56 INFO dspy.teleprompt.mipro_optimizer_v2: ==> STEP 3: FINDING OPTIMAL PROMPT PARAMETERS <==\n", + "2025/02/02 01:23:56 INFO dspy.teleprompt.mipro_optimizer_v2: We will evaluate the program over a series of trials with different combinations of instructions and few-shot examples to find the optimal combination using Bayesian Optimization.\n", + "\n", + "2025/02/02 01:23:56 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 1 / 28 - Full Evaluation of Default Program ==\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Bootstrapped 1 full traces after 2 examples for up to 1 rounds, amounting to 2 attempts.\n", + "Average Metric: 209.00 / 280 (74.6%): 100%|██████████| 280/280 [00:00<00:00, 4967.79it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2025/02/02 01:23:56 INFO dspy.evaluate.evaluate: Average Metric: 209 / 280 (74.6%)\n", + "2025/02/02 01:23:56 INFO dspy.teleprompt.mipro_optimizer_v2: Default program score: 74.64\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2025/02/02 01:23:56 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 2 / 28 - Minibatch ==\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Average Metric: 20.00 / 25 (80.0%): 100%|██████████| 25/25 [00:00<00:00, 4411.34it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2025/02/02 01:23:56 INFO dspy.evaluate.evaluate: Average Metric: 20 / 25 (80.0%)\n", + "2025/02/02 01:23:56 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 80.0 on minibatch of size 25 with parameters ['Predictor 0: Instruction 12', 
'Predictor 0: Few-Shot Set 7'].\n", + "2025/02/02 01:23:56 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [80.0]\n", + "2025/02/02 01:23:56 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [74.64]\n", + "2025/02/02 01:23:56 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 74.64\n", + "2025/02/02 01:23:56 INFO dspy.teleprompt.mipro_optimizer_v2: =========================================\n", + "\n", + "\n", + "2025/02/02 01:23:56 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 3 / 28 - Minibatch ==\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Average Metric: 23.00 / 25 (92.0%): 100%|██████████| 25/25 [00:00<00:00, 4855.19it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2025/02/02 01:23:56 INFO dspy.evaluate.evaluate: Average Metric: 23 / 25 (92.0%)\n", + "2025/02/02 01:23:56 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 92.0 on minibatch of size 25 with parameters ['Predictor 0: Instruction 10', 'Predictor 0: Few-Shot Set 7'].\n", + "2025/02/02 01:23:56 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [80.0, 92.0]\n", + "2025/02/02 01:23:56 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [74.64]\n", + "2025/02/02 01:23:56 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 74.64\n", + "2025/02/02 01:23:56 INFO dspy.teleprompt.mipro_optimizer_v2: =========================================\n", + "\n", + "\n", + "2025/02/02 01:23:56 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 4 / 28 - Minibatch ==\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Average Metric: 17.00 / 25 (68.0%): 100%|██████████| 25/25 [00:00<00:00, 4352.38it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2025/02/02 01:23:56 INFO dspy.evaluate.evaluate: Average Metric: 17 / 25 (68.0%)\n", + "2025/02/02 01:23:56 INFO dspy.teleprompt.mipro_optimizer_v2: 
Score: 68.0 on minibatch of size 25 with parameters ['Predictor 0: Instruction 7', 'Predictor 0: Few-Shot Set 18'].\n", + "2025/02/02 01:23:56 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [80.0, 92.0, 68.0]\n", + "2025/02/02 01:23:56 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [74.64]\n", + "2025/02/02 01:23:56 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 74.64\n", + "2025/02/02 01:23:56 INFO dspy.teleprompt.mipro_optimizer_v2: =========================================\n", + "\n", + "\n", + "2025/02/02 01:23:56 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 5 / 28 - Minibatch ==\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Average Metric: 17.00 / 25 (68.0%): 100%|██████████| 25/25 [00:00<00:00, 4755.66it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2025/02/02 01:23:57 INFO dspy.evaluate.evaluate: Average Metric: 17 / 25 (68.0%)\n", + "2025/02/02 01:23:57 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 68.0 on minibatch of size 25 with parameters ['Predictor 0: Instruction 15', 'Predictor 0: Few-Shot Set 2'].\n", + "2025/02/02 01:23:57 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [80.0, 92.0, 68.0, 68.0]\n", + "2025/02/02 01:23:57 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [74.64]\n", + "2025/02/02 01:23:57 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 74.64\n", + "2025/02/02 01:23:57 INFO dspy.teleprompt.mipro_optimizer_v2: =========================================\n", + "\n", + "\n", + "2025/02/02 01:23:57 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 6 / 28 - Minibatch ==\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Average Metric: 22.00 / 25 (88.0%): 100%|██████████| 25/25 [00:00<00:00, 4850.48it/s]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2025/02/02 01:23:57 INFO 
dspy.evaluate.evaluate: Average Metric: 22 / 25 (88.0%)\n", + "2025/02/02 01:23:57 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 88.0 on minibatch of size 25 with parameters ['Predictor 0: Instruction 8', 'Predictor 0: Few-Shot Set 18'].\n", + "2025/02/02 01:23:57 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [80.0, 92.0, 68.0, 68.0, 88.0]\n", + "2025/02/02 01:23:57 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [74.64]\n", + "2025/02/02 01:23:57 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 74.64\n", + "2025/02/02 01:23:57 INFO dspy.teleprompt.mipro_optimizer_v2: =========================================\n", + "\n", + "\n", + "2025/02/02 01:23:57 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 7 / 28 - Minibatch ==\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Average Metric: 20.00 / 25 (80.0%): 100%|██████████| 25/25 [00:00<00:00, 4843.98it/s]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2025/02/02 01:23:57 INFO dspy.evaluate.evaluate: Average Metric: 20 / 25 (80.0%)\n", + "2025/02/02 01:23:57 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 80.0 on minibatch of size 25 with parameters ['Predictor 0: Instruction 7', 'Predictor 0: Few-Shot Set 1'].\n", + "2025/02/02 01:23:57 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [80.0, 92.0, 68.0, 68.0, 88.0, 80.0]\n", + "2025/02/02 01:23:57 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [74.64]\n", + "2025/02/02 01:23:57 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 74.64\n", + "2025/02/02 01:23:57 INFO dspy.teleprompt.mipro_optimizer_v2: =========================================\n", + "\n", + "\n", + "2025/02/02 01:23:57 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 8 / 28 - Minibatch ==\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Average Metric: 19.00 / 25 (76.0%): 100%|██████████| 25/25 [00:00<00:00, 
644.30it/s] " + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2025/02/02 01:23:57 INFO dspy.evaluate.evaluate: Average Metric: 19 / 25 (76.0%)\n", + "2025/02/02 01:23:57 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 76.0 on minibatch of size 25 with parameters ['Predictor 0: Instruction 7', 'Predictor 0: Few-Shot Set 12'].\n", + "2025/02/02 01:23:57 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [80.0, 92.0, 68.0, 68.0, 88.0, 80.0, 76.0]\n", + "2025/02/02 01:23:57 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [74.64]\n", + "2025/02/02 01:23:57 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 74.64\n", + "2025/02/02 01:23:57 INFO dspy.teleprompt.mipro_optimizer_v2: =========================================\n", + "\n", + "\n", + "2025/02/02 01:23:57 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 9 / 28 - Minibatch ==\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Average Metric: 17.00 / 25 (68.0%): 100%|██████████| 25/25 [00:00<00:00, 4200.18it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2025/02/02 01:23:57 INFO dspy.evaluate.evaluate: Average Metric: 17 / 25 (68.0%)\n", + "2025/02/02 01:23:57 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 68.0 on minibatch of size 25 with parameters ['Predictor 0: Instruction 11', 'Predictor 0: Few-Shot Set 13'].\n", + "2025/02/02 01:23:57 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [80.0, 92.0, 68.0, 68.0, 88.0, 80.0, 76.0, 68.0]\n", + "2025/02/02 01:23:57 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [74.64]\n", + "2025/02/02 01:23:57 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 74.64\n", + "2025/02/02 01:23:57 INFO dspy.teleprompt.mipro_optimizer_v2: =========================================\n", + "\n", + "\n", + "2025/02/02 01:23:57 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 10 / 28 - Minibatch ==\n" + 
] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Average Metric: 22.00 / 25 (88.0%): 100%|██████████| 25/25 [00:00<00:00, 4271.36it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2025/02/02 01:23:57 INFO dspy.evaluate.evaluate: Average Metric: 22 / 25 (88.0%)\n", + "2025/02/02 01:23:57 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 88.0 on minibatch of size 25 with parameters ['Predictor 0: Instruction 5', 'Predictor 0: Few-Shot Set 4'].\n", + "2025/02/02 01:23:57 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [80.0, 92.0, 68.0, 68.0, 88.0, 80.0, 76.0, 68.0, 88.0]\n", + "2025/02/02 01:23:57 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [74.64]\n", + "2025/02/02 01:23:57 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 74.64\n", + "2025/02/02 01:23:57 INFO dspy.teleprompt.mipro_optimizer_v2: ==========================================\n", + "\n", + "\n", + "2025/02/02 01:23:57 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 11 /28 - Full Evaluation =====\n", + "2025/02/02 01:23:57 INFO dspy.teleprompt.mipro_optimizer_v2: Doing full eval on next top averaging program (Avg Score: 92.0) from minibatch trials...\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Average Metric: 251.00 / 280 (89.6%): 100%|██████████| 280/280 [00:00<00:00, 3028.83it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2025/02/02 01:23:57 INFO dspy.evaluate.evaluate: Average Metric: 251 / 280 (89.6%)\n", + "2025/02/02 01:23:57 INFO dspy.teleprompt.mipro_optimizer_v2: \u001b[92mNew best full eval score!\u001b[0m Score: 89.64\n", + "2025/02/02 01:23:57 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [74.64, 89.64]\n", + "2025/02/02 01:23:57 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 89.64\n", + "2025/02/02 01:23:57 INFO dspy.teleprompt.mipro_optimizer_v2: 
=======================\n", + "2025/02/02 01:23:57 INFO dspy.teleprompt.mipro_optimizer_v2: \n", + "\n", + "2025/02/02 01:23:57 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 12 / 28 - Minibatch ==\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Average Metric: 20.00 / 25 (80.0%): 100%|██████████| 25/25 [00:00<00:00, 3730.26it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2025/02/02 01:23:57 INFO dspy.evaluate.evaluate: Average Metric: 20 / 25 (80.0%)\n", + "2025/02/02 01:23:57 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 80.0 on minibatch of size 25 with parameters ['Predictor 0: Instruction 10', 'Predictor 0: Few-Shot Set 8'].\n", + "2025/02/02 01:23:57 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [80.0, 92.0, 68.0, 68.0, 88.0, 80.0, 76.0, 68.0, 88.0, 80.0]\n", + "2025/02/02 01:23:57 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [74.64, 89.64]\n", + "2025/02/02 01:23:57 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 89.64\n", + "2025/02/02 01:23:57 INFO dspy.teleprompt.mipro_optimizer_v2: ==========================================\n", + "\n", + "\n", + "2025/02/02 01:23:57 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 13 / 28 - Minibatch ==\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Average Metric: 23.00 / 25 (92.0%): 100%|██████████| 25/25 [00:00<00:00, 3726.42it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2025/02/02 01:23:57 INFO dspy.evaluate.evaluate: Average Metric: 23 / 25 (92.0%)\n", + "2025/02/02 01:23:57 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 92.0 on minibatch of size 25 with parameters ['Predictor 0: Instruction 17', 'Predictor 0: Few-Shot Set 10'].\n", + "2025/02/02 01:23:57 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [80.0, 92.0, 68.0, 68.0, 88.0, 80.0, 76.0, 68.0, 88.0, 80.0, 92.0]\n", + "2025/02/02 
01:23:57 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [74.64, 89.64]\n", + "2025/02/02 01:23:57 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 89.64\n", + "2025/02/02 01:23:57 INFO dspy.teleprompt.mipro_optimizer_v2: ==========================================\n", + "\n", + "\n", + "2025/02/02 01:23:57 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 14 / 28 - Minibatch ==\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Average Metric: 21.00 / 25 (84.0%): 100%|██████████| 25/25 [00:00<00:00, 3479.83it/s]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2025/02/02 01:23:57 INFO dspy.evaluate.evaluate: Average Metric: 21 / 25 (84.0%)\n", + "2025/02/02 01:23:57 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 84.0 on minibatch of size 25 with parameters ['Predictor 0: Instruction 17', 'Predictor 0: Few-Shot Set 2'].\n", + "2025/02/02 01:23:57 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [80.0, 92.0, 68.0, 68.0, 88.0, 80.0, 76.0, 68.0, 88.0, 80.0, 92.0, 84.0]\n", + "2025/02/02 01:23:57 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [74.64, 89.64]\n", + "2025/02/02 01:23:57 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 89.64\n", + "2025/02/02 01:23:57 INFO dspy.teleprompt.mipro_optimizer_v2: ==========================================\n", + "\n", + "\n", + "2025/02/02 01:23:57 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 15 / 28 - Minibatch ==\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Average Metric: 24.00 / 25 (96.0%): 100%|██████████| 25/25 [00:00<00:00, 3444.39it/s]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2025/02/02 01:23:57 INFO dspy.evaluate.evaluate: Average Metric: 24 / 25 (96.0%)\n", + "2025/02/02 01:23:57 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 96.0 on minibatch of size 25 with parameters ['Predictor 0: Instruction 
17', 'Predictor 0: Few-Shot Set 7'].\n", + "2025/02/02 01:23:57 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [80.0, 92.0, 68.0, 68.0, 88.0, 80.0, 76.0, 68.0, 88.0, 80.0, 92.0, 84.0, 96.0]\n", + "2025/02/02 01:23:57 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [74.64, 89.64]\n", + "2025/02/02 01:23:57 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 89.64\n", + "2025/02/02 01:23:57 INFO dspy.teleprompt.mipro_optimizer_v2: ==========================================\n", + "\n", + "\n", + "2025/02/02 01:23:57 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 16 / 28 - Minibatch ==\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Average Metric: 22.00 / 25 (88.0%): 100%|██████████| 25/25 [00:00<00:00, 3069.78it/s] \n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2025/02/02 01:23:57 INFO dspy.evaluate.evaluate: Average Metric: 22 / 25 (88.0%)\n", + "2025/02/02 01:23:57 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 88.0 on minibatch of size 25 with parameters ['Predictor 0: Instruction 2', 'Predictor 0: Few-Shot Set 7'].\n", + "2025/02/02 01:23:57 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [80.0, 92.0, 68.0, 68.0, 88.0, 80.0, 76.0, 68.0, 88.0, 80.0, 92.0, 84.0, 96.0, 88.0]\n", + "2025/02/02 01:23:57 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [74.64, 89.64]\n", + "2025/02/02 01:23:57 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 89.64\n", + "2025/02/02 01:23:57 INFO dspy.teleprompt.mipro_optimizer_v2: ==========================================\n", + "\n", + "\n", + "2025/02/02 01:23:57 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 17 / 28 - Minibatch ==\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Average Metric: 24.00 / 25 (96.0%): 100%|██████████| 25/25 [00:00<00:00, 2977.64it/s] \n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ 
+ "2025/02/02 01:23:57 INFO dspy.evaluate.evaluate: Average Metric: 24 / 25 (96.0%)\n", + "2025/02/02 01:23:57 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 96.0 on minibatch of size 25 with parameters ['Predictor 0: Instruction 10', 'Predictor 0: Few-Shot Set 7'].\n", + "2025/02/02 01:23:57 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [80.0, 92.0, 68.0, 68.0, 88.0, 80.0, 76.0, 68.0, 88.0, 80.0, 92.0, 84.0, 96.0, 88.0, 96.0]\n", + "2025/02/02 01:23:57 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [74.64, 89.64]\n", + "2025/02/02 01:23:57 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 89.64\n", + "2025/02/02 01:23:57 INFO dspy.teleprompt.mipro_optimizer_v2: ==========================================\n", + "\n", + "\n", + "2025/02/02 01:23:57 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 18 / 28 - Minibatch ==\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Average Metric: 23.00 / 25 (92.0%): 100%|██████████| 25/25 [00:00<00:00, 2736.51it/s] " + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2025/02/02 01:23:57 INFO dspy.evaluate.evaluate: Average Metric: 23 / 25 (92.0%)\n", + "2025/02/02 01:23:57 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 92.0 on minibatch of size 25 with parameters ['Predictor 0: Instruction 9', 'Predictor 0: Few-Shot Set 16'].\n", + "2025/02/02 01:23:57 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [80.0, 92.0, 68.0, 68.0, 88.0, 80.0, 76.0, 68.0, 88.0, 80.0, 92.0, 84.0, 96.0, 88.0, 96.0, 92.0]\n", + "2025/02/02 01:23:57 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [74.64, 89.64]\n", + "2025/02/02 01:23:57 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 89.64\n", + "2025/02/02 01:23:57 INFO dspy.teleprompt.mipro_optimizer_v2: ==========================================\n", + "\n", + "\n", + "2025/02/02 01:23:57 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 19 / 28 
- Minibatch ==\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Average Metric: 19.00 / 25 (76.0%): 100%|██████████| 25/25 [00:00<00:00, 3829.58it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2025/02/02 01:23:57 INFO dspy.evaluate.evaluate: Average Metric: 19 / 25 (76.0%)\n", + "2025/02/02 01:23:57 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 76.0 on minibatch of size 25 with parameters ['Predictor 0: Instruction 1', 'Predictor 0: Few-Shot Set 6'].\n", + "2025/02/02 01:23:57 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [80.0, 92.0, 68.0, 68.0, 88.0, 80.0, 76.0, 68.0, 88.0, 80.0, 92.0, 84.0, 96.0, 88.0, 96.0, 92.0, 76.0]\n", + "2025/02/02 01:23:57 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [74.64, 89.64]\n", + "2025/02/02 01:23:57 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 89.64\n", + "2025/02/02 01:23:57 INFO dspy.teleprompt.mipro_optimizer_v2: ==========================================\n", + "\n", + "\n", + "2025/02/02 01:23:57 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 20 / 28 - Minibatch ==\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Average Metric: 24.00 / 25 (96.0%): 100%|██████████| 25/25 [00:00<00:00, 3877.15it/s] \n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2025/02/02 01:23:57 INFO dspy.evaluate.evaluate: Average Metric: 24 / 25 (96.0%)\n", + "2025/02/02 01:23:57 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 96.0 on minibatch of size 25 with parameters ['Predictor 0: Instruction 17', 'Predictor 0: Few-Shot Set 7'].\n", + "2025/02/02 01:23:57 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [80.0, 92.0, 68.0, 68.0, 88.0, 80.0, 76.0, 68.0, 88.0, 80.0, 92.0, 84.0, 96.0, 88.0, 96.0, 92.0, 76.0, 96.0]\n", + "2025/02/02 01:23:57 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [74.64, 89.64]\n", + "2025/02/02 
01:23:57 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 89.64\n", + "2025/02/02 01:23:57 INFO dspy.teleprompt.mipro_optimizer_v2: ==========================================\n", + "\n", + "\n", + "2025/02/02 01:23:57 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 21 /28 - Full Evaluation =====\n", + "2025/02/02 01:23:57 INFO dspy.teleprompt.mipro_optimizer_v2: Doing full eval on next top averaging program (Avg Score: 96.0) from minibatch trials...\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Average Metric: 250.00 / 280 (89.3%): 100%|██████████| 280/280 [00:00<00:00, 3876.82it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2025/02/02 01:23:58 INFO dspy.evaluate.evaluate: Average Metric: 250 / 280 (89.3%)\n", + "2025/02/02 01:23:58 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [74.64, 89.64, 89.29]\n", + "2025/02/02 01:23:58 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 89.64\n", + "2025/02/02 01:23:58 INFO dspy.teleprompt.mipro_optimizer_v2: =======================\n", + "2025/02/02 01:23:58 INFO dspy.teleprompt.mipro_optimizer_v2: \n", + "\n", + "2025/02/02 01:23:58 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 22 / 28 - Minibatch ==\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Average Metric: 22.00 / 25 (88.0%): 100%|██████████| 25/25 [00:00<00:00, 3661.87it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2025/02/02 01:23:58 INFO dspy.evaluate.evaluate: Average Metric: 22 / 25 (88.0%)\n", + "2025/02/02 01:23:58 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 88.0 on minibatch of size 25 with parameters ['Predictor 0: Instruction 4', 'Predictor 0: Few-Shot Set 7'].\n", + "2025/02/02 01:23:58 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [80.0, 92.0, 68.0, 68.0, 88.0, 80.0, 76.0, 68.0, 88.0, 80.0, 92.0, 84.0, 96.0, 88.0, 96.0, 92.0, 76.0, 96.0, 
88.0]\n", + "2025/02/02 01:23:58 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [74.64, 89.64, 89.29]\n", + "2025/02/02 01:23:58 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 89.64\n", + "2025/02/02 01:23:58 INFO dspy.teleprompt.mipro_optimizer_v2: ==========================================\n", + "\n", + "\n", + "2025/02/02 01:23:58 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 23 / 28 - Minibatch ==\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Average Metric: 21.00 / 25 (84.0%): 100%|██████████| 25/25 [00:00<00:00, 3813.97it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2025/02/02 01:23:58 INFO dspy.evaluate.evaluate: Average Metric: 21 / 25 (84.0%)\n", + "2025/02/02 01:23:58 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 84.0 on minibatch of size 25 with parameters ['Predictor 0: Instruction 3', 'Predictor 0: Few-Shot Set 5'].\n", + "2025/02/02 01:23:58 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [80.0, 92.0, 68.0, 68.0, 88.0, 80.0, 76.0, 68.0, 88.0, 80.0, 92.0, 84.0, 96.0, 88.0, 96.0, 92.0, 76.0, 96.0, 88.0, 84.0]\n", + "2025/02/02 01:23:58 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [74.64, 89.64, 89.29]\n", + "2025/02/02 01:23:58 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 89.64\n", + "2025/02/02 01:23:58 INFO dspy.teleprompt.mipro_optimizer_v2: ==========================================\n", + "\n", + "\n", + "2025/02/02 01:23:58 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 24 / 28 - Minibatch ==\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Average Metric: 25.00 / 25 (100.0%): 100%|██████████| 25/25 [00:00<00:00, 3199.32it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2025/02/02 01:23:58 INFO dspy.evaluate.evaluate: Average Metric: 25 / 25 (100.0%)\n", + "2025/02/02 01:23:58 INFO 
dspy.teleprompt.mipro_optimizer_v2: Score: 100.0 on minibatch of size 25 with parameters ['Predictor 0: Instruction 16', 'Predictor 0: Few-Shot Set 7'].\n", + "2025/02/02 01:23:58 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [80.0, 92.0, 68.0, 68.0, 88.0, 80.0, 76.0, 68.0, 88.0, 80.0, 92.0, 84.0, 96.0, 88.0, 96.0, 92.0, 76.0, 96.0, 88.0, 84.0, 100.0]\n", + "2025/02/02 01:23:58 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [74.64, 89.64, 89.29]\n", + "2025/02/02 01:23:58 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 89.64\n", + "2025/02/02 01:23:58 INFO dspy.teleprompt.mipro_optimizer_v2: ==========================================\n", + "\n", + "\n", + "2025/02/02 01:23:58 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 25 / 28 - Minibatch ==\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Average Metric: 22.00 / 25 (88.0%): 100%|██████████| 25/25 [00:00<00:00, 4037.33it/s] \n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2025/02/02 01:23:58 INFO dspy.evaluate.evaluate: Average Metric: 22 / 25 (88.0%)\n", + "2025/02/02 01:23:58 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 88.0 on minibatch of size 25 with parameters ['Predictor 0: Instruction 14', 'Predictor 0: Few-Shot Set 9'].\n", + "2025/02/02 01:23:58 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [80.0, 92.0, 68.0, 68.0, 88.0, 80.0, 76.0, 68.0, 88.0, 80.0, 92.0, 84.0, 96.0, 88.0, 96.0, 92.0, 76.0, 96.0, 88.0, 84.0, 100.0, 88.0]\n", + "2025/02/02 01:23:58 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [74.64, 89.64, 89.29]\n", + "2025/02/02 01:23:58 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 89.64\n", + "2025/02/02 01:23:58 INFO dspy.teleprompt.mipro_optimizer_v2: ==========================================\n", + "\n", + "\n", + "2025/02/02 01:23:58 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 26 / 28 - Minibatch ==\n" 
+ ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Average Metric: 23.00 / 25 (92.0%): 100%|██████████| 25/25 [00:00<00:00, 3603.48it/s]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2025/02/02 01:23:58 INFO dspy.evaluate.evaluate: Average Metric: 23 / 25 (92.0%)\n", + "2025/02/02 01:23:58 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 92.0 on minibatch of size 25 with parameters ['Predictor 0: Instruction 16', 'Predictor 0: Few-Shot Set 1'].\n", + "2025/02/02 01:23:58 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [80.0, 92.0, 68.0, 68.0, 88.0, 80.0, 76.0, 68.0, 88.0, 80.0, 92.0, 84.0, 96.0, 88.0, 96.0, 92.0, 76.0, 96.0, 88.0, 84.0, 100.0, 88.0, 92.0]\n", + "2025/02/02 01:23:58 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [74.64, 89.64, 89.29]\n", + "2025/02/02 01:23:58 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 89.64\n", + "2025/02/02 01:23:58 INFO dspy.teleprompt.mipro_optimizer_v2: ==========================================\n", + "\n", + "\n", + "2025/02/02 01:23:58 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 27 / 28 - Minibatch ==\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Average Metric: 19.00 / 25 (76.0%): 100%|██████████| 25/25 [00:00<00:00, 3463.05it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2025/02/02 01:23:58 INFO dspy.evaluate.evaluate: Average Metric: 19 / 25 (76.0%)\n", + "2025/02/02 01:23:58 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 76.0 on minibatch of size 25 with parameters ['Predictor 0: Instruction 18', 'Predictor 0: Few-Shot Set 11'].\n", + "2025/02/02 01:23:58 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [80.0, 92.0, 68.0, 68.0, 88.0, 80.0, 76.0, 68.0, 88.0, 80.0, 92.0, 84.0, 96.0, 88.0, 96.0, 92.0, 76.0, 96.0, 88.0, 84.0, 100.0, 88.0, 92.0, 76.0]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + 
"\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2025/02/02 01:23:58 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [74.64, 89.64, 89.29]\n", + "2025/02/02 01:23:58 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 89.64\n", + "2025/02/02 01:23:58 INFO dspy.teleprompt.mipro_optimizer_v2: ==========================================\n", + "\n", + "\n", + "2025/02/02 01:23:58 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 28 /28 - Full Evaluation =====\n", + "2025/02/02 01:23:58 INFO dspy.teleprompt.mipro_optimizer_v2: Doing full eval on next top averaging program (Avg Score: 100.0) from minibatch trials...\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Average Metric: 249.00 / 280 (88.9%): 100%|██████████| 280/280 [00:00<00:00, 4097.99it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2025/02/02 01:23:58 INFO dspy.evaluate.evaluate: Average Metric: 249 / 280 (88.9%)\n", + "2025/02/02 01:23:58 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [74.64, 89.64, 89.29, 88.93]\n", + "2025/02/02 01:23:58 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 89.64\n", + "2025/02/02 01:23:58 INFO dspy.teleprompt.mipro_optimizer_v2: =======================\n", + "2025/02/02 01:23:58 INFO dspy.teleprompt.mipro_optimizer_v2: \n", + "\n", + "2025/02/02 01:23:58 INFO dspy.teleprompt.mipro_optimizer_v2: Returning best identified program with score 89.64!\n", + "2025/02/02 01:23:58 INFO dspy.teleprompt.mipro_optimizer_v2: \n", + "RUNNING WITH THE FOLLOWING MEDIUM AUTO RUN SETTINGS:\n", + "num_trials: 25\n", + "minibatch: True\n", + "num_candidates: 19\n", + "valset size: 280\n", + "\n", + "2025/02/02 01:23:58 INFO dspy.teleprompt.mipro_optimizer_v2: \n", + "==> STEP 1: BOOTSTRAP FEWSHOT EXAMPLES <==\n", + "2025/02/02 01:23:58 INFO dspy.teleprompt.mipro_optimizer_v2: These will be used as few-shot example candidates for our 
program and for creating instructions.\n", + "\n", + "2025/02/02 01:23:58 INFO dspy.teleprompt.mipro_optimizer_v2: Bootstrapping N=19 sets of demonstrations...\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Bootstrapping set 1/19\n", + "Bootstrapping set 2/19\n", + "Bootstrapping set 3/19\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 6%|▌ | 4/70 [02:18<38:05, 34.63s/it]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Bootstrapped 4 full traces after 4 examples for up to 1 rounds, amounting to 4 attempts.\n", + "Bootstrapping set 4/19\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 6%|▌ | 4/70 [01:10<19:20, 17.58s/it]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Bootstrapped 4 full traces after 4 examples for up to 1 rounds, amounting to 4 attempts.\n", + "Bootstrapping set 5/19\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 6%|▌ | 4/70 [01:15<20:47, 18.91s/it]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Bootstrapped 3 full traces after 4 examples for up to 1 rounds, amounting to 4 attempts.\n", + "Bootstrapping set 6/19\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 6%|▌ | 4/70 [01:42<28:17, 25.73s/it]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Bootstrapped 4 full traces after 4 examples for up to 1 rounds, amounting to 4 attempts.\n", + "Bootstrapping set 7/19\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 7%|▋ | 5/70 [02:01<26:16, 24.25s/it]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Bootstrapped 4 full traces after 5 examples for up to 1 rounds, amounting to 5 attempts.\n", + "Bootstrapping set 8/19\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 1%|▏ | 1/70 [00:11<12:56, 
11.26s/it]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Bootstrapped 1 full traces after 1 examples for up to 1 rounds, amounting to 1 attempts.\n", + "Bootstrapping set 9/19\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 6%|▌ | 4/70 [01:49<30:07, 27.38s/it]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Bootstrapped 2 full traces after 4 examples for up to 1 rounds, amounting to 4 attempts.\n", + "Bootstrapping set 10/19\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 6%|▌ | 4/70 [01:37<26:49, 24.39s/it]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Bootstrapped 3 full traces after 4 examples for up to 1 rounds, amounting to 4 attempts.\n", + "Bootstrapping set 11/19\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 1%|▏ | 1/70 [00:12<14:33, 12.65s/it]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Bootstrapped 1 full traces after 1 examples for up to 1 rounds, amounting to 1 attempts.\n", + "Bootstrapping set 12/19\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 4%|▍ | 3/70 [00:46<17:14, 15.45s/it]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Bootstrapped 2 full traces after 3 examples for up to 1 rounds, amounting to 3 attempts.\n", + "Bootstrapping set 13/19\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 1%|▏ | 1/70 [00:28<32:40, 28.41s/it]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Bootstrapped 1 full traces after 1 examples for up to 1 rounds, amounting to 1 attempts.\n", + "Bootstrapping set 14/19\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 7%|▋ | 5/70 [01:38<21:17, 19.65s/it]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Bootstrapped 4 full traces 
after 5 examples for up to 1 rounds, amounting to 5 attempts.\n", + "Bootstrapping set 15/19\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 1%|▏ | 1/70 [00:12<14:44, 12.82s/it]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Bootstrapped 1 full traces after 1 examples for up to 1 rounds, amounting to 1 attempts.\n", + "Bootstrapping set 16/19\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 3%|▎ | 2/70 [00:33<18:48, 16.59s/it]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Bootstrapped 2 full traces after 2 examples for up to 1 rounds, amounting to 2 attempts.\n", + "Bootstrapping set 17/19\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 1%|▏ | 1/70 [00:23<27:34, 23.98s/it]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Bootstrapped 1 full traces after 1 examples for up to 1 rounds, amounting to 1 attempts.\n", + "Bootstrapping set 18/19\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 3%|▎ | 2/70 [01:12<40:50, 36.03s/it] \n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Bootstrapped 1 full traces after 2 examples for up to 1 rounds, amounting to 2 attempts.\n", + "Bootstrapping set 19/19\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 1%|▏ | 1/70 [00:50<57:57, 50.40s/it]\n", + "2025/02/02 01:42:23 INFO dspy.teleprompt.mipro_optimizer_v2: \n", + "==> STEP 2: PROPOSE INSTRUCTION CANDIDATES <==\n", + "2025/02/02 01:42:23 INFO dspy.teleprompt.mipro_optimizer_v2: We will use the few-shot examples from the previous step, a generated dataset summary, a summary of the program code, and a randomly selected prompting tip to propose instructions.\n", + "2025/02/02 01:42:23 INFO dspy.teleprompt.mipro_optimizer_v2: \n", + "Proposing instructions...\n", + "\n" + ] + }, + { + "name": "stdout", + 
"output_type": "stream", + "text": [ + "Bootstrapped 1 full traces after 1 examples for up to 1 rounds, amounting to 1 attempts.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2025/02/02 01:54:33 INFO dspy.teleprompt.mipro_optimizer_v2: Proposed Instructions for Predictor 0:\n", + "\n", + "2025/02/02 01:54:33 INFO dspy.teleprompt.mipro_optimizer_v2: 0: Given the fields `question`, produce the fields `answer`.\n", + "\n", + "2025/02/02 01:54:33 INFO dspy.teleprompt.mipro_optimizer_v2: 1: You are a mathematics tutor specializing in structured problem solving. For each question: 1) Analyze the problem type and required mathematical domains (algebra, number theory, etc.) 2) Break down the solution using step-by-step logic with proper formula application 3) Explicitly show intermediate calculations while maintaining dimensional consistency 4) Verify solutions meet any stated constraints (coprime requirements, integer results etc.) 5) Present your final answer in boxed notation following all problem-specific formatting rules. Ensure reasoning demonstrates conceptual understanding rather than just procedural execution.\n", + "\n", + "2025/02/02 01:54:33 INFO dspy.teleprompt.mipro_optimizer_v2: 2: Solve the mathematical problem through explicit step-by-step analysis. First, identify the core mathematical concept required (algebraic manipulation, polynomial degree analysis, exponential decay calculation). Break down the solution process into logical steps using appropriate formulas and substitutions. For polynomial problems: Track degree relationships through composition. For percentage/decay problems: Show exponentiation steps with base conversion. Validate intermediate results against problem constraints (e.g., 4n ≤ n impossibility). Finally, ensure your boxed answer matches the calculation precision requested. 
Present your reasoning using mathematical notation (e.g., 244 × 0.99¹⁰ ≈ 220.67) before stating the final answer.\n", + "\n", + "2025/02/02 01:54:33 INFO dspy.teleprompt.mipro_optimizer_v2: 3: You are an expert mathematician specializing in algebraic problem solving. For each question: 1) Analyze the problem type (proportional reasoning, polynomial expansion, sequence manipulation, etc.) 2) Break down constraints and relationships using appropriate mathematical notation 3) Validate intermediate steps for consistency with mathematical principles 4) Verify solutions satisfy all original conditions (coprimality, integer requirements, etc.) 5) Present final answer in boxed notation. Ensure explicit demonstration of: - Application of inverse/direct proportionality constants - Validation of consecutive integer sequences through substitution - Coefficient matching verification in polynomial equations - Dimensional analysis for real-world applications - Alternative solution path confirmation when applicable.\n", + "\n", + "2025/02/02 01:54:33 INFO dspy.teleprompt.mipro_optimizer_v2: 4: Analyze the mathematical problem systematically by: 1) Identifying key constraints and relationships using algebraic/number theory principles 2) Formulating equations with proper variable definitions 3) Executing stepwise calculations with LaTeX formatting 4) Verifying solutions through alternative methods or constraint checks. Structure your reasoning as: [Concept Identification] → [Equation Setup] → [Algebraic Manipulation] → [Verification Proof]. Ensure the final answer is boxed and matches both dimensional analysis and problem constraints. For geometric problems, include coordinate system justification where applicable.\n", + "\n", + "2025/02/02 01:54:33 INFO dspy.teleprompt.mipro_optimizer_v2: 5: Analyze the mathematical problem systematically and provide a detailed, step-by-step solution using proper notation. 
First identify required formulas/operations (slope, midpoint, algebraic manipulation), then compute intermediate results with explicit verification of each step. Present your final answer as a boxed numerical value. Ensure all calculations maintain mathematical precision and explicitly connect each reasoning step from problem statement to solution.\n", + "\n", + "2025/02/02 01:54:33 INFO dspy.teleprompt.mipro_optimizer_v2: 6: You are a senior mathematical analyst taking a critical certification exam where incorrect answers could lead to professional disqualification. Given complex quantitative problems requiring precise reasoning, carefully: 1) Analyze all constraints and relationships 2) Perform step-by-step calculations using proper mathematical notation 3) Validate intermediate results against domain-specific requirements (financial thresholds, algebraic validity, etc.) 4) Present your final answer with exact formatting required. Any miscalculation or reasoning gap will result in catastrophic failure - you must be meticulously accurate.\n", + "\n", + "2025/02/02 01:54:33 INFO dspy.teleprompt.mipro_optimizer_v2: 7: Analyze the mathematical problem thoroughly by first identifying the core problem type (quadratic equations, systems of equations, algebraic manipulations, etc.). Apply step-by-step reasoning that demonstrates: 1) Relevant mathematical principles/theorems used 2) Intermediate calculations with proper algebraic notation 3) Verification through alternative methods (factoring, substitution, geometric interpretation) where applicable. Ensure all constraints from the original problem are explicitly addressed (coprime requirements, integer solutions, real-world context). Structure your reasoning to show conceptual understanding before numerical computation, and conclude with a boxed final answer formatted as \\boxed{answer} that matches the problem's requested output type (integer, simplified fraction, etc.). 
Cross-validate your solution path against potential alternative approaches to ensure robustness.\n", + "\n", + "2025/02/02 01:54:33 INFO dspy.teleprompt.mipro_optimizer_v2: 8: Analyze the mathematical question through systematic problem decomposition. First, define symbolic representations for unknown quantities. Demonstrate each algebraic manipulation step-by-step using proper mathematical notation. Validate critical steps through alternative solution paths or sanity checks. For final answers requiring numerical values: 1) Present exact fractional forms before decimal conversion when applicable 2) Verify results satisfy all original constraints 3) Format using boxed LaTeX (e.g., \\boxed{7.5}). Ensure reasoning trajectory explicitly justifies every operational decision while maintaining alignment with mathematical domain conventions (algebraic, geometric, number theory).\n", + "\n", + "2025/02/02 01:54:33 INFO dspy.teleprompt.mipro_optimizer_v2: 9: Analyze the mathematical problem through systematic step-by-step reasoning. Begin by identifying key quantities and relationships, then execute calculations showing all intermediate steps. Verify solutions against mathematical constraints (e.g., integer requirements, geometric validity) and domain-specific principles (exponent rules, equation balancing). For algebraic manipulations, clearly show each transformation with proper justification. When handling real-world contexts, ensure unit consistency and contextual relevance. Present the final answer as a boxed numerical value with precision matching the problem's requirements (decimal places, fraction reduction). 
Maintain rigorous mathematical notation throughout, explicitly handling negatives, fractions, and multi-step operations while demonstrating error-checking at critical junctures.\n", + "\n", + "2025/02/02 01:54:33 INFO dspy.teleprompt.mipro_optimizer_v2: 10: Act as a mathematical expert participating in a critical engineering certification exam where errors could compromise structural safety. For each problem: 1) Systematically decompose the problem into cases/scenarios 2) Set up equations using proper mathematical notation 3) Execute substitutions and algebraic operations with step-by-step verification 4) Validate solutions against all constraints (sign conditions, integer requirements, etc.) 5) Perform cross-check substitutions in original equations. Present rigorous reasoning using exact mathematical expressions, and deliver your final answer boxed after confirming it meets all requirements. Any miscalculation will result in catastrophic system failure.\n", + "\n", + "2025/02/02 01:54:33 INFO dspy.teleprompt.mipro_optimizer_v2: 11: You are a top mathematician competing in an international problem-solving championship. Given a complex mathematical question, you must:\n", + "1. Analyze the problem structure and identify key constraints/definitions\n", + "2. Break down the solution into logical steps with proper mathematical notation\n", + "3. Apply relevant formulas/theorems while explicitly showing calculations\n", + "4. Verify intermediate results against problem conditions (e.g., co-primality, integer solutions)\n", + "5. Confirm your final answer satisfies all original constraints\n", + "\n", + "Present your rigorous reasoning using equations and derivations, then box your exact numerical answer. 
A single error could cost your team the championship - ensure flawless execution and absolute precision in both reasoning and final answer formatting.\n", + "\n", + "2025/02/02 01:54:33 INFO dspy.teleprompt.mipro_optimizer_v2: 12: Analyze the mathematical problem systematically through these steps: \n", + "1. Identify required formulas/theorems and state them explicitly\n", + "2. Perform detailed algebraic expansions/calculations with proper mathematical notation\n", + "3. Verify intermediate results against problem constraints (e.g., co-prime requirements, coefficient matching)\n", + "4. Confirm solution validity through multiple verification checks\n", + "5. Present final answer in \\boxed{} notation only after complete derivation\n", + "\n", + "Ensure each logical transition is explicitly justified using mathematical principles from algebra, number theory, or discrete mathematics. For polynomial problems, show coefficient comparisons. For geometric problems, include coordinate/dimensional analysis. Always conclude with verification that the solution satisfies all original problem conditions before providing the boxed answer.\n", + "\n", + "2025/02/02 01:54:33 INFO dspy.teleprompt.mipro_optimizer_v2: 13: As an expert mathematical problem solver competing in an international mathematics olympiad, carefully analyze each question and demonstrate flawless reasoning. Your solution will be scored based on: (1) Correct application of algebraic principles (2) Systematic verification of intermediate steps (3) Strict adherence to mathematical notation standards. Follow this protocol:\n", + "\n", + "1. Expand all expressions completely using distributive properties\n", + "2. Systematically compare coefficients for polynomial equations\n", + "3. Validate solutions against original constraints (e.g., integer requirements, equation satisfaction)\n", + "4. Present final answers in \\boxed{} notation\n", + "5. 
Include error checking steps to ensure no arithmetic mistakes\n", + "\n", + "Failure to properly verify solutions or format answers correctly will result in point deductions. Your career as a competitive mathematician depends on maintaining a perfect score record - treat each step with maximum rigor.\n", + "\n", + "2025/02/02 01:54:33 INFO dspy.teleprompt.mipro_optimizer_v2: 14: You are an expert mathematics tutor specializing in algebraic problem solving. For each question: \n", + "1. Analyze the ratio/equation structure and identify applicable mathematical principles\n", + "2. Methodically apply operations (substitution, Vieta's formulas, polynomial factoring) with clear step explanations\n", + "3. Verify intermediate results align with given conditions (ratios, equation constraints)\n", + "4. Finalize with a boxed numerical answer in \\boxed{} notation. \n", + "Always show verification steps to confirm solution consistency, and format fractions/negatives precisely. Prioritize systematic reasoning over shortcut answers.\n", + "\n", + "2025/02/02 01:54:33 INFO dspy.teleprompt.mipro_optimizer_v2: 15: 1. Systematically applying relevant formulas/theorems \n", + "2. Showing complete algebraic manipulations with proper notation \n", + "3. Verifying critical steps through alternative methods \n", + "4. Presenting the final answer as \\boxed{value}\n", + "\n", + "Adhere to mathematical rigor while maintaining clear step progression. For ratio/sequence problems, explicitly track relationships between terms. For quadratics, demonstrate both formulaic and factorization approaches when applicable.\n", + "\n", + "2025/02/02 01:54:33 INFO dspy.teleprompt.mipro_optimizer_v2: 16: Solve the mathematical question systematically by: \n", + "1. Analyzing the problem to identify relevant formulas/principles\n", + "2. Constructing clear equations using proper LaTeX notation\n", + "3. Showing all algebraic manipulations and intermediate calculations\n", + "4. 
Verifying solutions against mathematical constraints (coprimality, square-free requirements, etc.)\n", + "5. Concluding with a boxed final answer in standard form (\\boxed{})\n", + "\n", + "Ensure your reasoning:\n", + "- Explicitly states each logical step\n", + "- Handles special conditions mentioned in the problem\n", + "- Maintains mathematical rigor throughout\n", + "- Validates intermediate results when applicable\n", + "- For real-world scenarios, preserves unit consistency\n", + "- For recursive/iterative structures, establishes and solves proper equations\n", + "\n", + "2025/02/02 01:54:33 INFO dspy.teleprompt.mipro_optimizer_v2: 17: You are an expert mathematics tutor with deep knowledge of algebraic problem-solving and quantitative reasoning. For each problem:\n", + "1. Analyze the question type (algebraic manipulation, proportionality, continued fractions, etc.)\n", + "2. Break down the problem into sequential logical/mathematical steps\n", + "3. Explicitly state relevant formulas/principles (e.g., inverse proportionality = k/C, exponential decay formula)\n", + "4. Show all intermediate calculations using proper mathematical notation\n", + "5. Verify solutions satisfy initial constraints (e.g., square-free terms, physical plausibility)\n", + "6. Format final answers as boxed numerals (e.g., \\boxed{9}) or specified forms (a+b√c combinations)\n", + "\n", + "When handling multi-step problems: First establish relationships between quantities, set up equations systematically, solve through algebraic manipulation, then simplify. For continued fractions/recursive patterns: define recurring terms and solve resulting equations. Always check that your final answer matches the problem's requested format and mathematical constraints.\n", + "\n", + "2025/02/02 01:54:33 INFO dspy.teleprompt.mipro_optimizer_v2: 18: Analyze the mathematical problem through rigorous algebraic derivation. First, identify key components and relationships in the question. 
Define variables for unknown quantities and establish equations using formal mathematical notation. Systematically perform symbolic manipulations - showing all polynomial expansions, substitutions, and applications of exponent rules. When solving equations, explicitly state which solutions satisfy original constraints (positivity, coprimality, etc.). For geometric problems, employ coordinate systems and geometric principles. Validate intermediate results against problem conditions (e.g., check square-free requirements). Present final answers as boxed values derived directly from your algebraic working, ensuring they match the requested format (simplified radicals, rounded numbers, etc.). Never skip logical steps or make unsupported assumptions.\n", + "\n", + "2025/02/02 01:54:33 INFO dspy.teleprompt.mipro_optimizer_v2: \n", + "\n", + "2025/02/02 01:54:33 INFO dspy.teleprompt.mipro_optimizer_v2: ==> STEP 3: FINDING OPTIMAL PROMPT PARAMETERS <==\n", + "2025/02/02 01:54:33 INFO dspy.teleprompt.mipro_optimizer_v2: We will evaluate the program over a series of trials with different combinations of instructions and few-shot examples to find the optimal combination using Bayesian Optimization.\n", + "\n", + "2025/02/02 01:54:33 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 1 / 28 - Full Evaluation of Default Program ==\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Average Metric: 209.00 / 280 (74.6%): 100%|██████████| 280/280 [00:00<00:00, 4584.08it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2025/02/02 01:54:34 INFO dspy.evaluate.evaluate: Average Metric: 209 / 280 (74.6%)\n", + "2025/02/02 01:54:34 INFO dspy.teleprompt.mipro_optimizer_v2: Default program score: 74.64\n", + "\n", + "2025/02/02 01:54:34 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 2 / 28 - Minibatch ==\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Average Metric: 23.00 / 25 (92.0%): 
100%|██████████| 25/25 [00:39<00:00, 1.58s/it] " + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2025/02/02 01:55:13 INFO dspy.evaluate.evaluate: Average Metric: 23 / 25 (92.0%)\n", + "2025/02/02 01:55:13 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 92.0 on minibatch of size 25 with parameters ['Predictor 0: Instruction 12', 'Predictor 0: Few-Shot Set 7'].\n", + "2025/02/02 01:55:13 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [92.0]\n", + "2025/02/02 01:55:13 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [74.64]\n", + "2025/02/02 01:55:13 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 74.64\n", + "2025/02/02 01:55:13 INFO dspy.teleprompt.mipro_optimizer_v2: =========================================\n", + "\n", + "\n", + "2025/02/02 01:55:13 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 3 / 28 - Minibatch ==\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Average Metric: 22.00 / 25 (88.0%): 100%|██████████| 25/25 [00:10<00:00, 2.34it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2025/02/02 01:55:24 INFO dspy.evaluate.evaluate: Average Metric: 22 / 25 (88.0%)\n", + "2025/02/02 01:55:24 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 88.0 on minibatch of size 25 with parameters ['Predictor 0: Instruction 10', 'Predictor 0: Few-Shot Set 7'].\n", + "2025/02/02 01:55:24 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [92.0, 88.0]\n", + "2025/02/02 01:55:24 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [74.64]\n", + "2025/02/02 01:55:24 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 74.64\n", + "2025/02/02 01:55:24 INFO dspy.teleprompt.mipro_optimizer_v2: =========================================\n", + "\n", + "\n", + "2025/02/02 01:55:24 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 4 / 28 - Minibatch ==\n" + ] + }, + { + "name": "stdout", + 
"output_type": "stream", + "text": [ + "\n", + "Average Metric: 22.00 / 25 (88.0%): 100%|██████████| 25/25 [00:10<00:00, 2.45it/s] " + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2025/02/02 01:55:34 INFO dspy.evaluate.evaluate: Average Metric: 22 / 25 (88.0%)\n", + "2025/02/02 01:55:34 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 88.0 on minibatch of size 25 with parameters ['Predictor 0: Instruction 7', 'Predictor 0: Few-Shot Set 18'].\n", + "2025/02/02 01:55:34 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [92.0, 88.0, 88.0]\n", + "2025/02/02 01:55:34 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [74.64]\n", + "2025/02/02 01:55:34 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 74.64\n", + "2025/02/02 01:55:34 INFO dspy.teleprompt.mipro_optimizer_v2: =========================================\n", + "\n", + "\n", + "2025/02/02 01:55:34 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 5 / 28 - Minibatch ==\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Average Metric: 22.00 / 25 (88.0%): 100%|██████████| 25/25 [00:07<00:00, 3.47it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2025/02/02 01:55:41 INFO dspy.evaluate.evaluate: Average Metric: 22 / 25 (88.0%)\n", + "2025/02/02 01:55:41 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 88.0 on minibatch of size 25 with parameters ['Predictor 0: Instruction 15', 'Predictor 0: Few-Shot Set 2'].\n", + "2025/02/02 01:55:41 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [92.0, 88.0, 88.0, 88.0]\n", + "2025/02/02 01:55:41 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [74.64]\n", + "2025/02/02 01:55:41 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 74.64\n", + "2025/02/02 01:55:41 INFO dspy.teleprompt.mipro_optimizer_v2: =========================================\n", + "\n", + "\n", + "2025/02/02 01:55:41 INFO 
dspy.teleprompt.mipro_optimizer_v2: == Trial 6 / 28 - Minibatch ==\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Average Metric: 25.00 / 25 (100.0%): 100%|██████████| 25/25 [00:11<00:00, 2.16it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2025/02/02 01:55:53 INFO dspy.evaluate.evaluate: Average Metric: 25 / 25 (100.0%)\n", + "2025/02/02 01:55:53 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 100.0 on minibatch of size 25 with parameters ['Predictor 0: Instruction 8', 'Predictor 0: Few-Shot Set 18'].\n", + "2025/02/02 01:55:53 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [92.0, 88.0, 88.0, 88.0, 100.0]\n", + "2025/02/02 01:55:53 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [74.64]\n", + "2025/02/02 01:55:53 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 74.64\n", + "2025/02/02 01:55:53 INFO dspy.teleprompt.mipro_optimizer_v2: =========================================\n", + "\n", + "\n", + "2025/02/02 01:55:53 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 7 / 28 - Minibatch ==\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Average Metric: 19.00 / 25 (76.0%): 100%|██████████| 25/25 [00:11<00:00, 2.12it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2025/02/02 01:56:05 INFO dspy.evaluate.evaluate: Average Metric: 19 / 25 (76.0%)\n", + "2025/02/02 01:56:05 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 76.0 on minibatch of size 25 with parameters ['Predictor 0: Instruction 7', 'Predictor 0: Few-Shot Set 1'].\n", + "2025/02/02 01:56:05 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [92.0, 88.0, 88.0, 88.0, 100.0, 76.0]\n", + "2025/02/02 01:56:05 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [74.64]\n", + "2025/02/02 01:56:05 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 74.64\n", + "2025/02/02 01:56:05 INFO 
dspy.teleprompt.mipro_optimizer_v2: =========================================\n", + "\n", + "\n", + "2025/02/02 01:56:05 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 8 / 28 - Minibatch ==\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Average Metric: 21.00 / 25 (84.0%): 100%|██████████| 25/25 [00:28<00:00, 1.16s/it]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2025/02/02 01:56:34 INFO dspy.evaluate.evaluate: Average Metric: 21 / 25 (84.0%)\n", + "2025/02/02 01:56:34 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 84.0 on minibatch of size 25 with parameters ['Predictor 0: Instruction 7', 'Predictor 0: Few-Shot Set 12'].\n", + "2025/02/02 01:56:34 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [92.0, 88.0, 88.0, 88.0, 100.0, 76.0, 84.0]\n", + "2025/02/02 01:56:34 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [74.64]\n", + "2025/02/02 01:56:34 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 74.64\n", + "2025/02/02 01:56:34 INFO dspy.teleprompt.mipro_optimizer_v2: =========================================\n", + "\n", + "\n", + "2025/02/02 01:56:34 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 9 / 28 - Minibatch ==\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Average Metric: 17.00 / 25 (68.0%): 100%|██████████| 25/25 [00:12<00:00, 2.04it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2025/02/02 01:56:46 INFO dspy.evaluate.evaluate: Average Metric: 17 / 25 (68.0%)\n", + "2025/02/02 01:56:46 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 68.0 on minibatch of size 25 with parameters ['Predictor 0: Instruction 11', 'Predictor 0: Few-Shot Set 13'].\n", + "2025/02/02 01:56:46 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [92.0, 88.0, 88.0, 88.0, 100.0, 76.0, 84.0, 68.0]\n", + "2025/02/02 01:56:46 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval 
scores so far: [74.64]\n", + "2025/02/02 01:56:46 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 74.64\n", + "2025/02/02 01:56:46 INFO dspy.teleprompt.mipro_optimizer_v2: =========================================\n", + "\n", + "\n", + "2025/02/02 01:56:46 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 10 / 28 - Minibatch ==\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Average Metric: 22.00 / 25 (88.0%): 100%|██████████| 25/25 [00:10<00:00, 2.31it/s] " + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2025/02/02 01:56:57 INFO dspy.evaluate.evaluate: Average Metric: 22 / 25 (88.0%)\n", + "2025/02/02 01:56:57 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 88.0 on minibatch of size 25 with parameters ['Predictor 0: Instruction 5', 'Predictor 0: Few-Shot Set 4'].\n", + "2025/02/02 01:56:57 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [92.0, 88.0, 88.0, 88.0, 100.0, 76.0, 84.0, 68.0, 88.0]\n", + "2025/02/02 01:56:57 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [74.64]\n", + "2025/02/02 01:56:57 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 74.64\n", + "2025/02/02 01:56:57 INFO dspy.teleprompt.mipro_optimizer_v2: ==========================================\n", + "\n", + "\n", + "2025/02/02 01:56:57 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 11 /28 - Full Evaluation =====\n", + "2025/02/02 01:56:57 INFO dspy.teleprompt.mipro_optimizer_v2: Doing full eval on next top averaging program (Avg Score: 100.0) from minibatch trials...\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Average Metric: 250.00 / 279 (89.6%): 100%|█████████▉| 279/280 [00:45<00:01, 1.42s/it]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2025/02/02 01:58:10 ERROR dspy.utils.parallelizer: Error processing item Example({'question': 'If $x$, $y$, and $z$ are positive integers such that 
$6xyz+30xy+21xz+2yz+105x+10y+7z=812$, find $x+y+z$.', 'reasoning': \"Usually when we apply Simon's Favorite Factoring Trick, we have two variables. Maybe we can find an adaptation for three variables. We notice that four of the terms on the left hand side have a factor of $z$ in them, so we can factor it out as: $$z(6xy+21x+2y+7)+30xy+105x+10y=812.$$This looks promising! Add $35$ to each side and continue factoring: \\\\begin{align*}\\nz(6xy+21x+2y+7)+30xy+105x+10y+35&=812+35 \\\\quad \\\\Rightarrow \\\\\\\\\\nz(6xy+21x+2y+7)+5(6xy+21x+2y+7)&=812+35 \\\\quad \\\\Rightarrow \\\\\\\\\\n(z+5)(6xy+21x+2y+7)&=847.\\n\\\\end{align*}Now we can proceed with the two-variable version of Simon's Favorite Factoring Trick on the remaining four-term factor: \\\\begin{align*}\\n(z+5)(3x(2y+7)+2y+7)&=847 \\\\quad \\\\Rightarrow \\\\\\\\\\n(z+5)(3x+1)(2y+7)&=847.\\n\\\\end{align*}The prime factorization of $847$ is $7\\\\cdot 11^2$. We must find $3$ numbers which multiply to $847$ and assign them to $z+5$, $3x+1$, and $2y+7$. We know none of the factors can be negative, since then we would have a negative solution for $x$, $y$ or $z$, which must be positive numbers. Similarly, no factor can be $1$ because that would give either $z=-4$, $x=0$, or $y=-3$, none of which is allowable. There are only $3$ non-one factors which multiply to $847$, so in some order our three factors must be $7$, $11$, and $11$.\\n\\nWe examine the $3x+1$ term. If this factor is equal to $11$, then $x=\\\\frac{10}{3}$, which is not an integer. So $3x+1=7$ and $x=2$. The remaining factors must equal $11$. Setting $2y+7=11$ gives $y=2$, and setting $z+5=11$ gives $z=6$. Thus $x+y+z=2+2+6=\\\\boxed{10}$.\", 'answer': '10'}) (input_keys={'question'}): 'list' object has no attribute 'items'. 
Set `provide_traceback=True` to see the stack trace.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Average Metric: 250.00 / 279 (89.6%): 100%|██████████| 280/280 [01:13<00:00, 3.83it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2025/02/02 01:58:10 INFO dspy.evaluate.evaluate: Average Metric: 250.0 / 280 (89.3%)\n", + "2025/02/02 01:58:10 INFO dspy.teleprompt.mipro_optimizer_v2: \u001b[92mNew best full eval score!\u001b[0m Score: 89.29\n", + "2025/02/02 01:58:10 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [74.64, 89.29]\n", + "2025/02/02 01:58:10 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 89.29\n", + "2025/02/02 01:58:10 INFO dspy.teleprompt.mipro_optimizer_v2: =======================\n", + "2025/02/02 01:58:10 INFO dspy.teleprompt.mipro_optimizer_v2: \n", + "\n", + "2025/02/02 01:58:10 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 12 / 28 - Minibatch ==\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Average Metric: 22.00 / 25 (88.0%): 100%|██████████| 25/25 [00:07<00:00, 3.29it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2025/02/02 01:58:18 INFO dspy.evaluate.evaluate: Average Metric: 22 / 25 (88.0%)\n", + "2025/02/02 01:58:18 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 88.0 on minibatch of size 25 with parameters ['Predictor 0: Instruction 12', 'Predictor 0: Few-Shot Set 3'].\n", + "2025/02/02 01:58:18 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [92.0, 88.0, 88.0, 88.0, 100.0, 76.0, 84.0, 68.0, 88.0, 88.0]\n", + "2025/02/02 01:58:18 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [74.64, 89.29]\n", + "2025/02/02 01:58:18 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 89.29\n", + "2025/02/02 01:58:18 INFO dspy.teleprompt.mipro_optimizer_v2: ==========================================\n", + "\n", + "\n", + "2025/02/02 
01:58:18 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 13 / 28 - Minibatch ==\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Average Metric: 22.00 / 25 (88.0%): 100%|██████████| 25/25 [00:08<00:00, 2.86it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2025/02/02 01:58:27 INFO dspy.evaluate.evaluate: Average Metric: 22 / 25 (88.0%)\n", + "2025/02/02 01:58:27 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 88.0 on minibatch of size 25 with parameters ['Predictor 0: Instruction 12', 'Predictor 0: Few-Shot Set 7'].\n", + "2025/02/02 01:58:27 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [92.0, 88.0, 88.0, 88.0, 100.0, 76.0, 84.0, 68.0, 88.0, 88.0, 88.0]\n", + "2025/02/02 01:58:27 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [74.64, 89.29]\n", + "2025/02/02 01:58:27 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 89.29\n", + "2025/02/02 01:58:27 INFO dspy.teleprompt.mipro_optimizer_v2: ==========================================\n", + "\n", + "\n", + "2025/02/02 01:58:27 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 14 / 28 - Minibatch ==\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Average Metric: 24.00 / 25 (96.0%): 100%|██████████| 25/25 [00:09<00:00, 2.53it/s] " + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2025/02/02 01:58:37 INFO dspy.evaluate.evaluate: Average Metric: 24 / 25 (96.0%)\n", + "2025/02/02 01:58:37 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 96.0 on minibatch of size 25 with parameters ['Predictor 0: Instruction 4', 'Predictor 0: Few-Shot Set 16'].\n", + "2025/02/02 01:58:37 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [92.0, 88.0, 88.0, 88.0, 100.0, 76.0, 84.0, 68.0, 88.0, 88.0, 88.0, 96.0]\n", + "2025/02/02 01:58:37 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [74.64, 89.29]\n", + "2025/02/02 01:58:37 
INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 89.29\n", + "2025/02/02 01:58:37 INFO dspy.teleprompt.mipro_optimizer_v2: ==========================================\n", + "\n", + "\n", + "2025/02/02 01:58:37 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 15 / 28 - Minibatch ==\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Average Metric: 24.00 / 25 (96.0%): 100%|██████████| 25/25 [00:40<00:00, 1.63s/it] " + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2025/02/02 01:59:17 INFO dspy.evaluate.evaluate: Average Metric: 24 / 25 (96.0%)\n", + "2025/02/02 01:59:17 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 96.0 on minibatch of size 25 with parameters ['Predictor 0: Instruction 4', 'Predictor 0: Few-Shot Set 16'].\n", + "2025/02/02 01:59:17 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [92.0, 88.0, 88.0, 88.0, 100.0, 76.0, 84.0, 68.0, 88.0, 88.0, 88.0, 96.0, 96.0]\n", + "2025/02/02 01:59:17 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [74.64, 89.29]\n", + "2025/02/02 01:59:17 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 89.29\n", + "2025/02/02 01:59:17 INFO dspy.teleprompt.mipro_optimizer_v2: ==========================================\n", + "\n", + "\n", + "2025/02/02 01:59:17 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 16 / 28 - Minibatch ==\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Average Metric: 20.00 / 25 (80.0%): 100%|██████████| 25/25 [00:12<00:00, 1.98it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2025/02/02 01:59:30 INFO dspy.evaluate.evaluate: Average Metric: 20 / 25 (80.0%)\n", + "2025/02/02 01:59:30 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 80.0 on minibatch of size 25 with parameters ['Predictor 0: Instruction 4', 'Predictor 0: Few-Shot Set 6'].\n", + "2025/02/02 01:59:30 INFO dspy.teleprompt.mipro_optimizer_v2: 
Minibatch scores so far: [92.0, 88.0, 88.0, 88.0, 100.0, 76.0, 84.0, 68.0, 88.0, 88.0, 88.0, 96.0, 96.0, 80.0]\n", + "2025/02/02 01:59:30 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [74.64, 89.29]\n", + "2025/02/02 01:59:30 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 89.29\n", + "2025/02/02 01:59:30 INFO dspy.teleprompt.mipro_optimizer_v2: ==========================================\n", + "\n", + "\n", + "2025/02/02 01:59:30 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 17 / 28 - Minibatch ==\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Average Metric: 24.00 / 25 (96.0%): 100%|██████████| 25/25 [00:00<00:00, 4504.58it/s] " + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2025/02/02 01:59:30 INFO dspy.evaluate.evaluate: Average Metric: 24 / 25 (96.0%)\n", + "2025/02/02 01:59:30 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 96.0 on minibatch of size 25 with parameters ['Predictor 0: Instruction 8', 'Predictor 0: Few-Shot Set 18'].\n", + "2025/02/02 01:59:30 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [92.0, 88.0, 88.0, 88.0, 100.0, 76.0, 84.0, 68.0, 88.0, 88.0, 88.0, 96.0, 96.0, 80.0, 96.0]\n", + "2025/02/02 01:59:30 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [74.64, 89.29]\n", + "2025/02/02 01:59:30 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 89.29\n", + "2025/02/02 01:59:30 INFO dspy.teleprompt.mipro_optimizer_v2: ==========================================\n", + "\n", + "\n", + "2025/02/02 01:59:30 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 18 / 28 - Minibatch ==\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Average Metric: 22.00 / 25 (88.0%): 100%|██████████| 25/25 [00:10<00:00, 2.44it/s] " + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2025/02/02 01:59:40 INFO dspy.evaluate.evaluate: Average Metric: 22 / 25 
(88.0%)\n", + "2025/02/02 01:59:40 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 88.0 on minibatch of size 25 with parameters ['Predictor 0: Instruction 18', 'Predictor 0: Few-Shot Set 16'].\n", + "2025/02/02 01:59:40 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [92.0, 88.0, 88.0, 88.0, 100.0, 76.0, 84.0, 68.0, 88.0, 88.0, 88.0, 96.0, 96.0, 80.0, 96.0, 88.0]\n", + "2025/02/02 01:59:40 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [74.64, 89.29]\n", + "2025/02/02 01:59:40 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 89.29\n", + "2025/02/02 01:59:40 INFO dspy.teleprompt.mipro_optimizer_v2: ==========================================\n", + "\n", + "\n", + "2025/02/02 01:59:40 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 19 / 28 - Minibatch ==\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Average Metric: 19.00 / 25 (76.0%): 100%|██████████| 25/25 [00:09<00:00, 2.64it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2025/02/02 01:59:50 INFO dspy.evaluate.evaluate: Average Metric: 19 / 25 (76.0%)\n", + "2025/02/02 01:59:50 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 76.0 on minibatch of size 25 with parameters ['Predictor 0: Instruction 6', 'Predictor 0: Few-Shot Set 14'].\n", + "2025/02/02 01:59:50 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [92.0, 88.0, 88.0, 88.0, 100.0, 76.0, 84.0, 68.0, 88.0, 88.0, 88.0, 96.0, 96.0, 80.0, 96.0, 88.0, 76.0]\n", + "2025/02/02 01:59:50 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [74.64, 89.29]\n", + "2025/02/02 01:59:50 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 89.29\n", + "2025/02/02 01:59:50 INFO dspy.teleprompt.mipro_optimizer_v2: ==========================================\n", + "\n", + "\n", + "2025/02/02 01:59:50 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 20 / 28 - Minibatch ==\n" + ] + }, + { + "name": "stdout", + 
"output_type": "stream", + "text": [ + "\n", + "Average Metric: 22.00 / 25 (88.0%): 100%|██████████| 25/25 [00:09<00:00, 2.70it/s] " + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2025/02/02 01:59:59 INFO dspy.evaluate.evaluate: Average Metric: 22 / 25 (88.0%)\n", + "2025/02/02 01:59:59 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 88.0 on minibatch of size 25 with parameters ['Predictor 0: Instruction 1', 'Predictor 0: Few-Shot Set 15'].\n", + "2025/02/02 01:59:59 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [92.0, 88.0, 88.0, 88.0, 100.0, 76.0, 84.0, 68.0, 88.0, 88.0, 88.0, 96.0, 96.0, 80.0, 96.0, 88.0, 76.0, 88.0]\n", + "2025/02/02 01:59:59 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [74.64, 89.29]\n", + "2025/02/02 01:59:59 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 89.29\n", + "2025/02/02 01:59:59 INFO dspy.teleprompt.mipro_optimizer_v2: ==========================================\n", + "\n", + "\n", + "2025/02/02 01:59:59 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 21 /28 - Full Evaluation =====\n", + "2025/02/02 01:59:59 INFO dspy.teleprompt.mipro_optimizer_v2: Doing full eval on next top averaging program (Avg Score: 96.0) from minibatch trials...\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Average Metric: 250.00 / 279 (89.6%): 100%|█████████▉| 279/280 [00:51<00:02, 2.71s/it]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2025/02/02 02:00:59 ERROR dspy.utils.parallelizer: Error processing item Example({'question': 'If $x$, $y$, and $z$ are positive integers such that $6xyz+30xy+21xz+2yz+105x+10y+7z=812$, find $x+y+z$.', 'reasoning': \"Usually when we apply Simon's Favorite Factoring Trick, we have two variables. Maybe we can find an adaptation for three variables. 
We notice that four of the terms on the left hand side have a factor of $z$ in them, so we can factor it out as: $$z(6xy+21x+2y+7)+30xy+105x+10y=812.$$This looks promising! Add $35$ to each side and continue factoring: \\\\begin{align*}\\nz(6xy+21x+2y+7)+30xy+105x+10y+35&=812+35 \\\\quad \\\\Rightarrow \\\\\\\\\\nz(6xy+21x+2y+7)+5(6xy+21x+2y+7)&=812+35 \\\\quad \\\\Rightarrow \\\\\\\\\\n(z+5)(6xy+21x+2y+7)&=847.\\n\\\\end{align*}Now we can proceed with the two-variable version of Simon's Favorite Factoring Trick on the remaining four-term factor: \\\\begin{align*}\\n(z+5)(3x(2y+7)+2y+7)&=847 \\\\quad \\\\Rightarrow \\\\\\\\\\n(z+5)(3x+1)(2y+7)&=847.\\n\\\\end{align*}The prime factorization of $847$ is $7\\\\cdot 11^2$. We must find $3$ numbers which multiply to $847$ and assign them to $z+5$, $3x+1$, and $2y+7$. We know none of the factors can be negative, since then we would have a negative solution for $x$, $y$ or $z$, which must be positive numbers. Similarly, no factor can be $1$ because that would give either $z=-4$, $x=0$, or $y=-3$, none of which is allowable. There are only $3$ non-one factors which multiply to $847$, so in some order our three factors must be $7$, $11$, and $11$.\\n\\nWe examine the $3x+1$ term. If this factor is equal to $11$, then $x=\\\\frac{10}{3}$, which is not an integer. So $3x+1=7$ and $x=2$. The remaining factors must equal $11$. Setting $2y+7=11$ gives $y=2$, and setting $z+5=11$ gives $z=6$. Thus $x+y+z=2+2+6=\\\\boxed{10}$.\", 'answer': '10'}) (input_keys={'question'}): Expected dict_keys(['reasoning', 'answer']) but got dict_keys(['reasoning']). 
Set `provide_traceback=True` to see the stack trace.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Average Metric: 250.00 / 279 (89.6%): 100%|██████████| 280/280 [00:59<00:00, 4.73it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2025/02/02 02:00:59 INFO dspy.evaluate.evaluate: Average Metric: 250.0 / 280 (89.3%)\n", + "2025/02/02 02:00:59 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [74.64, 89.29, 89.29]\n", + "2025/02/02 02:00:59 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 89.29\n", + "2025/02/02 02:00:59 INFO dspy.teleprompt.mipro_optimizer_v2: =======================\n", + "2025/02/02 02:00:59 INFO dspy.teleprompt.mipro_optimizer_v2: \n", + "\n", + "2025/02/02 02:00:59 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 22 / 28 - Minibatch ==\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Average Metric: 22.00 / 25 (88.0%): 100%|██████████| 25/25 [00:15<00:00, 1.59it/s] " + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2025/02/02 02:01:14 INFO dspy.evaluate.evaluate: Average Metric: 22 / 25 (88.0%)\n", + "2025/02/02 02:01:14 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 88.0 on minibatch of size 25 with parameters ['Predictor 0: Instruction 14', 'Predictor 0: Few-Shot Set 16'].\n", + "2025/02/02 02:01:14 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [92.0, 88.0, 88.0, 88.0, 100.0, 76.0, 84.0, 68.0, 88.0, 88.0, 88.0, 96.0, 96.0, 80.0, 96.0, 88.0, 76.0, 88.0, 88.0]\n", + "2025/02/02 02:01:14 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [74.64, 89.29, 89.29]\n", + "2025/02/02 02:01:14 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 89.29\n", + "2025/02/02 02:01:14 INFO dspy.teleprompt.mipro_optimizer_v2: ==========================================\n", + "\n", + "\n", + "2025/02/02 02:01:14 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 
23 / 28 - Minibatch ==\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Average Metric: 22.00 / 25 (88.0%): 100%|██████████| 25/25 [00:10<00:00, 2.32it/s] " + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2025/02/02 02:01:25 INFO dspy.evaluate.evaluate: Average Metric: 22 / 25 (88.0%)\n", + "2025/02/02 02:01:25 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 88.0 on minibatch of size 25 with parameters ['Predictor 0: Instruction 3', 'Predictor 0: Few-Shot Set 5'].\n", + "2025/02/02 02:01:25 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [92.0, 88.0, 88.0, 88.0, 100.0, 76.0, 84.0, 68.0, 88.0, 88.0, 88.0, 96.0, 96.0, 80.0, 96.0, 88.0, 76.0, 88.0, 88.0, 88.0]\n", + "2025/02/02 02:01:25 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [74.64, 89.29, 89.29]\n", + "2025/02/02 02:01:25 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 89.29\n", + "2025/02/02 02:01:25 INFO dspy.teleprompt.mipro_optimizer_v2: ==========================================\n", + "\n", + "\n", + "2025/02/02 02:01:25 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 24 / 28 - Minibatch ==\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Average Metric: 21.00 / 25 (84.0%): 100%|██████████| 25/25 [00:09<00:00, 2.56it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2025/02/02 02:01:35 INFO dspy.evaluate.evaluate: Average Metric: 21 / 25 (84.0%)\n", + "2025/02/02 02:01:35 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 84.0 on minibatch of size 25 with parameters ['Predictor 0: Instruction 2', 'Predictor 0: Few-Shot Set 9'].\n", + "2025/02/02 02:01:35 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [92.0, 88.0, 88.0, 88.0, 100.0, 76.0, 84.0, 68.0, 88.0, 88.0, 88.0, 96.0, 96.0, 80.0, 96.0, 88.0, 76.0, 88.0, 88.0, 88.0, 84.0]\n", + "2025/02/02 02:01:35 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval 
scores so far: [74.64, 89.29, 89.29]\n", + "2025/02/02 02:01:35 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 89.29\n", + "2025/02/02 02:01:35 INFO dspy.teleprompt.mipro_optimizer_v2: ==========================================\n", + "\n", + "\n", + "2025/02/02 02:01:35 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 25 / 28 - Minibatch ==\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Average Metric: 24.00 / 25 (96.0%): 100%|██████████| 25/25 [00:00<00:00, 2906.25it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2025/02/02 02:01:35 INFO dspy.evaluate.evaluate: Average Metric: 24 / 25 (96.0%)\n", + "2025/02/02 02:01:35 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 96.0 on minibatch of size 25 with parameters ['Predictor 0: Instruction 4', 'Predictor 0: Few-Shot Set 16'].\n", + "2025/02/02 02:01:35 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [92.0, 88.0, 88.0, 88.0, 100.0, 76.0, 84.0, 68.0, 88.0, 88.0, 88.0, 96.0, 96.0, 80.0, 96.0, 88.0, 76.0, 88.0, 88.0, 88.0, 84.0, 96.0]\n", + "2025/02/02 02:01:35 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [74.64, 89.29, 89.29]\n", + "2025/02/02 02:01:35 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 89.29\n", + "2025/02/02 02:01:35 INFO dspy.teleprompt.mipro_optimizer_v2: ==========================================\n", + "\n", + "\n", + "2025/02/02 02:01:35 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 26 / 28 - Minibatch ==\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Average Metric: 22.00 / 25 (88.0%): 100%|██████████| 25/25 [00:10<00:00, 2.33it/s] " + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2025/02/02 02:01:46 INFO dspy.evaluate.evaluate: Average Metric: 22 / 25 (88.0%)\n", + "2025/02/02 02:01:46 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 88.0 on minibatch of size 25 with parameters 
['Predictor 0: Instruction 13', 'Predictor 0: Few-Shot Set 16'].\n", + "2025/02/02 02:01:46 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [92.0, 88.0, 88.0, 88.0, 100.0, 76.0, 84.0, 68.0, 88.0, 88.0, 88.0, 96.0, 96.0, 80.0, 96.0, 88.0, 76.0, 88.0, 88.0, 88.0, 84.0, 96.0, 88.0]\n", + "2025/02/02 02:01:46 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [74.64, 89.29, 89.29]\n", + "2025/02/02 02:01:46 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 89.29\n", + "2025/02/02 02:01:46 INFO dspy.teleprompt.mipro_optimizer_v2: ==========================================\n", + "\n", + "\n", + "2025/02/02 02:01:46 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 27 / 28 - Minibatch ==\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Average Metric: 22.00 / 25 (88.0%): 100%|██████████| 25/25 [00:16<00:00, 1.47it/s] " + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2025/02/02 02:02:03 INFO dspy.evaluate.evaluate: Average Metric: 22 / 25 (88.0%)\n", + "2025/02/02 02:02:03 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 88.0 on minibatch of size 25 with parameters ['Predictor 0: Instruction 17', 'Predictor 0: Few-Shot Set 17'].\n", + "2025/02/02 02:02:03 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [92.0, 88.0, 88.0, 88.0, 100.0, 76.0, 84.0, 68.0, 88.0, 88.0, 88.0, 96.0, 96.0, 80.0, 96.0, 88.0, 76.0, 88.0, 88.0, 88.0, 84.0, 96.0, 88.0, 88.0]\n", + "2025/02/02 02:02:03 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [74.64, 89.29, 89.29]\n", + "2025/02/02 02:02:03 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 89.29\n", + "2025/02/02 02:02:03 INFO dspy.teleprompt.mipro_optimizer_v2: ==========================================\n", + "\n", + "\n", + "2025/02/02 02:02:03 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 28 /28 - Full Evaluation =====\n", + "2025/02/02 02:02:03 INFO 
dspy.teleprompt.mipro_optimizer_v2: Doing full eval on next top averaging program (Avg Score: 90.0) from minibatch trials...\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Average Metric: 254.00 / 280 (90.7%): 100%|██████████| 280/280 [00:53<00:00, 5.20it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2025/02/02 02:02:57 INFO dspy.evaluate.evaluate: Average Metric: 254 / 280 (90.7%)\n", + "2025/02/02 02:02:57 INFO dspy.teleprompt.mipro_optimizer_v2: \u001b[92mNew best full eval score!\u001b[0m Score: 90.71\n", + "2025/02/02 02:02:57 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [74.64, 89.29, 89.29, 90.71]\n", + "2025/02/02 02:02:57 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 90.71\n", + "2025/02/02 02:02:57 INFO dspy.teleprompt.mipro_optimizer_v2: =======================\n", + "2025/02/02 02:02:57 INFO dspy.teleprompt.mipro_optimizer_v2: \n", + "\n", + "2025/02/02 02:02:57 INFO dspy.teleprompt.mipro_optimizer_v2: Returning best identified program with score 90.71!\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Average Metric: 317.00 / 350 (90.6%): 100%|██████████| 350/350 [00:57<00:00, 6.04it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2025/02/02 02:03:55 INFO dspy.evaluate.evaluate: Average Metric: 317 / 350 (90.6%)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Average Metric: 224.00 / 349 (64.2%): 100%|█████████▉| 349/350 [05:50<00:05, 5.09s/it]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2025/02/02 02:12:13 ERROR dspy.utils.parallelizer: Error processing item Example({'question': 'Find the smallest possible value of $$\\\\frac{(y-x)^2}{(y-z)(z-x)} + \\\\frac{(z-y)^2}{(z-x)(x-y)} + \\\\frac{(x-z)^2}{(x-y)(y-z)},$$ where $x,y,$ and $z$ are distinct real numbers.', 'reasoning': 'Combining all three fractions 
under a single denominator, the given expression is equal to $$\\\\frac{(x-y)^3 + (y-z)^3 + (z-x)^3}{(x-y)(y-z)(z-x)}.$$ Consider the numerator as a polynomial in $x$, so that $P(x) = (x-y)^3 + (y-z)^3 + (z-x)^3$ (where we treat $y$ and $z$ as fixed values). It follows that $P(y) = (y-y)^3 + (y-z)^3 + (z-y)^3 = 0$, so $y$ is a root of $P(x) = 0$ and $x-y$ divides into $P(x)$. By symmetry, it follows that $y-z$ and $z-x$ divide into $P(x)$. Since $P$ is a cubic in its variables, it follows that $P = k(x-y)(y-z)(z-x)$, where $k$ is a constant. By either expanding the definition of $P$, or by trying test values (if we take $x = 0, y = -1, z = 1$, we obtain $P = -6 = k \\\\cdot (-2)$), it follows that $k = 3$. Thus, $$\\\\frac{(x-y)^3 + (y-z)^3 + (z-x)^3}{(x-y)(y-z)(z-x)} = \\\\boxed{3}.$$', 'answer': '3'}) (input_keys={'question'}): Expected dict_keys(['reasoning', 'answer']) but got dict_keys(['reasoning']). Set `provide_traceback=True` to see the stack trace.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Average Metric: 224.00 / 349 (64.2%): 100%|██████████| 350/350 [08:18<00:00, 1.42s/it]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2025/02/02 02:12:13 INFO dspy.evaluate.evaluate: Average Metric: 224.0 / 350 (64.0%)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "data": { + "text/plain": [ + "64.0" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "kwargs_4o = dict(num_threads=THREADS, task_model=gpt4o, teacher_settings=dict(lm=gpt4o), prompt_model=gpt4o)\n", + "kwargs_r1 = dict(num_threads=THREADS, task_model=r1, teacher_settings=dict(lm=r1), prompt_model=r1)\n", + "optimizer_4o = dspy.MIPROv2(metric=dataset.metric, auto=\"medium\", **kwargs_4o)\n", + "optimizer_r1 = dspy.MIPROv2(metric=dataset.metric, auto=\"medium\", **kwargs_r1)\n", + "\n", + "\n", + "kwargs = 
dict(requires_permission_to_run=False, max_bootstrapped_demos=4, max_labeled_demos=4)\n", + "optimized_module_4o = optimizer_4o.compile(module, trainset=dataset.train, **kwargs)\n", + "optimized_module_r1 = optimizer_r1.compile(module, trainset=dataset.train, **kwargs)\n", + "\n", + "with dspy.settings.context(lm=gpt4o):\n", + " gpt4o_compiled = evaluate(optimized_module_4o)\n", + "with dspy.settings.context(lm=r1):\n", + " r1_compiled = evaluate(optimized_module_r1)\n", + "\n", + "gpt4o_compiled\n", + "r1_compiled" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "# optimized_module_4o.save('gpt4o_compiled.json')\n", + "# optimized_module_r1.save('r1_compiled.json')\n" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "ename": "NameError", + "evalue": "name 'gpt4o_uncompiled' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[8], line 8\u001b[0m\n\u001b[1;32m 6\u001b[0m \u001b[38;5;66;03m# Data for the bars\u001b[39;00m\n\u001b[1;32m 7\u001b[0m models \u001b[38;5;241m=\u001b[39m [\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mGPT-4o\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mR1\u001b[39m\u001b[38;5;124m'\u001b[39m]\n\u001b[0;32m----> 8\u001b[0m uncompiled_scores \u001b[38;5;241m=\u001b[39m [\u001b[43mgpt4o_uncompiled\u001b[49m, r1_uncompiled]\n\u001b[1;32m 9\u001b[0m compiled_scores \u001b[38;5;241m=\u001b[39m [gpt4o_compiled, r1_compiled]\n\u001b[1;32m 11\u001b[0m x \u001b[38;5;241m=\u001b[39m np\u001b[38;5;241m.\u001b[39marange(\u001b[38;5;28mlen\u001b[39m(models)) \u001b[38;5;66;03m# Label locations\u001b[39;00m\n", + "\u001b[0;31mNameError\u001b[0m: name 'gpt4o_uncompiled' is not defined" + ] + }, + { + "data": { + 
"text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "\n", + "plt.clf()\n", + "\n", + "# Data for the bars\n", + "models = ['GPT-4o', 'R1']\n", + "uncompiled_scores = [gpt4o_uncompiled, r1_uncompiled]\n", + "compiled_scores = [gpt4o_compiled, r1_compiled]\n", + "\n", + "x = np.arange(len(models)) # Label locations\n", + "width = 0.35 # Width of the bars\n", + "\n", + "# Create bars\n", + "plt.bar(x - width/2, uncompiled_scores, width, label='Uncompiled')\n", + "plt.bar(x + width/2, compiled_scores, width, label='Compiled')\n", + "\n", + "# Customize the plot\n", + "plt.xlabel('Models')\n", + "plt.ylabel('Score')\n", + "plt.title('R1 vs GPT-4o on MATH with MIPROv2')\n", + "plt.xticks(x, models)\n", + "plt.legend()\n", + "\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[autoreload of dspy.adapters.chat_adapter failed: Traceback (most recent call last):\n", + " File \"/Users/isaac/projects/dspy/.venv/lib/python3.12/site-packages/IPython/extensions/autoreload.py\", line 276, in check\n", + " superreload(m, reload, self.old_objects)\n", + " File \"/Users/isaac/projects/dspy/.venv/lib/python3.12/site-packages/IPython/extensions/autoreload.py\", line 500, in superreload\n", + " update_generic(old_obj, new_obj)\n", + " File \"/Users/isaac/projects/dspy/.venv/lib/python3.12/site-packages/IPython/extensions/autoreload.py\", line 397, in update_generic\n", + " update(a, b)\n", + " File \"/Users/isaac/projects/dspy/.venv/lib/python3.12/site-packages/IPython/extensions/autoreload.py\", line 349, in update_class\n", + " if update_generic(old_obj, new_obj):\n", + " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + " File \"/Users/isaac/projects/dspy/.venv/lib/python3.12/site-packages/IPython/extensions/autoreload.py\", line 397, in update_generic\n", + " update(a, 
b)\n", + " File \"/Users/isaac/projects/dspy/.venv/lib/python3.12/site-packages/IPython/extensions/autoreload.py\", line 309, in update_function\n", + " setattr(old, name, getattr(new, name))\n", + "ValueError: __init__() requires a code object with 1 free vars, not 0\n", + "]\n" + ] + }, + { + "ename": "TypeError", + "evalue": "super(type, obj): obj must be an instance or subtype of type", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[51], line 3\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mdspy\u001b[39;00m\n\u001b[0;32m----> 3\u001b[0m chat_adapter \u001b[38;5;241m=\u001b[39m \u001b[43mdspy\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mChatAdapter\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 4\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m dspy\u001b[38;5;241m.\u001b[39msettings\u001b[38;5;241m.\u001b[39mcontext(lm\u001b[38;5;241m=\u001b[39mr1, adapter\u001b[38;5;241m=\u001b[39mchat_adapter):\n\u001b[1;32m 5\u001b[0m module \u001b[38;5;241m=\u001b[39m dspy\u001b[38;5;241m.\u001b[39mChainOfThought(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mquestion -> answer\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n", + "File \u001b[0;32m~/projects/dspy/dspy/adapters/chat_adapter.py:35\u001b[0m, in \u001b[0;36mChatAdapter.__init__\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 34\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__init__\u001b[39m(\u001b[38;5;28mself\u001b[39m):\n\u001b[0;32m---> 35\u001b[0m \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[38;5;21;43m__init__\u001b[39;49m()\n\u001b[1;32m 36\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124minside chat adapter 
init\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n", + "\u001b[0;31mTypeError\u001b[0m: super(type, obj): obj must be an instance or subtype of type" + ] + } + ], + "source": [ + "import dspy\n", + "\n", + "chat_adapter = dspy.ChatAdapter()\n", + "with dspy.settings.context(lm=r1, adapter=chat_adapter):\n", + " module = dspy.ChainOfThought(\"question -> answer\")\n", + " module(question=example.question)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Average Metric: 310.00 / 350 (88.6%): 100%|██████████| 350/350 [01:31<00:00, 3.84it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024/11/28 18:59:19 INFO dspy.evaluate.evaluate: Average Metric: 310 / 350 (88.6%)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
questionexample_reasoningexample_answerpred_reasoningpred_answermethod
0What is the smallest integer value of $c$ such that the function $...The given function has a domain of all real numbers if and only if...1The function \\( f(x) = \\frac{x^2 + 1}{x^2 - x + c} \\) will have a ...1✔️ [True]
1What is the least value of $x$ that is a solution of $|{-x+3}|=7$?In order to have $|{-x+3}| = 7$, we must have $-x + 3 = 7$ or $-x ...-4The equation \\( |{-x+3}|=7 \\) implies two possible cases: 1. \\(-x ...-4✔️ [True]
2Evaluate $\\left\\lceil -\\frac{7}{4}\\right\\rceil$.$-\\frac{7}{4}$ is between $-1$ and $-2$, so $\\left\\lceil -\\frac{7}...-1To evaluate \\(\\left\\lceil -\\frac{7}{4}\\right\\rceil\\), we first nee...-1✔️ [True]
3A triangle has vertices at coordinates $(11,1)$, $(2,3)$ and $(3,7...We must find the distance between each pair of points by using the...10To find the length of the sides of the triangle formed by the vert...10✔️ [True]
4Let $f(x) = x + 2$ and $g(x) = 1/f(x)$. What is $g(f(-3))$?First, we find that $f(-3) = (-3) + 2 = -1$. Then, $$g(f(-3)) = g(...1To find \\( g(f(-3)) \\), we first need to evaluate \\( f(-3) \\). Usi...1✔️ [True]
\n", + "
" + ], + "text/plain": [ + " question \\\n", + "0 What is the smallest integer value of $c$ such that the function $... \n", + "1 What is the least value of $x$ that is a solution of $|{-x+3}|=7$? \n", + "2 Evaluate $\\left\\lceil -\\frac{7}{4}\\right\\rceil$. \n", + "3 A triangle has vertices at coordinates $(11,1)$, $(2,3)$ and $(3,7... \n", + "4 Let $f(x) = x + 2$ and $g(x) = 1/f(x)$. What is $g(f(-3))$? \n", + "\n", + " example_reasoning \\\n", + "0 The given function has a domain of all real numbers if and only if... \n", + "1 In order to have $|{-x+3}| = 7$, we must have $-x + 3 = 7$ or $-x ... \n", + "2 $-\\frac{7}{4}$ is between $-1$ and $-2$, so $\\left\\lceil -\\frac{7}... \n", + "3 We must find the distance between each pair of points by using the... \n", + "4 First, we find that $f(-3) = (-3) + 2 = -1$. Then, $$g(f(-3)) = g(... \n", + "\n", + " example_answer \\\n", + "0 1 \n", + "1 -4 \n", + "2 -1 \n", + "3 10 \n", + "4 1 \n", + "\n", + " pred_reasoning \\\n", + "0 The function \\( f(x) = \\frac{x^2 + 1}{x^2 - x + c} \\) will have a ... \n", + "1 The equation \\( |{-x+3}|=7 \\) implies two possible cases: 1. \\(-x ... \n", + "2 To evaluate \\(\\left\\lceil -\\frac{7}{4}\\right\\rceil\\), we first nee... \n", + "3 To find the length of the sides of the triangle formed by the vert... \n", + "4 To find \\( g(f(-3)) \\), we first need to evaluate \\( f(-3) \\). Usi... \n", + "\n", + " pred_answer method \n", + "0 1 ✔️ [True] \n", + "1 -4 ✔️ [True] \n", + "2 -1 ✔️ [True] \n", + "3 10 ✔️ [True] \n", + "4 1 ✔️ [True] " + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + "
\n", + " ... 345 more rows not displayed ...\n", + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "88.57" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "evaluate(optimized_module)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Neat. It was pretty straightforward to improve quality from 74% to over 88% on a held-out set here.\n", + "\n", + "That said, for reasoning tasks like this, you will often want to consider more advanced strategies, like:\n", + "\n", + "- A `dspy.ReAct` module with access to a calculator function or `dspy.PythonInterpreter`\n", + "- Ensembling multiple optimized prompts with a majority vote (or an Aggregator module) on top" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "Just to understand what changed, let's view the prompt after optimization." + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "\n", + "\n", + "\u001b[34m[2024-11-28T18:59:19.176586]\u001b[0m\n", + "\n", + "\u001b[31mSystem message:\u001b[0m\n", + "\n", + "Your input fields are:\n", + "1. `question` (str)\n", + "\n", + "Your output fields are:\n", + "1. `reasoning` (str)\n", + "2. `answer` (str)\n", + "\n", + "All interactions will be structured in the following way, with the appropriate values filled in.\n", + "\n", + "[[ ## question ## ]]\n", + "{question}\n", + "\n", + "[[ ## reasoning ## ]]\n", + "{reasoning}\n", + "\n", + "[[ ## answer ## ]]\n", + "{answer}\n", + "\n", + "[[ ## completed ## ]]\n", + "\n", + "In adhering to this structure, your objective is: \n", + " Analyze the `question` provided, and systematically apply mathematical reasoning to derive the `answer`. 
Ensure to articulate each step of your thought process in the `reasoning` field, detailing how you identify relationships and formulate equations to arrive at the solution.\n", + "\n", + "\n", + "\u001b[31mUser message:\u001b[0m\n", + "\n", + "[[ ## question ## ]]\n", + "If $|4x+2|=10$ and $x<0$, what is the value of $x$?\n", + "\n", + "Respond with the corresponding output fields, starting with the field `[[ ## reasoning ## ]]`, then `[[ ## answer ## ]]`, and then ending with the marker for `[[ ## completed ## ]]`.\n", + "\n", + "\n", + "\u001b[31mAssistant message:\u001b[0m\n", + "\n", + "[[ ## reasoning ## ]]\n", + "The equation $|4x+2|=10$ implies two possible equations: $4x+2=10$ or $4x+2=-10$. We solve each equation separately.\n", + "\n", + "1. For $4x+2=10$:\n", + " \\[\n", + " 4x + 2 = 10 \\\\\n", + " 4x = 8 \\\\\n", + " x = 2\n", + " \\]\n", + " However, $x=2$ does not satisfy the condition $x<0$.\n", + "\n", + "2. For $4x+2=-10$:\n", + " \\[\n", + " 4x + 2 = -10 \\\\\n", + " 4x = -12 \\\\\n", + " x = -3\n", + " \\]\n", + " The solution $x = -3$ satisfies the condition $x<0$.\n", + "\n", + "Therefore, the value of $x$ is $\\boxed{-3}$.\n", + "\n", + "[[ ## answer ## ]]\n", + "-3\n", + "\n", + "[[ ## completed ## ]]\n", + "\n", + "\n", + "\u001b[31mUser message:\u001b[0m\n", + "\n", + "[[ ## question ## ]]\n", + "Given that $-4$ is a solution to $x^2 + bx -36 = 0$, what is the value of $b$?\n", + "\n", + "Respond with the corresponding output fields, starting with the field `[[ ## reasoning ## ]]`, then `[[ ## answer ## ]]`, and then ending with the marker for `[[ ## completed ## ]]`.\n", + "\n", + "\n", + "\u001b[31mAssistant message:\u001b[0m\n", + "\n", + "[[ ## reasoning ## ]]\n", + "The product of the roots of this quadratic is $-36/1=-36$, so the other solution must be $-36/-4=9$. That means that the sum of the solutions is $-4+9=5$. The sum of the solutions is also $-b/1=-b$. 
Thus, $-b=5$ and $b=\\boxed{-5}$.\n", + "\n", + "[[ ## answer ## ]]\n", + "-5\n", + "\n", + "[[ ## completed ## ]]\n", + "\n", + "\n", + "\u001b[31mUser message:\u001b[0m\n", + "\n", + "[[ ## question ## ]]\n", + "Richard is building a rectangular playground from 200 feet of fencing. The fencing must entirely enclose the playground. What is the maximum area of this playground?\n", + "\n", + "Respond with the corresponding output fields, starting with the field `[[ ## reasoning ## ]]`, then `[[ ## answer ## ]]`, and then ending with the marker for `[[ ## completed ## ]]`.\n", + "\n", + "\n", + "\u001b[31mAssistant message:\u001b[0m\n", + "\n", + "[[ ## reasoning ## ]]\n", + "Let the length of the playground be $l$ and the width be $w$. We have the equation $2l+2w=200 \\Rightarrow l + w = 100$. We want to maximize the area of this rectangular playground, which is given by $lw$. From our equation, we know that $l=100-w$. Substituting this in to our expression for area, we have \\[(100-w)(w)=100w-w^2\\]We will now complete the square to find the maximum value of this expression. Factoring a $-1$ out, we have \\[-(w^2-100w)\\]In order for the expression inside the parenthesis to be a perfect square, we need to add and subtract $(100/2)^2=2500$ inside the parenthesis. Doing this, we get \\[-(w^2-100w+2500-2500) \\Rightarrow -(w-50)^2+2500\\]Since the maximum value of $-(w-50)^2$ is 0 (perfect squares are always nonnegative), the maximum value of the entire expression is 2500, which is achieved when $w=50$ and $l=100-w=50$ (the playground is a square). Thus, the maximum area of the playground is $\\boxed{2500}$ square feet.\n", + "\n", + "[[ ## answer ## ]]\n", + "2500\n", + "\n", + "[[ ## completed ## ]]\n", + "\n", + "\n", + "\u001b[31mUser message:\u001b[0m\n", + "\n", + "[[ ## question ## ]]\n", + "Alice and Bob are playing a game. Alice starts first. On Alice's turn, she flips a coin. If she gets a heads, she wins. If not, it becomes Bob's turn. 
On Bob's turn, he flips a coin. If he gets a tails, he wins. If not, it becomes Alice's turn. What is the probability that Alice wins the game?\n", + "\n", + "Respond with the corresponding output fields, starting with the field `[[ ## reasoning ## ]]`, then `[[ ## answer ## ]]`, and then ending with the marker for `[[ ## completed ## ]]`.\n", + "\n", + "\n", + "\u001b[31mAssistant message:\u001b[0m\n", + "\n", + "[[ ## reasoning ## ]]\n", + "Alice has a $1/2$ chance of winning the game on her first turn. If she doesn't, then the probability that she wins the game on her second turn is $1/8,$ since she must not win on her first flip ($1/2$ chance), Bob must not win on his first flip ($1/2$ chance), and then Alice must win on her second flip ($1/2$ chance). The probability that she wins the game on her third turn is $1/32,$ and in general, the probability that she wins the game on her $k^\\text{th}$ turn is $(1/2)^{2k-1}.$ Thus, the probability that Alice wins is an infinite geometric series with first term $1/2$ and common ratio $1/4.$ So, the probability that Alice wins the game is $$\\frac{\\frac12}{1-\\frac14} = \\boxed{\\frac{2}{3}}.$$OR\n", + "\n", + "Note that the only difference between the odds of Alice or Bob winning is who goes first. Because Bob goes second, the odds of him winning on his $k^\\text{th}$ flip is half of the odds that Alice wins on her $k^\\text{th}$ flip, since Alice must first get a tails before Bob gets a chance to win. 
Thus, if $a$ is Alice's chance of winning, and $b$ is Bob's chance of winning, then $a = 2b.$ Also, since someone must win, $a + b = 1.$ It follows that $a = 2/3$ and $b = 1/3,$ so Alice has a $\\boxed{\\frac{2}{3}}$ chance of winning the game.\n", + "\n", + "[[ ## answer ## ]]\n", + "\\frac{2}{3}\n", + "\n", + "[[ ## completed ## ]]\n", + "\n", + "\n", + "\u001b[31mUser message:\u001b[0m\n", + "\n", + "[[ ## question ## ]]\n", + "Find the smallest possible value of $$\\frac{(y-x)^2}{(y-z)(z-x)} + \\frac{(z-y)^2}{(z-x)(x-y)} + \\frac{(x-z)^2}{(x-y)(y-z)},$$ where $x,y,$ and $z$ are distinct real numbers.\n", + "\n", + "Respond with the corresponding output fields, starting with the field `[[ ## reasoning ## ]]`, then `[[ ## answer ## ]]`, and then ending with the marker for `[[ ## completed ## ]]`.\n", + "\n", + "\n", + "\u001b[31mResponse:\u001b[0m\n", + "\n", + "\u001b[32m[[ ## reasoning ## ]]\n", + "Let \\( S = \\frac{(y-x)^2}{(y-z)(z-x)} + \\frac{(z-y)^2}{(z-x)(x-y)} + \\frac{(x-z)^2}{(x-y)(y-z)} \\).\n", + "\n", + "To find the minimum value of \\( S \\), we can use the method of symmetry and the Cauchy-Schwarz inequality. \n", + "\n", + "By applying the Cauchy-Schwarz inequality, we have:\n", + "\\[\n", + "\\left( \\frac{(y-x)^2}{(y-z)(z-x)} + \\frac{(z-y)^2}{(z-x)(x-y)} + \\frac{(x-z)^2}{(x-y)(y-z)} \\right) \\left( (y-z)(z-x) + (z-x)(x-y) + (x-y)(y-z) \\right) \\geq \\left( (y-x) + (z-y) + (x-z) \\right)^2.\n", + "\\]\n", + "The right-hand side simplifies to:\n", + "\\[\n", + "(y-x) + (z-y) + (x-z) = 0,\n", + "\\]\n", + "which means the right-hand side is \\( 0 \\). Therefore, we have:\n", + "\\[\n", + "S \\cdot \\left( (y-z)(z-x) + (z-x)(x-y) + (x-y)(y-z) \\right) \\geq 0.\n", + "\\]\n", + "This implies that \\( S \\) is non-negative.\n", + "\n", + "Next, we can test specific values for \\( x, y, z \\) to find the minimum. 
Let's set \\( x = 0, y = 1, z = 2 \\):\n", + "\\[\n", + "S = \\frac{(1-0)^2}{(1-2)(2-0)} + \\frac{(2-1)^2}{(2-0)(0-1)} + \\frac{(0-2)^2}{(0-1)(1-2)}.\n", + "\\]\n", + "Calculating each term:\n", + "1. The first term:\n", + " \\[\n", + " \\frac{1^2}{(-1)(2)} = \\frac{1}{-2} = -\\frac{1}{2}.\n", + " \\]\n", + "2. The second term:\n", + " \\[\n", + " \\frac{1^2}{(2)(-1)} = \\frac{1}{-2} = -\\frac{1}{2}.\n", + " \\]\n", + "3. The third term:\n", + " \\[\n", + " \\frac{(-2)^2}{(-1)(-1)} = \\frac{4}{1} = 4.\n", + " \\]\n", + "\n", + "Thus,\n", + "\\[\n", + "S = -\\frac{1}{2} - \\frac{1}{2} + 4 = 3.\n", + "\\]\n", + "\n", + "To check if this is indeed the minimum, we can also consider the case when \\( x, y, z \\) are the roots of a quadratic polynomial. By symmetry and the nature of the expression, we can conclude that the minimum value of \\( S \\) is indeed \\( 3 \\).\n", + "\n", + "Therefore, the smallest possible value of \\( S \\) is \\( \\boxed{3} \\).\n", + "\n", + "[[ ## answer ## ]]\n", + "3\n", + "\n", + "[[ ## completed ## ]]\u001b[0m\n", + "\n", + "\n", + "\n", + "\n", + "\n" + ] + } + ], + "source": [ + "dspy.inspect_history()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.8" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/dspy/adapters/chat_adapter.py b/dspy/adapters/chat_adapter.py index e51e90d68..d0d03a9b5 100644 --- a/dspy/adapters/chat_adapter.py +++ b/dspy/adapters/chat_adapter.py @@ -59,6 +59,9 @@ def format(self, signature: Signature, demos: list[dict[str, Any]], inputs: dict return messages def parse(self, signature, completion): + if "" in 
completion: + print("inside chat adapter parse", completion) + sections = [(None, [])] for line in completion.splitlines(): diff --git a/dspy/adapters/json_adapter.py b/dspy/adapters/json_adapter.py index a2fbcd555..34feb297d 100644 --- a/dspy/adapters/json_adapter.py +++ b/dspy/adapters/json_adapter.py @@ -91,6 +91,9 @@ def format(self, signature, demos, inputs): return messages def parse(self, signature, completion): + if "" in completion: + print("inside json adapter parse", completion) + fields = json_repair.loads(completion) fields = {k: v for k, v in fields.items() if k in signature.output_fields} diff --git a/output.png b/output.png new file mode 100644 index 0000000000000000000000000000000000000000..6ea0cd6038b36fea712ada469e2d54b84097ee4f GIT binary patch literal 16570 zcmeHuXINBOn{BDBb}LG&jR6A&FdzaFC1XH9qU4N$j06cH+0bG_LWv+CpptXWNktSS zNzR~>DS{Fu-?h7EzWJV+J9D48Kjzo;(_O7paq67C->}wt*M6udFS%(0;|2Z7ayZVeyYouv^&}bNJb4*x)L^Sxaf!QYiG7$$zV2#G*|p6!F8- z=T58Kd@xN1m=@U!-K?^~eQ8V@&&7T(Y zE(dOiJZxsF$sU_KZfYs!P@h3pwbgpfmj2oM_niw?J^EEAD%9Jyf6utFk?3UT7WU+Y zz&cO_UHeKr+ky~-+J|-f5OOnVf*90+x~?vb2jYiYkS3h+0`2t z6LVCPZ9A>Ew>M%d%Y_)_X!-fk?C!b~#m=*q(_@|5nP!dD^mJSQ?y^ZGB_(Q+f?1vkU$xJ`lE2OslvBb0E>ecrOYg|n~mekiP^ILVv@~`xkKW(}?@pe6vLCeQa z&yE|WYN!<4ydD`9rPaQh*UT^Ss)$*`QE8=ErO2rc+Zw~1Cr@_sf4Y)tGE^5yUtF)r zW7%=)gnp6m$Azk~lQn}o7ndBm8XHynK0Q~%|vMxu7M{Ahc2GnS0U z<)?M4&|sySK7jKvpz2t1b05rA>V%c>C>l{9XYQUj;qC3v}f^6ZfDcjudd$gyzHx) za_RPkXRMLw#x=wDncPzLb+0VX77w>&#@pwLIM0|2H^e#(ggB_YI(8(EYBIxgjU2ftvTLhZh}6eo8PiS5znt$?B+T>(yClkRMeJw_5OLUJ+b%<4Np)O zz!YP>V@;XFWPcSm^DgI^kyKMA;hQN3${##@m}=_%Ae&J+)O_-IdYk%dgv(z8U3*+rhQfJbV^6DbOa=o6I*kDU)_}8&9OX0~%?(pnHT80d2i<_!Ov}~t$U%#UuUIC&Q8za?8(Z57wJ>C^TF-|k^_kI8QE}%UZgu)m zx+Brz@%9r+iCUQ|F^UHqot)wjQ1gf*hni)Fk(O7y=1noqvtwqRdG;O^4N1D%dj;)s ztP&MO=OQA6!Z`#C@wi%fcCk^ifz%iXrg_v6(|upR`hHz5UYw|yn4ML> zaN*I0KaRfq$mA~E$;DCd{SI^IB+rhC^>^q-zKyh|QzLM(0gW*1Uwwa7FvITM=JiGP zG&S8E>(k`EnN9JI(_CMGYL4|~xfiD{ZaXZi7=Hdq2GZPlrua(f2I)HY<;k%7NxY+S 
zc_y{5={p0MBO@YW^o!j{lECTmZXn9Rile`irAhlZIw!QV{Gy|cJ!pmV!|~KHowJ8J9mR6omhy@g z#)N|V>g2-;e|~#Bw7Rylu1nIz#br#BW5nCsz~{)fo*w(T2}4rcvG0r`P8m8nI$VW5 zTtaP`=FwXYohO}VxIH^LQ{?dB^ZWMgGwUkIWgbH8K8=o!MuD@f;ZG|TLe4Tr=0D&& zd9rr?m{y)`;_dA0>?$tQvCn-K0nez^*H=G!4L#xS@?l0W_TQd4$(?RgRfl5q+Md>A zm{)mw&H6v~?oCY6%{QAFX{APV6*^ZmH>)$be80_it;Of-1GS*jch|*A`5q*d5Uu2( zv{_@^GqZ-6SH6cNSp@~#jOG}5%}lCa2sAajFV98yR|bY7pp8t!HZZV841_w%qee8C zCl^LFG^m`=%8)8_n*L-LA``%Fij;li>FLM(j7?MNz<~o=DCJq+)3e>~$&&jVnYC#b z9M13bpxgC2UOhQZ@zt5OG{eXHoJUU1&CQu&zsQ0pDJvsN`0U3z1nS)oh$}J1HKDA! zd3I5k+xC4F=5|=AwrbCkyZXV?H#k^3uX_%c8lLE@XfiigN4Il7GVQ^GUuj&DUVD6* z(fdx+k3M5ny$~82+O)-wfDx3SN2u#_#Vcd^<918)_L=6*W1-UuK|Ip<5c*8(nsK3t z?T5I^wtfBj)v4E4@{iwte@PI)*x1;7f3VfmV7PaG3m*_i^l)SR@xzBdwE1IG(6Hkq z9&c|_RznfUL%1|0=`M&b9bsmcQp!`PBwKXITlh){BaexXWhWMT*jI9&FTxs29Z?^x zV2f=b{FfTJ*Ii= z9IBQ`RS;Xs$Q;i{gRAeiN4;BKoK&D)uY82kWsc(e=*g2tda=cqEiEn1q2DwzOnlM4 zt}Xvsgqot2Y4$8hCzskc+LfqR=%kf@!y@lkx=_?II~$wqv17;Did^#}gbs0BzI=JZ z#*OtTq0G|v>6z4YbY4`IcD7#jz8V^s?>?Z9oxVcF+CV`qPxU^?wlv?F?yc`O zC$V|+<_#M*q?yjn&IaID+)!nmla3!N5jw-F67v#WZE${?#^)e|s%}`#eo}^Pg zHT9;OKsJ2D=gO5U8SX1?SR={Pr%&$+{X~tJn;eL@?93xDGcmiTDdytYY|G9%DvL}5 z;2@jDtR>nHpTo1r>o%X-+KsD%cnNy4XibvEph>KErKFF0QVbScXK+ z^uG`h`Z14HmoO>EL-O7oCr+Kxs0&f3AuVMFV0-N^zewdx-aiz6pZe^CHeZRDho=4V zqT||i>)J9bT2fB4T=4r0)b;w9a?~JtW0T{b(<23)lIWM0B>^EyCGn9urM>nTq%9jl!Z`<$8TsA+)QoDwn_#PA4IGNhN>Y$ z6ZKrDRe7ztbWl}3O%%N=I+gjS@W9|;%#WUrG(00vu#?x!-km#R047+mD+D8{>$^s( zC2B?PUOI}v)ygn=707Ko46sJ1f#|{rH_8cXeNlb&D2GPMeo6D{4estMcF}xT{-|!Z z1vyZh;qKz%)oa!y)+0jfzPCb!jOu{A zA^=k6%^%;`ymQ_z|H_r}`1}*bSfpSOm*EAkZpQW`U7^vATxC*xaN&B_`T2!y;(Kvl z^y2x=7G(%ar}?4i>GC6CgLsr`>%jGMy$kzwbaff_?X%=&VAs(AByIwntdm1$Qo=K5 zUTfvDAF7k63sGK2t?LXGahx!aPSc|c@kJ!WI!zBh^76{c<(>WhwPYUbBGGlgcb$o^5wjoaBs<`STRR#j_6x(xHsV~(P-Ou=y$RB~W;JisIT{L&_7Q-}S) zg9l`qKj!4;x9?f$pf+9&-Hp5sM+pQdYwfsw>dO0Hb#rYbK6)#ppWVRH{$s~LF>AF7o$T}OFG)LCrdpzT6JiL8%-8OH>z3>90+;y_;Yz`L7Jo1> z7#2$X2+(({g#y4XJ^$k)6N@zHaVP-&>g0lnPp%69pov{zb%kKntV$75dCGzWX6m~y 
zMIwEbyhZ0@NP?gw%VGDj9rMi3gLWlDWI%+fp>YTDDn^L3IsgSaItASI_EzvvaFpA4 z+@S`vDhO041_a_9z&Amwc%a0=K@$iT!=Vmr(sc{t#mVRo$C#PV0czA%1@ogl@Hjd; z8ZJzoRXcU+6gb=ESsfz)J3;$lzp73%hvR56_|{mI1QX!(+>8Ap_x}8|45TR2sQOcPENk~QhZf7*d-W?h-bI^ ziinJBcY$O5bA;8Fty{0$baG;pmzVbdr1=58Wfwv+#c|lgNMWWh;J#y;RZwN9tqLNchM5Z63kF!3WCc{q&vp*p}Q(IkK?ax#Xpf>(D7EC>&yE zua0t^o9O#cuOMde2fI#-_4Vr~@7%fL$=~fV8LGg$Jnp_?F2#kPaCIhEMtO!tcrKP+ z*_q3;eKUpPQ;~BXlH&92r~j+hCb8w)H&HV4tiRp?s`fMXp2f%MKXTH6;;5EB-cj>? zqh$SJ&)v#<8+Rn=3**hozQwsoC(teKkQHxwE>ry#x8=n-HuonY6iQ0+pMU;>PN=cs zW!%riWKbu61iX5y0foXDxS+fJU1`z37Xg=lG?!5*6JZDgmLo^n^XXcA)6>)U?A^!ang~N zhQ3c_Mx$bDcm3-_2U%E38LvFre!^_r=900cr6n~2h^)N1nZG?cFsEe)#pg&}(!*mK zDKA5gpLX+|N&n8-Ec4fIKB||Ndb&+)<1f3QWNMMu<-XUiU-yoVTDTHbPc>O@>D9cj z^GpKU_Qx~h-7AVq9rQaER#Qfj8hpkl?3|5{@VLM1gA5n}jBbv~#y5KL;zdTzOSgus zj#vRIhPuvZItTck`1{%&9UTb{TLL#l`%@@otI!CQP&1|=Eyif1USZYBh?$=nx@s#a z4^fkdO$giEoSdUak4oA0SEg2XXP7=6Zc049W5*6Wz3f>Jx>{sw;w1TcphMF)w^p(0 zNKmPb50<}xCxfRkDo`f(=BK{>MCB^9%7eVa0rrSRr z?awta6de|K=H3PydwW%D>lCBv7pC0q=!%5kqO+(1v@cE7E3}IE(f_%B4Mlu&Ys%%j z$c@BJyZMd+%i!4|Hl|g(PdBPhLw_SGouJcHjOg;ut3d5iRJVmueu6^dl%u^i?>Y6d ztW2sZkXw>e=qkR~;o*HfJ>i}C4)H_NJb5ir2r@;ckbS8EO4|<)GyWp zmZ}Fq&A9pFN}_JQ8h9HuqApTKyE#O_Hfr_i)jY2Ab`Y-Du3t~k5hKJs+{}UX_l2&~ zL!qpPI>{`3_WtHZ)Zbn_D4M-AmE0~?VXyE_MF@r2Gh#NqQX*Jn>e!#JB$-@QKv$L9 zvn*Td(sLBLx7cbApU+`x; zCLO1h8E0RFg+PME0mW#vBfyx;=YOv7fO$8$?>MG(4gjSYR~XM%XinB|9;%OSjFj17 zu0f4x&$fc_RiWkFW*YCA6%b6u10DAvLwxJ0_m=-qTB^D7T*jfOcFq#wJIWN*@9K_v zql_o{?R9q)+ioy5Vb_?Yp#iI*lU?iYd!MZ?#+Kx}it7hz<5iQXcE@#{c zIwGrHL5|zE!Faf43$~;_(iSCc^46OzYWZ zw?VfjiUTN~8qysgJ__=o4E2KF(N_SEW+d~0X`E(8V%c@`G&n|FHGq3R6XXm!e~gb$ z?YLTk;_cOI(=sf9Sf)`uAgwsYDMdcQ^dJUf9NGiV`}gmwC1|`t+{Ys4=F!sPkwg7b zowHX@j_oU%Uw=VL$|oQo0J0Y0N#i!(Q*z9YjbCrxY9j@NO_mU&jNr#-eDev{CWsWf zJ?p9_tC!zgb53o=jr-sl3PtuY3KCx1lUGrGt~YOp@m(%g_vQTOxBmXX|Mm4&d5DNJ z82~NJjtAHcm=@uWfN=DUR@hM}@a^&wu6WRo)vT8m_aQ z4rDsji}?-d0Vu*w!?DpeIb{hOfBp6Ic<~B5wlcr*5(n9mv@g(3H*DJU47|O|eZ^HZ 
zR!IUHqgJ}n6O{gNC~mEZe3FuqpwjIY3p4LyS@1hA9mc!TG}zh-owE-fJgAbSBcK|m zBE4q)=7#DJAr@&&9h6ZeAm1JUs36iOm{G>e$-n!TE&+1@|9JZq6uRRqED}!-%Ta-z zK!3>t1Y2mcZI?KIp6H;E(v=||65vN$EDqE9REA=M@y%g=etvZQ826RsCaW%g(q3ah zq{zoFU3!V3jn7$0b_Y^sa=cMEWT2I)g^9ZMj*W#{7U5ALwl$_1$}9mTp`<^BWECIg zzN`rbl&(1gZb8H$wPd|Sj-sDu(CRKCz^4&qX7y2WgbtuYMWH6OCh%kWBysNCAaZ8- z^Q*f=sog8$#3M6}FViCG&$)AdZbs5Rf4aa0)=1u0+w=+B&Kt0aYGAxDJ#Ra@>CD`H}P*uw$0d)^te>3VoR| z_A?vn$R*W;RW0GKXaw)n!bA;Hmr9X~Bhk9?Rf?!r2}{|Kn{eaLmyLNZJ3+W;(YEgZ#|h;6%!m(FL} z$jG(HwkxRyE&f44^Pk%<*p9aGP7l_e#RyG-KN*N!@cOrNUKYobuRo!qP-ZViNNph! zJ-1P1O0GDc9Jo5srzQt#NEvLJLGe4v&Mprq<1)xt$9}YE!Cp;asUbmAna~kMMa8^W z@!P96Y~I|2GMDKvu16CD<8dDx_}se|$dRJSdG!Mw85t1WYNCR}o(StAPAZt|s!B;c zz!Xw7@&beX;OEl>K43AYhvSpuFwBa>AnB}?JCY(u$RSDo#VMjklVZoWgVt5?1o`2v!hB@3ZNyB6!O1k6 zwND>H;wi(>>Bo;B6H`+e4gxnd9G9jP3>JQidDD&I!Ej|DHz=ZCe0+Ri#YqMT+R)<2 z7w4eUtVXl| zXNT%J_k7qSq<&}d;HdG}aRolN-*pbHp_Fa&^75LPnaOgyuZSTZX6Pe23?py<4bP)U zfD@#5?jV;jUR+vYIo7%3;J5Db_V)I9ZeLx5zIbS=1{;#@IUe&F2p+(`WOa2lJ@3}1 zACgWVMqvZ5szYdD7!{8tibo(Iqn_L6)N;~2*(0DtU<88j%-OSKU=3xYE!~dt=B%}y zU8S$sm0P+TGg#->2M-3A8?dnT!MUVL^Zd09DeY((jlu&$Sj#@;-G6TTB$uiU5vg z8ID4O?6y_tEUotE`XN{}Ly{w%Ub+ssnt_=bzEYNIhpBJrhYzg6!X0^DXqV_U(Ga@~ zT8u#!wr<^e?J$M%y~dzBAcJNIWC%h{>zQ0$4h{}pn-zeDTKe&$b~G}0 z;8H0(A7ZJr2bC{R-z775S|&TXQY{$|kJR?YYZ^^g#z@_) z%wdN8tNs8RfBY)S*t-Y#Fy{$U+5Y$S|Fr@~oOi(M?K^jxEM;HbN%T~LfS~nX zpb`O{OfjJvISoYyOcWR)Cf$)1n3NPE^WcB0 z8!h1gYd+pByk(1vlQo#7svuu7jj9;I8D)qk2KEAGg7B%m0|UnCp`V<<8ZE<;%bUI2tL{Ka1 zTA;_~y>XyvM>#naA)^w-G@+sZgFrHd&2tz}%E>v!Yw}qf=+{5~xVPmmlrP=YWBM1r zLv|R-9SE(kE5i3ZKY5i0!$si)I0T>H?ks-&|LdOn@4D^&Tc6*@TGo+gFM};?2Kus% z+ee|?VyjT|$-Um=k!Lsb8q*ZeybFR3V=tIvq1FU(UOwZs{a3_P;I*}ZV0**B=)_3{ z*(h-oN(q5Eso(IdW2MK0j>8bAgP{mfpP)x&7LrS$>3T7sLR)jRGWyQJNCZk1oP<6= z<9iEPazDS}@iOAl>2{qBnYP!uIR_;Qngx~n8Q@8WWS0C1P;GgPChGy`>=tG$mcTX1 zpEJ;wi6#(j{tFtlV%B*Bj3-`#KMi6;M96B<%Ca+Z#_XflUZU~KHiKG#CDNQ3d`&<`z%BtLn6O?N~)>zd}1 zUEJp(J}AS3gnYN;E(U`VP(rtTQ;7K<5SS#eJAsljW1Z&QHCIx9M_P)9mZ|v&fTzee 
z!tx68wK+rNC@`oJ2|1z9v)m=l7bDyn3fsk$!D4G3uN#yE6Vq7R1NFOrDY1OU=?ki>J^LG5UN z;y5C6Tw<3oH-M2V3Idn$PHVhxa~7KbE}LQA%s%(?rwOVVkxMXm%Yem`STqH64`CrZ z(s&OZl145PSZABI9YMIBnz$v9h!fc8p|<7(&2a3Ph}}h8Rns?dKR2KVI6;ItKiydf zq>lf3JE!}x9&u1v^Jcz$xdi2P_|;>wnUAim-NnJdv4kQx)TR(3$l~BId!HStx&L?? zg8`C0mf#6c)TS)J$syS78n8rRS4&CE$zU z7dqRTx6}q84lS%1#nS%En^n2Fxv}UEFgjEW&Ew9wro|6&!IAShyW8!!G`K!74-iFx zUi7CpOnL;|WAI`VsfZOQc7C}ZoNDJ%_sUY(^fYp z<@j=cI!4?g(+$c^&4gb-!sLZGylKxVMLbVmK7?rTP<+Z3Y8U~9`c`cj(pcK`q9jPP z)nQ^`wvFUbOV+I2B_<{Yq4PY|j@EPnML~h+mDRPRF&tpVg=VRbJ!$lRL8?fsYm=qse zMG-Hea@?sTlQ9H)JkpwP)T-cz2U%Bqs|m7&%u+BXwuolMr`@j3?k7Z;@JvE5zP`Jc zIEq^h;x_g=tPq+&W{M<$3mhj&9l}Vr-iv{>`*&lfqZGnKi5Ced!lDiQGrqUiAeh7_ zu{9wa$});0BACJBT4X7dFJZ^DYkbR(VHfL)To*Es%XX7M^r{$_!C%;(u>e~DA0J=+ zKl-j+ETWGot^GkI+#S>Nv}AMa1*H9|R5!G)xSv07(%xrZUjy1;n7uz81RJ`DG{}Y@ zT)8h^yZ}f(Oz;YZZ4;A|^{5}ju9)71ZrwLN9%fn$C{LIry89ckr=9XANR$yfp`14M zkO7nGw|Cbc-@jj+fT3cyLgYI$nOQdH+7A2yt=XH3o9JmmH3PA4)Am@ZpC?LKR^k zBSu7gco^OaKW1QY1u{#3)=JFX&VJ;#AG+8-k;NyOiM^2;f!}3?BL^_2$q@4*a?W71 zCG}8=D2le9kIyA;(0lhNmu&_RLU}K}p}<8N2Xh%U0+UaMql)4H0eltnf9G$B8!WD9 zNgOmNkUs6epcJ8U{=aGknGL{oKBc%s_fIHUK;;19+G@nH0q;LacNStPG}?-(5_&NJ+|NdC z1hc>}gRIkT_hJ%Df{=_XWKOO?g3WpAN*a)LC(8t-mP{EFnhS3_I+j;e5trL=UF73+ zoA<<^Dg%xy1H~yzN&V`;hF-F;E)Zgvv=Y>sYi$En!O=k1(p1s;!9$QM$p*tiLZlT; z9|J!=Ta8j#mUQ|9r{@BD&+u^EE^Z?e9B*(`L~G@I>SEgr81v@t|75WK?|1!YE5Ie; z6L|8f2k;|{y1F_UH|0BwXE{idHdHoy`Y<3OnOcZxHM}yBpc+JSpJ7MhwP+S?<|58Sj53JN$->s!TJh%1oEsx8mp^_2 zmT~)HFur=M4@O;_P4NY5mCMHq24MbQ1rb+h6lq0X-rfPEtM=Y>jg{uNDNyLpzEhgw z@Y{-L?)Usjt9J|Mx_Vp>diT{ipKI&3D9M>!F~+yHo`j$9$vzQ5U1xFeTYKY9%TJ~` ztXet0?bI;m4!0|l8tlcAI`CBJ=>=LH(qB(di-4fuFoa0HQIssd;$n9X8p&HMK}M0h zoZMfQqt_CTaaO3;tguWMV%G*tjL@t9o`X?(&lV^w+QJ_JLKz1QYgUI{e|_NUwHspK zuOl555Rzp`o>LQloSzvD{wnylxRz=iU6RtkQ<+}W{he%>)Qd3jo^a}GZp9vXTeF=nQ z{OR~zPi7Dc{(5EUpR+Kn)M88PLj-@h#e;8J(C}R7Yz8p%QRt(s|3=W`MyR^~!Kspa z81e$9pn34O^bQZR0$!pnk^?#z@y{&>f)kcsNiD;_fCYdh4VYWjRaFpALA^+JG0fWn 
z0s_RgmRk@_nNUI*Nf=)PtU*{j5%J!?KS$$&4%rCml7t|dP;78NrQG4RX>tzUd5Fime|sfcFo#u}C9~|MIMD z-TE42uO(Nj_wXG$8wpt8`Y?=v*hHM*O*5D&#DiGzbsK2`CqWvm=Y^8yi>w%bC{(PQqII}hs zsL%!^7^pC=VgRa;bIVQ$?+d(7gYFz4aUvqjN({_KPJ{2wzI`xDrWZX&))#h=Uf+LHcEh`47a97<47T~ zB%=~&;S67ebqIVS$I<+WPneJRbrZGKafr|LXUH!;N8mt7fSP8Gff2E3KrTsu^${3V z=nLbn-W)6q2nkMubVLNamG7bdC;w?^^!1pKn_lVc?8?tj+P?g(e390Cc*z5&#d_{3=qh^S{A`g)j$sbj9G z5+%EfpcV)l=`&yo3CIW{^S8F?Q)c?Ue0k~6JxhH;o*q0=ve60aG}B-~!Dgk}4X+ z5XJ^B)HSjRnEGPO(ExnUOeW#=aJ7 nb(Y#5#2HZ&X!3I~a{YZ|-ZD5Mh`NBj+)R z$Vdz>2z{bQ5-TWi?{}I7b6&m!j5hu0c=8}7ESMB#;lNBd+&t}xhhdw8>yqFkkp6~v zbq-P;VNPwD<27sWVJ0SUoM%l_epa?=d(YT8 zRzROU!Zi|Z@?I@D%EY?U>%@~_ID87Z41SD2OFe*MgH#Pnq@oKr=xz<+s=W_y-J;-} zOj*Co66R8w)?d$%BLH5{R&jFQp}Qq~k=QtY`Q`7^Pp+_Nv1+D8OgmdM zt1%gOz*7}mq?V);3y4bK9BL$SvDaHz(V7yWSrFkHZl?x_S+d1XIw6ST6fd~3ABrfM zNMZ{HD!tZ}sX{?=?9Xi0s|Wc_NG8M{AiAAx@;@yQ0n9}9#euv`;v2^;#Upcwsv*Y| ztq`UL?#-}e%PZt4F`H{=TdHD4#hpNkO#{4A5jcCOaYP}s0o@;?-)DQeoGH2QD7SQ) zV95zb4gCP9K?nC-4@?*z@I;${QpkZtPUt+Qb|Wpde;tFwn43)_ItbIhc$PCG@pd{YnfgNfu0WI^B&ZD{QSdJZxJIG*eJ=L z;oCCt!`b2-jPuy{uA=1L+d|d`M{|hRfq{V`5+yVi|4sb(ky%>FN(AkiPk?3qF@9&NDVDMFe76G$WRh$36RRwAU z#C{VpU8fQ8qu(AinpoFhvx*GH5ksHLy0C1*z;_QadmLho$gq$R6y-;_Zt1*#fWU=K z%|t}57q8qksBS1hb(lJ;z+XUw5;B;@uQkAOCaqKxDl+}`K9d=$1vy3tSv4Luq+mB< zrzA%mth$TH&rw&%JrEZk%C~~LvWkiy3cCsxmw`<~5fjb;1UWfc&>`?E#fA>^?ui$$ ztpQMRzum7yb-jz^VQ_$)oFM@f^0ymk_zYb*7KXTZQhPBAK}H+bOa7;WeB&HB zz!0d*+GrreYloH^4NnA7rUdePC^j!jIq6EeBv4EY*8gMd{@>E_|KpkbWhVQn!odBz S=fVFd6ln?hb4h0|-T6N-b1A|A literal 0 HcmV?d00001 From cb4a4ab7a1ab4bc634ae2c2bdcef65206a67406b Mon Sep 17 00:00:00 2001 From: isaacbmiller Date: Tue, 18 Feb 2025 15:44:46 -0500 Subject: [PATCH 2/5] feat: initial small_lm_adapter and xml_adapter impls --- dspy/__init__.py | 2 +- dspy/adapters/__init__.py | 4 + dspy/adapters/base.py | 15 +- dspy/adapters/chat_adapter.py | 3 - dspy/adapters/small_lm_adapter.py | 210 ++++++++++++++++++++ dspy/adapters/xml_adapter.py | 254 ++++++++++++++++++++++++ dspy/clients/lm.py | 4 +- 
dspy/dsp/utils/settings.py | 1 + tests/adapters/test_small_lm_adapter.py | 117 +++++++++++ tests/adapters/test_xml_adapter.py | 134 +++++++++++++ 10 files changed, 735 insertions(+), 9 deletions(-) create mode 100644 dspy/adapters/small_lm_adapter.py create mode 100644 dspy/adapters/xml_adapter.py create mode 100644 tests/adapters/test_small_lm_adapter.py create mode 100644 tests/adapters/test_xml_adapter.py diff --git a/dspy/__init__.py b/dspy/__init__.py index f5283f30d..875d52a3c 100644 --- a/dspy/__init__.py +++ b/dspy/__init__.py @@ -8,7 +8,7 @@ from dspy.evaluate import Evaluate # isort: skip from dspy.clients import * # isort: skip -from dspy.adapters import Adapter, ChatAdapter, JSONAdapter, Image # isort: skip +from dspy.adapters import Adapter, ChatAdapter, JSONAdapter, Image, XMLAdapter, SmallLMAdapter # isort: skip from dspy.utils.logging_utils import configure_dspy_loggers, disable_logging, enable_logging from dspy.utils.asyncify import asyncify from dspy.utils.saving import load diff --git a/dspy/adapters/__init__.py b/dspy/adapters/__init__.py index e6358a27e..25134f6a8 100644 --- a/dspy/adapters/__init__.py +++ b/dspy/adapters/__init__.py @@ -2,10 +2,14 @@ from dspy.adapters.chat_adapter import ChatAdapter from dspy.adapters.json_adapter import JSONAdapter from dspy.adapters.image_utils import Image +from dspy.adapters.xml_adapter import XMLAdapter +from dspy.adapters.small_lm_adapter import SmallLMAdapter __all__ = [ "Adapter", "ChatAdapter", "JSONAdapter", "Image", + "XMLAdapter", + "SmallLMAdapter", ] diff --git a/dspy/adapters/base.py b/dspy/adapters/base.py index d6b597491..bd6efef54 100644 --- a/dspy/adapters/base.py +++ b/dspy/adapters/base.py @@ -46,9 +46,18 @@ def __call__(self, lm, lm_kwargs, signature, demos, inputs): if isinstance(e, ContextWindowExceededError): # On context window exceeded error, we don't want to retry with a different adapter. 
raise e - from .json_adapter import JSONAdapter - if not isinstance(self, JSONAdapter): - return JSONAdapter()(lm, lm_kwargs, signature, demos, inputs) + from dspy import settings + if settings.config.backup_adapter is not None: + print(f"Error inside adapter, retrying with backup adapter. {e}") + return settings.config.backup_adapter()(lm, lm_kwargs, signature, demos, inputs) + else: + if settings.config.adapter is not None: + print(f"Error inside adapter with no backup adapter, raising error. Assuming this is intentional. {e}") + raise e + else: + from .json_adapter import JSONAdapter + if not isinstance(self, JSONAdapter): + return JSONAdapter()(lm, lm_kwargs, signature, demos, inputs) raise e @abstractmethod diff --git a/dspy/adapters/chat_adapter.py b/dspy/adapters/chat_adapter.py index 7e320c4a4..e35461dbc 100644 --- a/dspy/adapters/chat_adapter.py +++ b/dspy/adapters/chat_adapter.py @@ -61,9 +61,6 @@ def format(self, signature: Signature, demos: list[dict[str, Any]], inputs: dict return messages def parse(self, signature, completion): - if "" in completion: - print("inside chat adapter parse", completion) - sections = [(None, [])] for line in completion.splitlines(): diff --git a/dspy/adapters/small_lm_adapter.py b/dspy/adapters/small_lm_adapter.py new file mode 100644 index 000000000..3d9662db4 --- /dev/null +++ b/dspy/adapters/small_lm_adapter.py @@ -0,0 +1,210 @@ +import json +from typing import Any, Dict, Type, Optional + +from dspy.signatures.signature import Signature +from dspy.adapters.base import Adapter +from dspy.adapters.json_adapter import JSONAdapter +from dspy.signatures.field import InputField, OutputField +from dspy.signatures.utils import get_dspy_field_type +from dspy.signatures.signature import make_signature +from dspy.clients import LM +# from dspy.predict.predict import Predict + + +class SmallLMAdapter(Adapter): + """ + A two-stage adapter that: + 1. Uses a simpler, more natural prompt for the main LM + 2. 
Uses a smaller LM with JSON adapter to extract structured data from the response + """ + + def __init__(self, extraction_model): + self.extraction_model = extraction_model + assert isinstance(self.extraction_model, LM) + self.json_adapter = JSONAdapter() + + def format(self, signature: Signature, demos: list[dict[str, Any]], inputs: dict[str, Any]) -> list[dict[str, Any]]: + """Format a more natural prompt for the first stage""" + messages = [] + + # Create a natural description of the task + task_description = self._create_task_description(signature) + messages.append({"role": "system", "content": task_description}) + + # Format demos in a natural way + # print("len(demos)", len(demos)) + for demo in demos: + # print("demo", demo) + messages.extend(self._format_demo(signature, demo)) + + # Format the current input + messages.append({"role": "user", "content": self._format_input(signature, inputs)}) + + return messages + + # This could probably be a method on the Signature class + def _create_signature_with_text_input_and_outputs( + self, + original_signature: Type[Signature], + instructions: Optional[str] = None + ) -> Type[Signature]: + """Create a new signature containing a new 'text' input field plus all output fields. + + Args: + original_signature: The original signature to extract output fields from + instructions: Optional custom instructions for the new signature. If None, + will generate default instructions. 
+ + Returns: + A new Signature type with a text input field and all output fields + """ + # Create new fields dict starting with our new text input + new_fields = { + 'text': (str, InputField()) + } + + # Add all output fields + new_fields.update({ + name: (field.annotation, field) + for name, field in original_signature.output_fields.items() + }) + + if instructions is None: + outputs_str = ", ".join([f"`{field}`" for field in original_signature.output_fields]) + instructions = f"The input is a text that should contain all the necessary information to produce the fields {outputs_str}. \ + Your job is to extract the fields from the text verbatim. Do not repeat the name of the field in your response." + + return make_signature(new_fields, instructions) + + def parse(self, signature: Signature, completion: str) -> Dict[str, Any]: + """ + Two-stage parsing: + 1. Get unstructured completion from main LM + 2. Use smaller LM with JSON adapter to extract structured data + """ + # The signature is supposed to be "input -> {original output fields}" + # Json is implicit with structured outputs and jsonadapter + + extractor_signature = self._create_signature_with_text_input_and_outputs(signature) + + import dspy + extractor = dspy.Predict(extractor_signature) + extractor.demos = [ + dspy.Example( + text=""" + +Okay, let's see. I need to solve the equation 2x + 3 = 7 for x. Hmm, where do I start? Oh right, the goal is to get +x by itself on one side of the equation. + +First, I should get rid of that 3 that's being added to 2x. To do that, I can subtract 3 from both sides of the +equation. That way, the equation stays balanced. So, subtracting 3 from both sides gives me 2x = 7 - 3. Let me +calculate that: 7 minus 3 is 4. So now the equation is 2x = 4. + +Now, I need to solve for x. Since 2 is multiplied by x, I should divide both sides by 2 to isolate x. Dividing both +sides by 2 gives x = 4 / 2. Calculating that, 4 divided by 2 is 2. So x equals 2. 
Let me check if that makes sense. +Plugging x = 2 back into the original equation: 2*(2) + 3 = 4 + 3 = 7, which matches the right side. Yep, that +works. So the solution is x = 2. + + +reasoning: To solve for x, first subtract 3 from both sides to isolate the term with x, resulting in 2x = 4. Then +divide both sides by 2 to find x = 2. +answer: 2""", + reasoning="To solve for x, first subtract 3 from both sides to isolate the term with x, resulting in 2x = 4. Then divide both sides by 2 to find x = 2.", + answer="2" + ).with_inputs("text") + ] + + try: + # Call the smaller LM to extract JSON + # import rich + # rich.print(completion) + + with dspy.settings.context(adapter=self.json_adapter, lm=self.extraction_model): + extracted_data = extractor(text=completion) + # rich.print(extracted_data) + # Validate the extracted data matches our signature + # if not all(field in extracted_data for field in signature.output_fields): + # missing = set(signature.output_fields) - set(extracted_data) + # raise ValueError(f"Missing required fields in extracted data: {missing}") + + return extracted_data + + except Exception as e: + raise ValueError(f"Failed to parse response: {str(e)}\nOriginal completion: {completion}") + + def _create_task_description(self, signature: Signature) -> str: + """Create a natural description of the task based on the signature""" + parts = [] + + # Get field descriptions + input_descs = [ + f"{name}: {field.json_schema_extra.get('desc', name)}" + for name, field in signature.input_fields.items() + ] + output_descs = [ + f"{name}: {field.json_schema_extra.get('desc', name)}" + for name, field in signature.output_fields.items() + ] + + parts.append("You are a helpful assistant that can solve tasks based on user input.") + parts.append(f"For each input, which includes: {', '.join(input_descs)}") + parts.append(f"You should provide: {', '.join(output_descs)}") + + if signature.instructions: + parts.append(f"\nSpecific instructions: 
{signature.instructions}") + + return "\n".join(parts) + + def _format_input(self, signature: Signature, values: Dict[str, Any]) -> str: + """Format input in a natural way""" + parts = [] + + for name, field in signature.input_fields.items(): + if name in values: + parts.append(f"{name}: {values[name]}") + + return "\n".join(parts) + + def _format_demo(self, signature: Signature, values: Dict[str, Any]) -> list[dict[str, str]]: + """Format a demo example in a natural way""" + messages = [] + + # Format input + if any(k in values for k in signature.input_fields): + messages.append({ + "role": "user", + "content": self._format_input(signature, values) + }) + + # Format output if present + if any(k in values for k in signature.output_fields): + output_parts = [] + for name, field in signature.output_fields.items(): + if name in values: + desc = field.json_schema_extra.get('desc', name) + output_parts.append(f"{desc}: {values[name]}") + + if output_parts: + messages.append({ + "role": "assistant", + "content": "\n".join(output_parts) + }) + + return messages + + def _create_extraction_prompt(self, signature: Signature, text: str) -> str: + """Create a prompt for the extraction phase""" + parts = [] + + parts.append("Extract the following information from the text into JSON format:") + for name, field in signature.output_fields.items(): + desc = field.json_schema_extra.get('desc', name) + field_type = field.annotation.__name__ if hasattr(field.annotation, '__name__') else str(field.annotation) + parts.append(f"- {name} ({field_type}): {desc}") + + parts.append("\nText to extract from:") + parts.append(text) + + parts.append("\nProvide the output in valid JSON format with these exact field names.") + + return "\n".join(parts) \ No newline at end of file diff --git a/dspy/adapters/xml_adapter.py b/dspy/adapters/xml_adapter.py new file mode 100644 index 000000000..bbef31b7e --- /dev/null +++ b/dspy/adapters/xml_adapter.py @@ -0,0 +1,254 @@ +import ast +import enum +import 
inspect +import json +import re +import textwrap +import xml.etree.ElementTree as ET +from collections.abc import Mapping +from itertools import chain +from typing import Any, Dict, Literal, NamedTuple + +import pydantic +from pydantic import TypeAdapter +from pydantic.fields import FieldInfo + +from dspy.adapters.base import Adapter +from dspy.adapters.utils import find_enum_member, format_field_value, get_annotation_name +from dspy.signatures.field import OutputField +from dspy.signatures.signature import Signature, SignatureMeta +from dspy.signatures.utils import get_dspy_field_type +from dspy.adapters.image_utils import try_expand_image_tags + + +class FieldInfoWithName(NamedTuple): + name: str + info: FieldInfo + + +# Built-in field indicating that a chat turn has been completed. +BuiltInCompletedOutputFieldInfo = FieldInfoWithName(name="completed", info=OutputField()) + + +class XMLAdapter(Adapter): + def format(self, signature: Signature, demos: list[dict[str, Any]], inputs: dict[str, Any]) -> list[dict[str, Any]]: + messages: list[dict[str, Any]] = [] + + # Extract demos where some of the output_fields are not filled in. + incomplete_demos = [ + demo for demo in demos if not all(k in demo and demo[k] is not None for k in signature.fields) + ] + complete_demos = [demo for demo in demos if demo not in incomplete_demos] + # Filter out demos that don't have at least one input and one output field. 
+ incomplete_demos = [ + demo + for demo in incomplete_demos + if any(k in demo for k in signature.input_fields) and any(k in demo for k in signature.output_fields) + ] + + demos = incomplete_demos + complete_demos + + prepared_instructions = prepare_instructions(signature) + messages.append({"role": "system", "content": prepared_instructions}) + for demo in demos: + messages.append(format_turn(signature, demo, role="user", incomplete=demo in incomplete_demos)) + messages.append(format_turn(signature, demo, role="assistant", incomplete=demo in incomplete_demos)) + + messages.append(format_turn(signature, inputs, role="user")) + messages = try_expand_image_tags(messages) + return messages + + def parse(self, signature, completion): + fields = {} + for field_name in signature.output_fields: + # if field name is reasoning, also search for think + # Simple regex pattern to match content between XML tags + pattern = f"<{field_name}>(.*?)" + match = re.search(pattern, completion, re.DOTALL) + + if field_name == "reasoning" and not match: + pattern = r"(.*?)" + match = re.search(pattern, completion, re.DOTALL) + + if not match: + raise ValueError(f"Missing required field: {field_name}") + + try: + # Extract the content and strip whitespace + value = match.group(1).strip() + fields[field_name] = parse_value(value, signature.output_fields[field_name].annotation) + except Exception as e: + raise ValueError(f"Error parsing field {field_name}: {str(e)} from value: {value}") + + if fields.keys() != signature.output_fields.keys(): + raise ValueError(f"Expected {signature.output_fields.keys()} but got {fields.keys()}") + + return fields + + def format_finetune_data(self, signature, demos, inputs, outputs): + # Get system + user messages + messages = self.format(signature, demos, inputs) + + # Add the assistant message + role = "assistant" + incomplete = False + assistant_message = format_turn(signature, outputs, role, incomplete) + messages.append(assistant_message) + + # Wrap 
the messages in a dictionary with a "messages" key + return dict(messages=messages) + + def format_turn(self, signature, values, role, incomplete=False): + return format_turn(signature, values, role, incomplete) + + def format_fields(self, signature, values, role): + fields_with_values = { + FieldInfoWithName(name=field_name, info=field_info): values.get( + field_name, "Not supplied for this particular example." + ) + for field_name, field_info in signature.fields.items() + if field_name in values + } + return format_fields(fields_with_values) + + +def format_fields(fields_with_values: Dict[FieldInfoWithName, Any]) -> str: + """ + Formats the values of the specified fields as XML tags. + """ + output = [] + for field, field_value in fields_with_values.items(): + formatted_field_value = format_field_value(field_info=field.info, value=field_value) + output.append(f"<{field.name}>{formatted_field_value}") + + return "\n\n".join(output).strip() + + +def parse_value(value, annotation): + if annotation is str: + return str(value) + + parsed_value = value + + if isinstance(annotation, enum.EnumMeta): + return find_enum_member(annotation, value) + elif isinstance(value, str): + try: + parsed_value = json.loads(value) + except json.JSONDecodeError: + try: + parsed_value = ast.literal_eval(value) + except (ValueError, SyntaxError): + parsed_value = value + + return TypeAdapter(annotation).validate_python(parsed_value) + + +def format_turn(signature, values, role, incomplete=False): + """ + Constructs a new message ("turn") to append to a chat thread. The message is formatted + using XML tags for each field. + """ + if role == "user": + fields = signature.input_fields + message_prefix = "This is an example of the task, though some input or output fields are not supplied." 
if incomplete else "" + else: + # Add the completed field for the assistant turn + fields = {**signature.output_fields, BuiltInCompletedOutputFieldInfo.name: BuiltInCompletedOutputFieldInfo.info} + values = {**values, BuiltInCompletedOutputFieldInfo.name: ""} + message_prefix = "" + + if not incomplete and not set(values).issuperset(fields.keys()): + raise ValueError(f"Expected {fields.keys()} but got {values.keys()}") + + messages = [] + if message_prefix: + messages.append(message_prefix) + + field_messages = format_fields( + {FieldInfoWithName(name=k, info=v): values.get(k, "Not supplied for this particular example.") + for k, v in fields.items()}, + ) + messages.append(field_messages) + + # Add output field instructions for user messages + if role == "user" and signature.output_fields: + type_info = lambda v: f" (must be formatted as a valid Python {get_annotation_name(v.annotation)})" if v.annotation is not str else "" + field_instructions = "Respond with the corresponding output fields, using XML tags for each field. For example: " + \ + ", ".join(f"<{f}>{type_info(v)}" for f, v in signature.output_fields.items()) + \ + ", and then ending with ." + messages.append(field_instructions) + + joined_messages = "\n\n".join(msg for msg in messages) + return {"role": role, "content": joined_messages} + + +def prepare_schema(type_): + schema = pydantic.TypeAdapter(type_).json_schema() + schema = move_type_to_front(schema) + return schema + + +def move_type_to_front(d): + # Move the 'type' key to the front of the dictionary, recursively, for LLM readability/adherence. 
+ if isinstance(d, Mapping): + return {k: move_type_to_front(v) for k, v in sorted(d.items(), key=lambda item: (item[0] != "type", item[0]))} + elif isinstance(d, list): + return [move_type_to_front(item) for item in d] + return d + + +def prepare_instructions(signature: SignatureMeta): + parts = [] + parts.append("Your input fields are:\n" + enumerate_fields(signature.input_fields)) + parts.append("Your output fields are:\n" + enumerate_fields(signature.output_fields)) + parts.append("All interactions will be structured using XML tags for each field. For example:") + + def field_metadata(field_name, field_info): + field_type = field_info.annotation + + if get_dspy_field_type(field_info) == "input" or field_type is str: + desc = "" + elif field_type is bool: + desc = "must be True or False" + elif field_type in (int, float): + desc = f"must be a single {field_type.__name__} value" + elif inspect.isclass(field_type) and issubclass(field_type, enum.Enum): + desc = f"must be one of: {'; '.join(field_type.__members__)}" + elif hasattr(field_type, "__origin__") and field_type.__origin__ is Literal: + desc = ( + f"must be one of: {'; '.join([str(x) for x in field_type.__args__])}" + ) + else: + desc = "must be parseable according to the following JSON schema: " + desc += json.dumps(prepare_schema(field_type), ensure_ascii=False) + + desc = (" " * 8) + f"# note: the value you produce {desc}" if desc else "" + return f"{{{field_name}}}{desc}" + + def format_signature_fields_for_instructions(fields: Dict[str, FieldInfo]): + return format_fields( + fields_with_values={ + FieldInfoWithName(name=field_name, info=field_info): field_metadata(field_name, field_info) + for field_name, field_info in fields.items() + }, + ) + + parts.append(format_signature_fields_for_instructions(signature.input_fields)) + parts.append(format_signature_fields_for_instructions(signature.output_fields)) + parts.append(format_fields({BuiltInCompletedOutputFieldInfo: ""})) + instructions = 
textwrap.dedent(signature.instructions) + objective = ("\n" + " " * 8).join([""] + instructions.splitlines()) + parts.append(f"In adhering to this structure, your objective is: {objective}") + + return "\n\n".join(parts).strip() + + +def enumerate_fields(fields: dict) -> str: + parts = [] + for idx, (k, v) in enumerate(fields.items()): + parts.append(f"{idx+1}. `{k}`") + parts[-1] += f" ({get_annotation_name(v.annotation)})" + parts[-1] += f": {v.json_schema_extra['desc']}" if v.json_schema_extra["desc"] != f"${{{k}}}" else "" + + return "\n".join(parts).strip() \ No newline at end of file diff --git a/dspy/clients/lm.py b/dspy/clients/lm.py index 549029d50..4f32a24ac 100644 --- a/dspy/clients/lm.py +++ b/dspy/clients/lm.py @@ -90,8 +90,8 @@ def __init__( if model_pattern: # Handle OpenAI reasoning models (o1, o3) assert ( - max_tokens >= 5000 and temperature == 1.0 - ), "OpenAI's reasoning models require passing temperature=1.0 and max_tokens >= 5000 to `dspy.LM(...)`" + max_tokens >= 10000 and temperature == 1.0 + ), "OpenAI's reasoning models require passing temperature=1.0 and max_tokens >= 10000 to `dspy.LM(...)`" self.kwargs = dict(temperature=temperature, max_completion_tokens=max_tokens, **kwargs) else: self.kwargs = dict(temperature=temperature, max_tokens=max_tokens, **kwargs) diff --git a/dspy/dsp/utils/settings.py b/dspy/dsp/utils/settings.py index 98d96d6ac..f00577f68 100644 --- a/dspy/dsp/utils/settings.py +++ b/dspy/dsp/utils/settings.py @@ -7,6 +7,7 @@ DEFAULT_CONFIG = dotdict( lm=None, adapter=None, + backup_adapter=None, rm=None, branch_idx=0, trace=[], diff --git a/tests/adapters/test_small_lm_adapter.py b/tests/adapters/test_small_lm_adapter.py new file mode 100644 index 000000000..814cd6b1e --- /dev/null +++ b/tests/adapters/test_small_lm_adapter.py @@ -0,0 +1,117 @@ +from typing import Literal +from unittest import mock + +import pytest + +import dspy + + +def test_small_lm_adapter_formats_simple_prompt(): + """Test that the first stage prompt 
is simplified and more natural""" + + class TestSignature(dspy.Signature): + question: str = dspy.InputField(desc="The math question to solve") + solution: str = dspy.OutputField(desc="Step by step solution") + answer: float = dspy.OutputField(desc="The final numerical answer") + + program = dspy.Predict(TestSignature) + dspy.configure(lm=dspy.LM(model="openai/gpt4"), adapter=dspy.SmallLMAdapter()) + + with mock.patch("litellm.completion") as mock_completion: + program(question="What is 5 + 7?") + + mock_completion.assert_called_once() + _, call_kwargs = mock_completion.call_args + content = call_kwargs["messages"][0]["content"] + + # Check that the first stage prompt is more natural + assert "Please solve this math problem" in content + assert "question:" in content.lower() + assert "What is 5 + 7?" in content + assert "Provide a step by step solution and the final numerical answer." in content + + +def test_small_lm_adapter_extracts_with_json(): + """Test that the second stage uses JSON adapter to extract structured data""" + + class TestSignature(dspy.Signature): + question: str = dspy.InputField(desc="The math question to solve") + solution: str = dspy.OutputField(desc="Step by step solution") + answer: float = dspy.OutputField(desc="The final numerical answer") + + # Mock first LM response + first_response = """ + Let me solve this step by step: + 1. First, we identify the numbers: 5 and 7 + 2. Then, we add them together: 5 + 7 + 3. The result is 12 + + Therefore, 5 + 7 = 12 + """ + + adapter = dspy.SmallLMAdapter() + dspy.configure(adapter=adapter, lm=dspy.LM(model="openai/gpt4")) + + # Mock the second stage LM call + with mock.patch("litellm.completion") as mock_completion: + mock_completion.return_value.choices[0].message.content = """ + { + "solution": "1. First, we identify the numbers: 5 and 7\\n2. Then, we add them together: 5 + 7\\n3. 
The result is 12", + "answer": 12.0 + } + """ + result = adapter.parse(TestSignature, first_response) + + assert result["solution"].startswith("1. First") + assert result["answer"] == 12.0 + + +def test_small_lm_adapter_handles_complex_types(): + """Test that the adapter can handle more complex output types through JSON extraction""" + + class ComplexSignature(dspy.Signature): + input_text: str = dspy.InputField() + tags: list[str] = dspy.OutputField(desc="List of relevant tags") + confidence: float = dspy.OutputField(desc="Confidence score") + + # Mock first LM response + first_response = """ + This text appears to be about machine learning and neural networks. + I would tag it with: AI, deep learning, and neural networks. + I'm quite confident about this classification, around 85-90%. + """ + + adapter = dspy.SmallLMAdapter() + dspy.configure(adapter=adapter, lm=dspy.LM(model="openai/gpt4")) + # Mock the second stage LM call + with mock.patch("litellm.completion") as mock_completion: + mock_completion.return_value.choices[0].message.content = """ + { + "tags": ["AI", "deep learning", "neural networks"], + "confidence": 0.87 + } + """ + result = adapter.parse(ComplexSignature, first_response) + + assert len(result["tags"]) == 3 + assert "AI" in result["tags"] + assert isinstance(result["confidence"], float) + assert 0 <= result["confidence"] <= 1 + + +def test_small_lm_adapter_handles_errors(): + """Test that the adapter properly handles errors in both stages""" + + class TestSignature(dspy.Signature): + question: str = dspy.InputField() + answer: str = dspy.OutputField() + + # Test invalid first stage response + first_response = "Sorry, I don't know how to help with that." 
+ + adapter = dspy.SmallLMAdapter() + + with mock.patch("litellm.completion") as mock_completion: + mock_completion.return_value.choices[0].message.content = "{invalid json}" + with pytest.raises(ValueError, match="Failed to parse response"): + adapter.parse(TestSignature, first_response) \ No newline at end of file diff --git a/tests/adapters/test_xml_adapter.py b/tests/adapters/test_xml_adapter.py new file mode 100644 index 000000000..03aa5ffb5 --- /dev/null +++ b/tests/adapters/test_xml_adapter.py @@ -0,0 +1,134 @@ +from typing import Literal +from unittest import mock + +import pytest + +import dspy + + +@pytest.mark.parametrize( + "input_literal, output_literal, input_value, expected_input_str, expected_output_str", + [ + # Scenario 1: double quotes escaped within strings + ( + Literal["one", "two", 'three"'], + Literal["four", "five", 'six"'], + "two", + "two", + "must be one of: four; five; six\"", + ), + # Scenario 2: Single quotes inside strings + ( + Literal["she's here", "okay", "test"], + Literal["done", "maybe'soon", "later"], + "she's here", + "she's here", + "must be one of: done; maybe'soon; later", + ), + # Scenario 3: Strings containing both single and double quotes + ( + Literal["both\"and'", "another"], + Literal["yet\"another'", "plain"], + "another", + "another", + "must be one of: yet\"another'; plain", + ), + # Scenario 4: Basic XML parsing test + ( + Literal["foo", "bar"], + Literal["baz", "qux"], + "foo", + "foo", + "must be one of: baz; qux", + ), + # Scenario 5: Mixed types + ( + Literal[1, "bar"], + Literal[True, 3, "foo"], + "bar", + "bar", + "must be one of: True; 3; foo", + ), + ], +) +def test_xml_adapter_formats_literals_as_expected( + input_literal, output_literal, input_value, expected_input_str, expected_output_str +): + """ + This test verifies that when we declare Literal fields, the XML adapter properly + formats them with XML tags and parses them correctly. 
+ """ + + class TestSignature(dspy.Signature): + input_text: input_literal = dspy.InputField() + output_text: output_literal = dspy.OutputField() + + program = dspy.Predict(TestSignature) + + dspy.configure(lm=dspy.LM(model="openai/gpt4o"), adapter=dspy.XMLAdapter()) + + with mock.patch("litellm.completion") as mock_completion: + program(input_text=input_value) + + mock_completion.assert_called_once() + _, call_kwargs = mock_completion.call_args + content = call_kwargs["messages"][0]["content"] + + assert expected_input_str in content + assert expected_output_str in content + + +def test_xml_adapter_basic_parsing(): + """Test that the XML adapter can correctly parse basic XML responses""" + + class SimpleSignature(dspy.Signature): + question: str = dspy.InputField() + answer: str = dspy.OutputField() + confidence: float = dspy.OutputField() + + xml_response = """ + This is a test answer + 0.95 + + """ + + adapter = dspy.XMLAdapter() + result = adapter.parse(SimpleSignature, xml_response) + + assert result["answer"] == "This is a test answer" + assert result["confidence"] == 0.95 + + +def test_xml_adapter_handles_nested_xml(): + """Test that the XML adapter properly handles XML content within field values""" + + class NestedSignature(dspy.Signature): + input: str = dspy.InputField() + output: str = dspy.OutputField() + + xml_response = """ + Here is some text with embedded XML that should be preserved + + """ + + adapter = dspy.XMLAdapter() + result = adapter.parse(NestedSignature, xml_response) + + assert result["output"] == "Here is some text with embedded XML that should be preserved" + + +def test_xml_adapter_error_handling(): + """Test that the XML adapter properly handles malformed XML""" + + class SimpleSignature(dspy.Signature): + question: str = dspy.InputField() + answer: str = dspy.OutputField() + + xml_response = """ + Incomplete tag + + """ + + adapter = dspy.XMLAdapter() + with pytest.raises(ValueError, match="Malformed XML"): + 
adapter.parse(SimpleSignature, xml_response) \ No newline at end of file From 44f8e183cc48c16e57bc5817d71c5dd497770c10 Mon Sep 17 00:00:00 2001 From: isaacbmiller Date: Tue, 18 Feb 2025 15:46:09 -0500 Subject: [PATCH 3/5] reliability tests --- tests/reliability/conftest.py | 1 + tests/reliability/reliability_conf.yaml | 6 ++++++ tests/reliability/test_pydantic_models.py | 8 ++++---- 3 files changed, 11 insertions(+), 4 deletions(-) diff --git a/tests/reliability/conftest.py b/tests/reliability/conftest.py index 9aaef2f28..8dfcf7e2f 100644 --- a/tests/reliability/conftest.py +++ b/tests/reliability/conftest.py @@ -21,6 +21,7 @@ "llama-3.1-70b-instruct", "llama-3.1-8b-instruct", "llama-3.2-3b-instruct", + "deepseek-r1", ] diff --git a/tests/reliability/reliability_conf.yaml b/tests/reliability/reliability_conf.yaml index ef33cd94e..1a18428e8 100644 --- a/tests/reliability/reliability_conf.yaml +++ b/tests/reliability/reliability_conf.yaml @@ -73,3 +73,9 @@ model_list: # model: "/" # api_key: "api key" # api_base: "" + - model_name: "deepseek-r1" + litellm_params: + # model: "/" + # api_key: "api key" + # max_tokens: 10000 + diff --git a/tests/reliability/test_pydantic_models.py b/tests/reliability/test_pydantic_models.py index 9e62ce517..1853fc885 100644 --- a/tests/reliability/test_pydantic_models.py +++ b/tests/reliability/test_pydantic_models.py @@ -90,18 +90,18 @@ class ExtractEntityFromDescription(dspy.Signature): assert_program_output_correct( program_input=description, program_output=extracted_entity.entity_hu, - grading_guidelines="The translation of the text into English should be equivalent to 'coffee'", + grading_guidelines="The translation of the extracted entity into English should be equivalent to 'coffee'", ) assert_program_output_correct( program_input=description, - program_output=extracted_entity.entity_hu, - grading_guidelines="The text should be equivalent to 'coffee'", + program_output=extracted_entity.entity_en, + grading_guidelines="The 
extracted entity should be equivalent to 'coffee'", ) assert_program_output_correct( program_input=description, program_output=extracted_entity.categories, grading_guidelines=( - "The text should contain English language categories that apply to the word 'coffee'." + "The extracted entity should be associated with English language categories that apply to the word 'coffee'." " The categories should be separated by the character '|'." ), ) From 05e18d0181e9723362c1530a6d6ccf9f67688977 Mon Sep 17 00:00:00 2001 From: isaacbmiller Date: Tue, 18 Feb 2025 15:48:52 -0500 Subject: [PATCH 4/5] add xml adapter to reliabiliry tests --- tests/reliability/utils.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/reliability/utils.py b/tests/reliability/utils.py index 349192fe3..a8ea3b7eb 100644 --- a/tests/reliability/utils.py +++ b/tests/reliability/utils.py @@ -148,5 +148,7 @@ def get_adapter(reliability_conf: ReliabilityTestConf) -> dspy.Adapter: return dspy.ChatAdapter() elif reliability_conf.adapter.lower() == "json": return dspy.JSONAdapter() + elif reliability_conf.adapter.lower() == "xml": + return dspy.XMLAdapter() else: raise ValueError(f"Unknown adapter specification '{reliability_conf.adapter}' in reliability_conf.yaml") From 9c005af1f086e1d034e7f09d8bc0bdafb318642a Mon Sep 17 00:00:00 2001 From: isaacbmiller Date: Mon, 24 Feb 2025 15:12:04 -0500 Subject: [PATCH 5/5] remove old graph --- output.png | Bin 16570 -> 0 bytes 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 output.png diff --git a/output.png b/output.png deleted file mode 100644 index 6ea0cd6038b36fea712ada469e2d54b84097ee4f..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 16570 zcmeHuXINBOn{BDBb}LG&jR6A&FdzaFC1XH9qU4N$j06cH+0bG_LWv+CpptXWNktSS zNzR~>DS{Fu-?h7EzWJV+J9D48Kjzo;(_O7paq67C->}wt*M6udFS%(0;|2Z7ayZVeyYouv^&}bNJb4*x)L^Sxaf!QYiG7$$zV2#G*|p6!F8- z=T58Kd@xN1m=@U!-K?^~eQ8V@&&7T(Y 
zE(dOiJZxsF$sU_KZfYs!P@h3pwbgpfmj2oM_niw?J^EEAD%9Jyf6utFk?3UT7WU+Y zz&cO_UHeKr+ky~-+J|-f5OOnVf*90+x~?vb2jYiYkS3h+0`2t z6LVCPZ9A>Ew>M%d%Y_)_X!-fk?C!b~#m=*q(_@|5nP!dD^mJSQ?y^ZGB_(Q+f?1vkU$xJ`lE2OslvBb0E>ecrOYg|n~mekiP^ILVv@~`xkKW(}?@pe6vLCeQa z&yE|WYN!<4ydD`9rPaQh*UT^Ss)$*`QE8=ErO2rc+Zw~1Cr@_sf4Y)tGE^5yUtF)r zW7%=)gnp6m$Azk~lQn}o7ndBm8XHynK0Q~%|vMxu7M{Ahc2GnS0U z<)?M4&|sySK7jKvpz2t1b05rA>V%c>C>l{9XYQUj;qC3v}f^6ZfDcjudd$gyzHx) za_RPkXRMLw#x=wDncPzLb+0VX77w>&#@pwLIM0|2H^e#(ggB_YI(8(EYBIxgjU2ftvTLhZh}6eo8PiS5znt$?B+T>(yClkRMeJw_5OLUJ+b%<4Np)O zz!YP>V@;XFWPcSm^DgI^kyKMA;hQN3${##@m}=_%Ae&J+)O_-IdYk%dgv(z8U3*+rhQfJbV^6DbOa=o6I*kDU)_}8&9OX0~%?(pnHT80d2i<_!Ov}~t$U%#UuUIC&Q8za?8(Z57wJ>C^TF-|k^_kI8QE}%UZgu)m zx+Brz@%9r+iCUQ|F^UHqot)wjQ1gf*hni)Fk(O7y=1noqvtwqRdG;O^4N1D%dj;)s ztP&MO=OQA6!Z`#C@wi%fcCk^ifz%iXrg_v6(|upR`hHz5UYw|yn4ML> zaN*I0KaRfq$mA~E$;DCd{SI^IB+rhC^>^q-zKyh|QzLM(0gW*1Uwwa7FvITM=JiGP zG&S8E>(k`EnN9JI(_CMGYL4|~xfiD{ZaXZi7=Hdq2GZPlrua(f2I)HY<;k%7NxY+S zc_y{5={p0MBO@YW^o!j{lECTmZXn9Rile`irAhlZIw!QV{Gy|cJ!pmV!|~KHowJ8J9mR6omhy@g z#)N|V>g2-;e|~#Bw7Rylu1nIz#br#BW5nCsz~{)fo*w(T2}4rcvG0r`P8m8nI$VW5 zTtaP`=FwXYohO}VxIH^LQ{?dB^ZWMgGwUkIWgbH8K8=o!MuD@f;ZG|TLe4Tr=0D&& zd9rr?m{y)`;_dA0>?$tQvCn-K0nez^*H=G!4L#xS@?l0W_TQd4$(?RgRfl5q+Md>A zm{)mw&H6v~?oCY6%{QAFX{APV6*^ZmH>)$be80_it;Of-1GS*jch|*A`5q*d5Uu2( zv{_@^GqZ-6SH6cNSp@~#jOG}5%}lCa2sAajFV98yR|bY7pp8t!HZZV841_w%qee8C zCl^LFG^m`=%8)8_n*L-LA``%Fij;li>FLM(j7?MNz<~o=DCJq+)3e>~$&&jVnYC#b z9M13bpxgC2UOhQZ@zt5OG{eXHoJUU1&CQu&zsQ0pDJvsN`0U3z1nS)oh$}J1HKDA! 
zd3I5k+xC4F=5|=AwrbCkyZXV?H#k^3uX_%c8lLE@XfiigN4Il7GVQ^GUuj&DUVD6* z(fdx+k3M5ny$~82+O)-wfDx3SN2u#_#Vcd^<918)_L=6*W1-UuK|Ip<5c*8(nsK3t z?T5I^wtfBj)v4E4@{iwte@PI)*x1;7f3VfmV7PaG3m*_i^l)SR@xzBdwE1IG(6Hkq z9&c|_RznfUL%1|0=`M&b9bsmcQp!`PBwKXITlh){BaexXWhWMT*jI9&FTxs29Z?^x zV2f=b{FfTJ*Ii= z9IBQ`RS;Xs$Q;i{gRAeiN4;BKoK&D)uY82kWsc(e=*g2tda=cqEiEn1q2DwzOnlM4 zt}Xvsgqot2Y4$8hCzskc+LfqR=%kf@!y@lkx=_?II~$wqv17;Did^#}gbs0BzI=JZ z#*OtTq0G|v>6z4YbY4`IcD7#jz8V^s?>?Z9oxVcF+CV`qPxU^?wlv?F?yc`O zC$V|+<_#M*q?yjn&IaID+)!nmla3!N5jw-F67v#WZE${?#^)e|s%}`#eo}^Pg zHT9;OKsJ2D=gO5U8SX1?SR={Pr%&$+{X~tJn;eL@?93xDGcmiTDdytYY|G9%DvL}5 z;2@jDtR>nHpTo1r>o%X-+KsD%cnNy4XibvEph>KErKFF0QVbScXK+ z^uG`h`Z14HmoO>EL-O7oCr+Kxs0&f3AuVMFV0-N^zewdx-aiz6pZe^CHeZRDho=4V zqT||i>)J9bT2fB4T=4r0)b;w9a?~JtW0T{b(<23)lIWM0B>^EyCGn9urM>nTq%9jl!Z`<$8TsA+)QoDwn_#PA4IGNhN>Y$ z6ZKrDRe7ztbWl}3O%%N=I+gjS@W9|;%#WUrG(00vu#?x!-km#R047+mD+D8{>$^s( zC2B?PUOI}v)ygn=707Ko46sJ1f#|{rH_8cXeNlb&D2GPMeo6D{4estMcF}xT{-|!Z z1vyZh;qKz%)oa!y)+0jfzPCb!jOu{A zA^=k6%^%;`ymQ_z|H_r}`1}*bSfpSOm*EAkZpQW`U7^vATxC*xaN&B_`T2!y;(Kvl z^y2x=7G(%ar}?4i>GC6CgLsr`>%jGMy$kzwbaff_?X%=&VAs(AByIwntdm1$Qo=K5 zUTfvDAF7k63sGK2t?LXGahx!aPSc|c@kJ!WI!zBh^76{c<(>WhwPYUbBGGlgcb$o^5wjoaBs<`STRR#j_6x(xHsV~(P-Ou=y$RB~W;JisIT{L&_7Q-}S) zg9l`qKj!4;x9?f$pf+9&-Hp5sM+pQdYwfsw>dO0Hb#rYbK6)#ppWVRH{$s~LF>AF7o$T}OFG)LCrdpzT6JiL8%-8OH>z3>90+;y_;Yz`L7Jo1> z7#2$X2+(({g#y4XJ^$k)6N@zHaVP-&>g0lnPp%69pov{zb%kKntV$75dCGzWX6m~y zMIwEbyhZ0@NP?gw%VGDj9rMi3gLWlDWI%+fp>YTDDn^L3IsgSaItASI_EzvvaFpA4 z+@S`vDhO041_a_9z&Amwc%a0=K@$iT!=Vmr(sc{t#mVRo$C#PV0czA%1@ogl@Hjd; z8ZJzoRXcU+6gb=ESsfz)J3;$lzp73%hvR56_|{mI1QX!(+>8Ap_x}8|45TR2sQOcPENk~QhZf7*d-W?h-bI^ ziinJBcY$O5bA;8Fty{0$baG;pmzVbdr1=58Wfwv+#c|lgNMWWh;J#y;RZwN9tqLNchM5Z63kF!3WCc{q&vp*p}Q(IkK?ax#Xpf>(D7EC>&yE zua0t^o9O#cuOMde2fI#-_4Vr~@7%fL$=~fV8LGg$Jnp_?F2#kPaCIhEMtO!tcrKP+ z*_q3;eKUpPQ;~BXlH&92r~j+hCb8w)H&HV4tiRp?s`fMXp2f%MKXTH6;;5EB-cj>? 
zqh$SJ&)v#<8+Rn=3**hozQwsoC(teKkQHxwE>ry#x8=n-HuonY6iQ0+pMU;>PN=cs zW!%riWKbu61iX5y0foXDxS+fJU1`z37Xg=lG?!5*6JZDgmLo^n^XXcA)6>)U?A^!ang~N zhQ3c_Mx$bDcm3-_2U%E38LvFre!^_r=900cr6n~2h^)N1nZG?cFsEe)#pg&}(!*mK zDKA5gpLX+|N&n8-Ec4fIKB||Ndb&+)<1f3QWNMMu<-XUiU-yoVTDTHbPc>O@>D9cj z^GpKU_Qx~h-7AVq9rQaER#Qfj8hpkl?3|5{@VLM1gA5n}jBbv~#y5KL;zdTzOSgus zj#vRIhPuvZItTck`1{%&9UTb{TLL#l`%@@otI!CQP&1|=Eyif1USZYBh?$=nx@s#a z4^fkdO$giEoSdUak4oA0SEg2XXP7=6Zc049W5*6Wz3f>Jx>{sw;w1TcphMF)w^p(0 zNKmPb50<}xCxfRkDo`f(=BK{>MCB^9%7eVa0rrSRr z?awta6de|K=H3PydwW%D>lCBv7pC0q=!%5kqO+(1v@cE7E3}IE(f_%B4Mlu&Ys%%j z$c@BJyZMd+%i!4|Hl|g(PdBPhLw_SGouJcHjOg;ut3d5iRJVmueu6^dl%u^i?>Y6d ztW2sZkXw>e=qkR~;o*HfJ>i}C4)H_NJb5ir2r@;ckbS8EO4|<)GyWp zmZ}Fq&A9pFN}_JQ8h9HuqApTKyE#O_Hfr_i)jY2Ab`Y-Du3t~k5hKJs+{}UX_l2&~ zL!qpPI>{`3_WtHZ)Zbn_D4M-AmE0~?VXyE_MF@r2Gh#NqQX*Jn>e!#JB$-@QKv$L9 zvn*Td(sLBLx7cbApU+`x; zCLO1h8E0RFg+PME0mW#vBfyx;=YOv7fO$8$?>MG(4gjSYR~XM%XinB|9;%OSjFj17 zu0f4x&$fc_RiWkFW*YCA6%b6u10DAvLwxJ0_m=-qTB^D7T*jfOcFq#wJIWN*@9K_v zql_o{?R9q)+ioy5Vb_?Yp#iI*lU?iYd!MZ?#+Kx}it7hz<5iQXcE@#{c zIwGrHL5|zE!Faf43$~;_(iSCc^46OzYWZ zw?VfjiUTN~8qysgJ__=o4E2KF(N_SEW+d~0X`E(8V%c@`G&n|FHGq3R6XXm!e~gb$ z?YLTk;_cOI(=sf9Sf)`uAgwsYDMdcQ^dJUf9NGiV`}gmwC1|`t+{Ys4=F!sPkwg7b zowHX@j_oU%Uw=VL$|oQo0J0Y0N#i!(Q*z9YjbCrxY9j@NO_mU&jNr#-eDev{CWsWf zJ?p9_tC!zgb53o=jr-sl3PtuY3KCx1lUGrGt~YOp@m(%g_vQTOxBmXX|Mm4&d5DNJ z82~NJjtAHcm=@uWfN=DUR@hM}@a^&wu6WRo)vT8m_aQ z4rDsji}?-d0Vu*w!?DpeIb{hOfBp6Ic<~B5wlcr*5(n9mv@g(3H*DJU47|O|eZ^HZ zR!IUHqgJ}n6O{gNC~mEZe3FuqpwjIY3p4LyS@1hA9mc!TG}zh-owE-fJgAbSBcK|m zBE4q)=7#DJAr@&&9h6ZeAm1JUs36iOm{G>e$-n!TE&+1@|9JZq6uRRqED}!-%Ta-z zK!3>t1Y2mcZI?KIp6H;E(v=||65vN$EDqE9REA=M@y%g=etvZQ826RsCaW%g(q3ah zq{zoFU3!V3jn7$0b_Y^sa=cMEWT2I)g^9ZMj*W#{7U5ALwl$_1$}9mTp`<^BWECIg zzN`rbl&(1gZb8H$wPd|Sj-sDu(CRKCz^4&qX7y2WgbtuYMWH6OCh%kWBysNCAaZ8- z^Q*f=sog8$#3M6}FViCG&$)AdZbs5Rf4aa0)=1u0+w=+B&Kt0aYGAxDJ#Ra@>CD`H}P*uw$0d)^te>3VoR| z_A?vn$R*W;RW0GKXaw)n!bA;Hmr9X~Bhk9?Rf?!r2}{|Kn{eaLmyLNZJ3+W;(YEgZ#|h;6%!m(FL} 
z$jG(HwkxRyE&f44^Pk%<*p9aGP7l_e#RyG-KN*N!@cOrNUKYobuRo!qP-ZViNNph! zJ-1P1O0GDc9Jo5srzQt#NEvLJLGe4v&Mprq<1)xt$9}YE!Cp;asUbmAna~kMMa8^W z@!P96Y~I|2GMDKvu16CD<8dDx_}se|$dRJSdG!Mw85t1WYNCR}o(StAPAZt|s!B;c zz!Xw7@&beX;OEl>K43AYhvSpuFwBa>AnB}?JCY(u$RSDo#VMjklVZoWgVt5?1o`2v!hB@3ZNyB6!O1k6 zwND>H;wi(>>Bo;B6H`+e4gxnd9G9jP3>JQidDD&I!Ej|DHz=ZCe0+Ri#YqMT+R)<2 z7w4eUtVXl| zXNT%J_k7qSq<&}d;HdG}aRolN-*pbHp_Fa&^75LPnaOgyuZSTZX6Pe23?py<4bP)U zfD@#5?jV;jUR+vYIo7%3;J5Db_V)I9ZeLx5zIbS=1{;#@IUe&F2p+(`WOa2lJ@3}1 zACgWVMqvZ5szYdD7!{8tibo(Iqn_L6)N;~2*(0DtU<88j%-OSKU=3xYE!~dt=B%}y zU8S$sm0P+TGg#->2M-3A8?dnT!MUVL^Zd09DeY((jlu&$Sj#@;-G6TTB$uiU5vg z8ID4O?6y_tEUotE`XN{}Ly{w%Ub+ssnt_=bzEYNIhpBJrhYzg6!X0^DXqV_U(Ga@~ zT8u#!wr<^e?J$M%y~dzBAcJNIWC%h{>zQ0$4h{}pn-zeDTKe&$b~G}0 z;8H0(A7ZJr2bC{R-z775S|&TXQY{$|kJR?YYZ^^g#z@_) z%wdN8tNs8RfBY)S*t-Y#Fy{$U+5Y$S|Fr@~oOi(M?K^jxEM;HbN%T~LfS~nX zpb`O{OfjJvISoYyOcWR)Cf$)1n3NPE^WcB0 z8!h1gYd+pByk(1vlQo#7svuu7jj9;I8D)qk2KEAGg7B%m0|UnCp`V<<8ZE<;%bUI2tL{Ka1 zTA;_~y>XyvM>#naA)^w-G@+sZgFrHd&2tz}%E>v!Yw}qf=+{5~xVPmmlrP=YWBM1r zLv|R-9SE(kE5i3ZKY5i0!$si)I0T>H?ks-&|LdOn@4D^&Tc6*@TGo+gFM};?2Kus% z+ee|?VyjT|$-Um=k!Lsb8q*ZeybFR3V=tIvq1FU(UOwZs{a3_P;I*}ZV0**B=)_3{ z*(h-oN(q5Eso(IdW2MK0j>8bAgP{mfpP)x&7LrS$>3T7sLR)jRGWyQJNCZk1oP<6= z<9iEPazDS}@iOAl>2{qBnYP!uIR_;Qngx~n8Q@8WWS0C1P;GgPChGy`>=tG$mcTX1 zpEJ;wi6#(j{tFtlV%B*Bj3-`#KMi6;M96B<%Ca+Z#_XflUZU~KHiKG#CDNQ3d`&<`z%BtLn6O?N~)>zd}1 zUEJp(J}AS3gnYN;E(U`VP(rtTQ;7K<5SS#eJAsljW1Z&QHCIx9M_P)9mZ|v&fTzee z!tx68wK+rNC@`oJ2|1z9v)m=l7bDyn3fsk$!D4G3uN#yE6Vq7R1NFOrDY1OU=?ki>J^LG5UN z;y5C6Tw<3oH-M2V3Idn$PHVhxa~7KbE}LQA%s%(?rwOVVkxMXm%Yem`STqH64`CrZ z(s&OZl145PSZABI9YMIBnz$v9h!fc8p|<7(&2a3Ph}}h8Rns?dKR2KVI6;ItKiydf zq>lf3JE!}x9&u1v^Jcz$xdi2P_|;>wnUAim-NnJdv4kQx)TR(3$l~BId!HStx&L?? 
zg8`C0mf#6c)TS)J$syS78n8rRS4&CE$zU z7dqRTx6}q84lS%1#nS%En^n2Fxv}UEFgjEW&Ew9wro|6&!IAShyW8!!G`K!74-iFx zUi7CpOnL;|WAI`VsfZOQc7C}ZoNDJ%_sUY(^fYp z<@j=cI!4?g(+$c^&4gb-!sLZGylKxVMLbVmK7?rTP<+Z3Y8U~9`c`cj(pcK`q9jPP z)nQ^`wvFUbOV+I2B_<{Yq4PY|j@EPnML~h+mDRPRF&tpVg=VRbJ!$lRL8?fsYm=qse zMG-Hea@?sTlQ9H)JkpwP)T-cz2U%Bqs|m7&%u+BXwuolMr`@j3?k7Z;@JvE5zP`Jc zIEq^h;x_g=tPq+&W{M<$3mhj&9l}Vr-iv{>`*&lfqZGnKi5Ced!lDiQGrqUiAeh7_ zu{9wa$});0BACJBT4X7dFJZ^DYkbR(VHfL)To*Es%XX7M^r{$_!C%;(u>e~DA0J=+ zKl-j+ETWGot^GkI+#S>Nv}AMa1*H9|R5!G)xSv07(%xrZUjy1;n7uz81RJ`DG{}Y@ zT)8h^yZ}f(Oz;YZZ4;A|^{5}ju9)71ZrwLN9%fn$C{LIry89ckr=9XANR$yfp`14M zkO7nGw|Cbc-@jj+fT3cyLgYI$nOQdH+7A2yt=XH3o9JmmH3PA4)Am@ZpC?LKR^k zBSu7gco^OaKW1QY1u{#3)=JFX&VJ;#AG+8-k;NyOiM^2;f!}3?BL^_2$q@4*a?W71 zCG}8=D2le9kIyA;(0lhNmu&_RLU}K}p}<8N2Xh%U0+UaMql)4H0eltnf9G$B8!WD9 zNgOmNkUs6epcJ8U{=aGknGL{oKBc%s_fIHUK;;19+G@nH0q;LacNStPG}?-(5_&NJ+|NdC z1hc>}gRIkT_hJ%Df{=_XWKOO?g3WpAN*a)LC(8t-mP{EFnhS3_I+j;e5trL=UF73+ zoA<<^Dg%xy1H~yzN&V`;hF-F;E)Zgvv=Y>sYi$En!O=k1(p1s;!9$QM$p*tiLZlT; z9|J!=Ta8j#mUQ|9r{@BD&+u^EE^Z?e9B*(`L~G@I>SEgr81v@t|75WK?|1!YE5Ie; z6L|8f2k;|{y1F_UH|0BwXE{idHdHoy`Y<3OnOcZxHM}yBpc+JSpJ7MhwP+S?<|58Sj53JN$->s!TJh%1oEsx8mp^_2 zmT~)HFur=M4@O;_P4NY5mCMHq24MbQ1rb+h6lq0X-rfPEtM=Y>jg{uNDNyLpzEhgw z@Y{-L?)Usjt9J|Mx_Vp>diT{ipKI&3D9M>!F~+yHo`j$9$vzQ5U1xFeTYKY9%TJ~` ztXet0?bI;m4!0|l8tlcAI`CBJ=>=LH(qB(di-4fuFoa0HQIssd;$n9X8p&HMK}M0h zoZMfQqt_CTaaO3;tguWMV%G*tjL@t9o`X?(&lV^w+QJ_JLKz1QYgUI{e|_NUwHspK zuOl555Rzp`o>LQloSzvD{wnylxRz=iU6RtkQ<+}W{he%>)Qd3jo^a}GZp9vXTeF=nQ z{OR~zPi7Dc{(5EUpR+Kn)M88PLj-@h#e;8J(C}R7Yz8p%QRt(s|3=W`MyR^~!Kspa z81e$9pn34O^bQZR0$!pnk^?#z@y{&>f)kcsNiD;_fCYdh4VYWjRaFpALA^+JG0fWn z0s_RgmRk@_nNUI*Nf=)PtU*{j5%J!?KS$$&4%rCml7t|dP;78NrQG4RX>tzUd5Fime|sfcFo#u}C9~|MIMD z-TE42uO(Nj_wXG$8wpt8`Y?=v*hHM*O*5D&#DiGzbsK2`CqWvm=Y^8yi>w%bC{(PQqII}hs zsL%!^7^pC=VgRa;bIVQ$?+d(7gYFz4aUvqjN({_KPJ{2wzI`xDrWZX&))#h=Uf+LHcEh`47a97<47T~ zB%=~&;S67ebqIVS$I<+WPneJRbrZGKafr|LXUH!;N8mt7fSP8Gff2E3KrTsu^${3V 
z=nLbn-W)6q2nkMubVLNamG7bdC;w?^^!1pKn_lVc?8?tj+P?g(e390Cc*z5&#d_{3=qh^S{A`g)j$sbj9G z5+%EfpcV)l=`&yo3CIW{^S8F?Q)c?Ue0k~6JxhH;o*q0=ve60aG}B-~!Dgk}4X+ z5XJ^B)HSjRnEGPO(ExnUOeW#=aJ7 nb(Y#5#2HZ&X!3I~a{YZ|-ZD5Mh`NBj+)R z$Vdz>2z{bQ5-TWi?{}I7b6&m!j5hu0c=8}7ESMB#;lNBd+&t}xhhdw8>yqFkkp6~v zbq-P;VNPwD<27sWVJ0SUoM%l_epa?=d(YT8 zRzROU!Zi|Z@?I@D%EY?U>%@~_ID87Z41SD2OFe*MgH#Pnq@oKr=xz<+s=W_y-J;-} zOj*Co66R8w)?d$%BLH5{R&jFQp}Qq~k=QtY`Q`7^Pp+_Nv1+D8OgmdM zt1%gOz*7}mq?V);3y4bK9BL$SvDaHz(V7yWSrFkHZl?x_S+d1XIw6ST6fd~3ABrfM zNMZ{HD!tZ}sX{?=?9Xi0s|Wc_NG8M{AiAAx@;@yQ0n9}9#euv`;v2^;#Upcwsv*Y| ztq`UL?#-}e%PZt4F`H{=TdHD4#hpNkO#{4A5jcCOaYP}s0o@;?-)DQeoGH2QD7SQ) zV95zb4gCP9K?nC-4@?*z@I;${QpkZtPUt+Qb|Wpde;tFwn43)_ItbIhc$PCG@pd{YnfgNfu0WI^B&ZD{QSdJZxJIG*eJ=L z;oCCt!`b2-jPuy{uA=1L+d|d`M{|hRfq{V`5+yVi|4sb(ky%>FN(AkiPk?3qF@9&NDVDMFe76G$WRh$36RRwAU z#C{VpU8fQ8qu(AinpoFhvx*GH5ksHLy0C1*z;_QadmLho$gq$R6y-;_Zt1*#fWU=K z%|t}57q8qksBS1hb(lJ;z+XUw5;B;@uQkAOCaqKxDl+}`K9d=$1vy3tSv4Luq+mB< zrzA%mth$TH&rw&%JrEZk%C~~LvWkiy3cCsxmw`<~5fjb;1UWfc&>`?E#fA>^?ui$$ ztpQMRzum7yb-jz^VQ_$)oFM@f^0ymk_zYb*7KXTZQhPBAK}H+bOa7;WeB&HB zz!0d*+GrreYloH^4NnA7rUdePC^j!jIq6EeBv4EY*8gMd{@>E_|KpkbWhVQn!odBz S=fVFd6ln?hb4h0|-T6N-b1A|A