From f40c35fd149965dc50e45c9f45b8813853fd2bce Mon Sep 17 00:00:00 2001 From: Siddharth Balyan Date: Fri, 3 May 2024 14:35:07 +0530 Subject: [PATCH] summarization step added. --- agents-api/notebooks/03-summarise.ipynb | 762 ++++++++++++++++++++++-- 1 file changed, 724 insertions(+), 38 deletions(-) diff --git a/agents-api/notebooks/03-summarise.ipynb b/agents-api/notebooks/03-summarise.ipynb index 88357151d..a934fd1b9 100644 --- a/agents-api/notebooks/03-summarise.ipynb +++ b/agents-api/notebooks/03-summarise.ipynb @@ -33,6 +33,13 @@ "- REMOVE: Safely remove messages from the session" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Helper functions and Setup" + ] + }, { "cell_type": "code", "execution_count": 2, @@ -50,20 +57,24 @@ "metadata": {}, "outputs": [], "source": [ - "make_chatml = lambda content, role=\"system\", name=None, **_: {\n", + "make_chatml = lambda content, role=\"system\", name=None, index=None, **_: {\n", " key: value\n", " for key, value in dict(role=role, name=name, content=content).items()\n", " if value is not None\n", "}\n", "\n", - "user = lambda content, name=None: make_chatml(role=\"user\", content=content, name=name)\n", - "assistant = lambda content, name=None: make_chatml(\n", - " role=\"assistant\", content=content, name=name\n", + "user = lambda content, name=None, index=None: make_chatml(\n", + " role=\"user\", content=content, name=name, index=index\n", + ")\n", + "assistant = lambda content, name=None, index=None: make_chatml(\n", + " role=\"assistant\", content=content, name=name, index=index\n", + ")\n", + "system = lambda content, name=None, index=None: make_chatml(\n", + " content, name=name, index=index\n", ")\n", - "system = lambda content, name=None: make_chatml(content, name=name)\n", "thought = lambda content, name=None: make_chatml(content, name=\"thought\")\n", "information = lambda content: system(content, name=\"information\")\n", - "summary = lambda content: system(content, name=\"summary\")\n", + "summary = lambda content, index=None: system(content, name=\"summary\", index=index)\n", "entities = lambda content: system(content, name=\"entity\")" ] }, @@ -89,7 +100,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ @@ -104,9 +115,16 @@ " chat_session.append(make_chatml(**result))" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## One-Shot Example Chat" + ] + }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 7, "metadata": {}, "outputs": [], "source": [ @@ -292,12 +310,12 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Entity CoT" + "## Entity Extraction One-Shot Example" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ @@ -323,12 +341,21 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Trim CoT" + "## Trim Messages One-Shot Example" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "*TODO*\n", + "- only output the index and the content of the message to be changed.\n", + "- [!] Index was not in the output\n" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ @@ -344,14 +371,18 @@ "\n", "example_trimmed_messages = [\n", " *example_chat[0:8],\n", - " assistant(\"Haha accurate. He does push boundaries and keep things interesting!\"),\n", + " assistant(\n", + " \"Haha accurate. He does push boundaries and keep things interesting!\", index=8\n", + " ),\n", " *example_chat[9:10],\n", " assistant(\n", - " \"For sure, he's definitely made a huge impact in different industries. Excited to see what he'll come up with next\"\n", + " \"For sure, he's definitely made a huge impact in different industries. Excited to see what he'll come up with next\",\n", + " index=10,\n", " ),\n", " *example_chat[11:18],\n", " assistant(\n", - " \"Glad I could make you laugh! It's always nice to share a joke or two. What else is up?\"\n", + " \"Glad I could make you laugh! It's always nice to share a joke or two. What else is up?\",\n", + " index=18,\n", " ),\n", " *example_chat[19:21],\n", " user(\n", @@ -368,7 +399,8 @@ "- Though a tinge of guilt lingers for this routine coming to an end, akin to secretly devouring ice cream without my younger sibling\n", "- For my next foray, I'm resolute about \"buying an ice cream\" and sharing the entire journey with them this time around\n", "\n", - "\"\"\"\n", + "\"\"\",\n", + " index=21,\n", " ),\n", " *example_chat[22:34],\n", " assistant(\n", @@ -382,7 +414,8 @@ "- Highly engaging and enjoyable interactions\n", "- Look forward to more amusing conversations!\n", "\n", - "\"\"\"\n", + "\"\"\",\n", + " index=34,\n", " ),\n", "]" ] @@ -391,16 +424,52 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Summarise CoT" + "## Summarise One-Shot Example" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 47, "metadata": {}, "outputs": [], "source": [ - "# TODO:" + "# TODO: add and optimise the Chain of Thought\n", + "summarize_chain_of_thought_example = \"\"\"\\\n", + "Planning step by step:\n", + "- To add context for future entries, let's add one at the top which outlines the main entities in the session.\n", + "- We can replace entries with index 1,2,3,4 with a summary of those messages.\n", + "- We can replace entries with index 5,6,7,8 similarly.\n", + "- It could be disruptive to remove messages with index 9-16 because that might lose the joke's context.\n", + "- We can safely summarize entries with index 17,18 and remove them.\n", + "- We can safely remove message with index 20.\n", + "- We should keep entry with index 21 because it's given by the user and they might ask about it again.\n", + "- We should keep the assistant's response in message with index 22 to keep the context.\n", + "- Messages with index 23-32 are repetitive and should be summarized.\n", + "- We should retain message with index 33 since it's a direct request from the user.\n", + "- We can safely summarize message with index 34's essay into just the salient points only.\n", + "\"\"\"\n", + "\n", + "example_summarized_messages = [\n", + " summary(\n", + " \"Event: Camille says hi to JaneBot and they reply that they are working and trying to stay busy.\"\n", + " ),\n", + " summary(\n", + " \"Event: They discuss Elon Musk and agree that he can be a polarizing personality.\"\n", + " ),\n", + " *example_chat[9:17],\n", + " summary(\"Event: Camille appreciates JaneBot's sense of humor.\"),\n", + " *example_chat[21:23],\n", + " summary(\n", + " 'Event: Camille asks JaneBot to play \"What number I am thinking of?\" game but she keeps saying \"no\" to all guesses which JaneBot finds really funny.'\n", + " ),\n", + " example_chat[33],\n", + " summary(\n", + " \"\"\"Event: JaneBot wrote an essay about Camille. Summary of the essay:\n", + "1. You have a great sense of humor and enjoy playful, lighthearted banter in conversations.\n", + "2. Your creativity is evident in how you approach topics in unexpected ways and keep conversations interesting.\n", + "3. Chatting with you is enjoyable due to the casual, friendly tone you bring to interactions.\"\"\"\n", + " ),\n", + "]" ] }, { @@ -409,28 +478,35 @@ "metadata": {}, "outputs": [], "source": [ - "chat_session = [system(\"you are a friend who likes to give life advice\")]" + "# chat_session = [system(\"you are a friend who likes to give life advice\")]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Loading Test Chat JSON" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 19, "metadata": {}, "outputs": [], "source": [ - "# Load test chat from json file\n", + "# Test Chat File\n", "import json\n", "\n", "with open(\"./test-chat.json\", \"r\") as f:\n", " chat_session = json.load(f)\n", "\n", - "for message, index in zip(chat_session, range(1000)):\n", - " message[\"index\"] = index" + "for index, message in enumerate(chat_session):\n", + " message[\"index\"] = index + 1" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "metadata": {}, "outputs": [], "source": [ @@ -439,7 +515,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "metadata": {}, "outputs": [], "source": [ @@ -456,7 +532,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 13, "metadata": {}, "outputs": [], "source": [ @@ -479,7 +555,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 48, "metadata": {}, "outputs": [], "source": [ @@ -541,7 +617,6 @@ "\n", " messages.append(cot_result)\n", " start_message = \"\"\"\\\n", - "\n", "Begin! Directly write the entities as a Markdown formatted list, don't write any thoughts or delimiters.\n", "\n", "\n", @@ -550,11 +625,69 @@ " messages.append(user(start_message))\n", "\n", " print(\"Starting chatml generation\")\n", - " result = generate(messages, model=\"gpt-4-turbo\", temperature=0.1, stop=[\"\n", "\n", "\n", - "{entity_chain_of_thought}\n", + "{summarize_chain_of_thought_example}\n", "\n", "\n", "\n", @@ -633,11 +766,564 @@ " messages.append(user(start_message))\n", "\n", " print(\"Starting chatml generation\")\n", - " result = generate(messages, model=\"gpt-4-turbo\", temperature=0.1, stop=[\"\n", + "- You will be provided with \"Entities\" which are important characters, people, places, things, objects relevant to the conversation.\n", + "- You will make sure to preserve the context for these \"Entities\".\n", + "- You may remove entries in the history that do not contain any crucial information such as small-talk or repeated questions/responses.\n", + "- You will combine multiple entries into a summary strictly as long as that doesn't disrupt the structure of the session.\n", + "- Do not remove content that the user shared if it might be relevant to future messages.\n", + "\n", + "\n", + "\n", + "\n", + "{json.dumps(example_entity_messages, indent=2)}\n", + "\n", + "\n", + "\n", + "{json.dumps(example_chat, indent=2)}\n", + "\n", + "\n", + "\n", + "{summarize_chain_of_thought_example}\n", + "\n", + "\n", + "\n", + "{json.dumps(example_summarized_messages, indent=2)}\n", + "\n", + "\"\"\".strip()\n", + "\n", + " user_message = f\"\"\"\\\n", + "\n", + "{json.dumps(entity_info)}\n", + "\n", + "\n", + "{json.dumps(history_to_compact, indent=2)}\n", + "\n", + "\n", + "\n", + "\"\"\"\n", + " messages = [system(system_prompt), user(user_message)]\n", + "\n", + " print(\"Starting CoT generation\")\n", + " cot_result = generate(messages, model=\"gpt-4-turbo\", stop=[\"\n", + "\"\"\".strip()\n", + "\n", + " messages.append(user(start_message))\n", + "\n", + " print(\"Starting chatml generation\")\n", + " trim_result = generate(\n", + " messages, model=\"gpt-4-turbo\", temperature=0.1, stop=[\"\\n'\n", + " '[\\n'\n", + " ' {\\n'\n", + " ' \"role\": \"system\",\\n'\n", + " ' \"name\": \"summary\",\\n'\n", + " ' \"content\": \"Event: The user discussed finishing \\'Red Dead '\n", + " \"Redemption 2' and particularly enjoyed the last mission. They \"\n", + " 'also talked about favorite missions and characters, specifically '\n", + " 'Arthur over Dutch.\"\\n'\n", + " ' },\\n'\n", + " ' {\\n'\n", + " ' \"index\": 12,\\n'\n", + " ' \"role\": \"user\",\\n'\n", + " ' \"content\": \"im playing helldivers 2. do u know of it\"\\n'\n", + " ' },\\n'\n", + " ' {\\n'\n", + " ' \"index\": 13,\\n'\n", + " ' \"role\": \"assistant\",\\n'\n", + " ' \"content\": \"Yes, Helldivers 2! It\\'s a great choice if '\n", + " \"you're into intense co-op action. The game really ramps up with \"\n", + " 'its twin-stick shooter gameplay, and it\\\\u2019s super fun but '\n", + " 'also quite challenging. Working together with teammates to '\n", + " 'complete those missions can get pretty hectic. How are you '\n", + " 'finding the chaos and team dynamics so far?\"\\n'\n", + " ' },\\n'\n", + " ' {\\n'\n", + " ' \"index\": 14,\\n'\n", + " ' \"role\": \"user\",\\n'\n", + " ' \"content\": \"amazing. my fav build is laser cannon + guard '\n", + " 'dog. what about u\"\\n'\n", + " ' },\\n'\n", + " ' {\\n'\n", + " ' \"index\": 15,\\n'\n", + " ' \"role\": \"assistant\",\\n'\n", + " ' \"content\": \"Nice combo! The laser cannon is awesome for '\n", + " 'tearing through those enemy swarms, and the guard dog drone '\n", + " 'really helps in keeping the smaller threats at bay. I personally '\n", + " 'love using the railgun with a shield backpack. It gives a good '\n", + " 'balance of offense and defense, especially when things get really '\n", + " 'chaotic. What\\'s been the toughest mission for you so far?\"\\n'\n", + " ' },\\n'\n", + " ' {\\n'\n", + " ' \"index\": 16,\\n'\n", + " ' \"role\": \"user\",\\n'\n", + " ' \"content\": \"dude the charger is so difficult to kill.\"\\n'\n", + " ' },\\n'\n", + " ' {\\n'\n", + " ' \"index\": 17,\\n'\n", + " ' \"role\": \"assistant\",\\n'\n", + " ' \"content\": \"Yeah, the Charger can be brutal! They really test '\n", + " \"your reflexes and teamwork. It's all about timing and \"\n", + " 'coordination with your team to bring those heavy hitters down. '\n", + " 'Have you tried using more strategic traps or mines? Sometimes '\n", + " 'setting up the right position with static fields can really make '\n", + " 'a difference against them. How do your squadmates handle it?\"\\n'\n", + " ' },\\n'\n", + " ' {\\n'\n", + " ' \"index\": 18,\\n'\n", + " ' \"role\": \"user\",\\n'\n", + " ' \"content\": \"we just run and gun\"\\n'\n", + " ' },\\n'\n", + " ' {\\n'\n", + " ' \"index\": 19,\\n'\n", + " ' \"role\": \"assistant\",\\n'\n", + " ' \"content\": \"Classic strategy! That can definitely be fun, '\n", + " 'especially in a game as intense as Helldivers 2 where things get '\n", + " 'chaotic fast. Keeping on the move helps a ton. Just make sure '\n", + " \"someone's always got your back! Have you guys tried any of the \"\n", + " 'higher difficulty levels yet, or are you sticking to a certain '\n", + " 'comfort zone for now?\"\\n'\n", + " ' },\\n'\n", + " ' {\\n'\n", + " ' \"role\": \"system\",\\n'\n", + " ' \"name\": \"summary\",\\n'\n", + " ' \"content\": \"Event: The user discussed an Nvidia driver issue, '\n", + " 'particularly its incompatibility with Linux, and the assistant '\n", + " 'provided potential solutions and resources for assistance.\"\\n'\n", + " ' },\\n'\n", + " ' {\\n'\n", + " ' \"index\": 24,\\n'\n", + " ' \"role\": \"user\",\\n'\n", + " ' \"content\": \"yep. see u bye!\"\\n'\n", + " ' },\\n'\n", + " ' {\\n'\n", + " ' \"index\": 25,\\n'\n", + " ' \"role\": \"assistant\",\\n'\n", + " ' \"content\": \"See you, take care! Bye!\"\\n'\n", + " ' }\\n'\n", + " ']\\n',\n", + " 'role': 'assistant'}\n", + "End CoT generation\n", + "Starting chatml generation\n", + "End chatml generation\n" + ] + } + ], + "source": [ + "trimmed_session = json.loads(trimmed_messages[\"content\"])\n", + "entity_info = entities(entity_info[\"content\"])\n", + "\n", + "\n", + "summarized_messages = summarize_messages(\n", + " entity_info=entity_info, chat_session=trimmed_session\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 56, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'content': '[\\n'\n", + " ' {\\n'\n", + " ' \"role\": \"system\",\\n'\n", + " ' \"name\": \"summary\",\\n'\n", + " ' \"content\": \"Event: The user discussed finishing \\'Red Dead '\n", + " \"Redemption 2' and particularly enjoyed the last mission. They \"\n", + " 'also talked about favorite missions and characters, specifically '\n", + " 'Arthur over Dutch.\"\\n'\n", + " ' },\\n'\n", + " ' {\\n'\n", + " ' \"index\": 12,\\n'\n", + " ' \"role\": \"user\",\\n'\n", + " ' \"content\": \"im playing helldivers 2. do u know of it\"\\n'\n", + " ' },\\n'\n", + " ' {\\n'\n", + " ' \"index\": 13,\\n'\n", + " ' \"role\": \"assistant\",\\n'\n", + " ' \"content\": \"Yes, Helldivers 2! It\\'s a great choice if '\n", + " \"you're into intense co-op action. The game really ramps up with \"\n", + " 'its twin-stick shooter gameplay, and it\\\\u2019s super fun but '\n", + " 'also quite challenging. Working together with teammates to '\n", + " 'complete those missions can get pretty hectic. How are you '\n", + " 'finding the chaos and team dynamics so far?\"\\n'\n", + " ' },\\n'\n", + " ' {\\n'\n", + " ' \"index\": 14,\\n'\n", + " ' \"role\": \"user\",\\n'\n", + " ' \"content\": \"amazing. my fav build is laser cannon + guard '\n", + " 'dog. what about u\"\\n'\n", + " ' },\\n'\n", + " ' {\\n'\n", + " ' \"index\": 15,\\n'\n", + " ' \"role\": \"assistant\",\\n'\n", + " ' \"content\": \"Nice combo! The laser cannon is awesome for '\n", + " 'tearing through those enemy swarms, and the guard dog drone '\n", + " 'really helps in keeping the smaller threats at bay. I personally '\n", + " 'love using the railgun with a shield backpack. It gives a good '\n", + " 'balance of offense and defense, especially when things get really '\n", + " 'chaotic. What\\'s been the toughest mission for you so far?\"\\n'\n", + " ' },\\n'\n", + " ' {\\n'\n", + " ' \"index\": 16,\\n'\n", + " ' \"role\": \"user\",\\n'\n", + " ' \"content\": \"dude the charger is so difficult to kill.\"\\n'\n", + " ' },\\n'\n", + " ' {\\n'\n", + " ' \"index\": 17,\\n'\n", + " ' \"role\": \"assistant\",\\n'\n", + " ' \"content\": \"Yeah, the Charger can be brutal! They really test '\n", + " \"your reflexes and teamwork. It's all about timing and \"\n", + " 'coordination with your team to bring those heavy hitters down. '\n", + " 'Have you tried using more strategic traps or mines? Sometimes '\n", + " 'setting up the right position with static fields can really make '\n", + " 'a difference against them. How do your squadmates handle it?\"\\n'\n", + " ' },\\n'\n", + " ' {\\n'\n", + " ' \"index\": 18,\\n'\n", + " ' \"role\": \"user\",\\n'\n", + " ' \"content\": \"we just run and gun\"\\n'\n", + " ' },\\n'\n", + " ' {\\n'\n", + " ' \"index\": 19,\\n'\n", + " ' \"role\": \"assistant\",\\n'\n", + " ' \"content\": \"Classic strategy! That can definitely be fun, '\n", + " 'especially in a game as intense as Helldivers 2 where things get '\n", + " 'chaotic fast. Keeping on the move helps a ton. Just make sure '\n", + " \"someone's always got your back! Have you guys tried any of the \"\n", + " 'higher difficulty levels yet, or are you sticking to a certain '\n", + " 'comfort zone for now?\"\\n'\n", + " ' },\\n'\n", + " ' {\\n'\n", + " ' \"role\": \"system\",\\n'\n", + " ' \"name\": \"summary\",\\n'\n", + " ' \"content\": \"Event: The user discussed an Nvidia driver issue, '\n", + " 'particularly its incompatibility with Linux, and the assistant '\n", + " 'provided potential solutions and resources for assistance.\"\\n'\n", + " ' },\\n'\n", + " ' {\\n'\n", + " ' \"index\": 24,\\n'\n", + " ' \"role\": \"user\",\\n'\n", + " ' \"content\": \"yep. see u bye!\"\\n'\n", + " ' },\\n'\n", + " ' {\\n'\n", + " ' \"index\": 25,\\n'\n", + " ' \"role\": \"assistant\",\\n'\n", + " ' \"content\": \"See you, take care! Bye!\"\\n'\n", + " ' }\\n'\n", + " ']',\n", + " 'role': 'assistant'}\n" + ] + } + ], + "source": [ + "pprint(summarized_messages)" ] } ],