Merge pull request #87 from cohere-ai/rag-connector

Add code for article on Chat RAG with connectors
cohere-ai · Dec 13, 2023 · e554831 · e554831
2 parents 1aa33a5 + 2dd5dd8
commit e554831
Show file tree

Hide file tree

Showing 4 changed files with 310 additions and 1 deletion.
diff --git a/README.md b/README.md
@@ -142,4 +142,12 @@ Learn how to build a chatbot using Cohere’s Chat, Embed, and Rerank endpoints.
 ">Notebook</a> | <a href="https://colab.research.google.com/github/cohere-ai/notebooks/blob/main/notebooks/RAG_Chatbot_with_Chat_Embed_Rerank.ipynb
 ">Colab</a>]
 
-<img src="https://raw.githubusercontent.com/cohere-ai/notebooks/main/notebooks/images/rag-chatbot.png" style="width:100%; max-width:400px" alt="How to Build a RAG-Powered Chatbot with Chat, Embed, and Rerank" />
+<img src="https://raw.githubusercontent.com/cohere-ai/notebooks/main/notebooks/images/rag-chatbot.png" style="width:100%; max-width:400px" alt="How to Build a RAG-Powered Chatbot with Chat, Embed, and Rerank" />
+
+### 22. How to Build a RAG-Powered Chatbot with Connectors
+Learn how to build a RAG-powered chatbot using Cohere's build-your-own connectors framework.
+[<a href="https://github.com/cohere-ai/notebooks/blob/main/notebooks/RAG_Chatbot_with_Connectors.ipynb
+">Notebook</a> | <a href="https://colab.research.google.com/github/cohere-ai/notebooks/blob/main/notebooks/RAG_Chatbot_with_Connectors.ipynb
+">Colab</a>]
+
+<img src="https://raw.githubusercontent.com/cohere-ai/notebooks/main/notebooks/images/rag-chatbot-connector.png" style="width:100%; max-width:400px" alt="How to Build a RAG-Powered Chatbot with Connectors" />
diff --git a/examples/chat_rag_connector/demo b/examples/chat_rag_connector/demo
diff --git a/notebooks/RAG_Chatbot_with_Connectors.ipynb b/notebooks/RAG_Chatbot_with_Connectors.ipynb
@@ -0,0 +1,300 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import cohere\n",
+    "import os\n",
+    "import uuid\n",
+    "from typing import List, Dict\n",
+    "\n",
+    "COHERE_API_KEY = os.getenv(\"COHERE_API_KEY\")\n",
+    "co = cohere.Client(COHERE_API_KEY)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Define and process documents"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Documents processed successfully\n"
+     ]
+    }
+   ],
+   "source": [
+    "import requests\n",
+    "\n",
+    "# Define the sources for the documents\n",
+    "# As an example, we'll use LLM University's Module 1: What are Large Language Models?\n",
+    "# https://docs.cohere.com/docs/intro-large-language-models\n",
+    "\n",
+    "sources = [\n",
+    "    {\n",
+    "        \"title\": \"Text Embeddings\", \n",
+    "        \"url\": \"https://docs.cohere.com/docs/text-embeddings\"},\n",
+    "    {\n",
+    "        \"title\": \"Similarity Between Words and Sentences\", \n",
+    "        \"url\": \"https://docs.cohere.com/docs/similarity-between-words-and-sentences\"},\n",
+    "    {\n",
+    "        \"title\": \"The Attention Mechanism\", \n",
+    "        \"url\": \"https://docs.cohere.com/docs/the-attention-mechanism\"},\n",
+    "    {\n",
+    "        \"title\": \"Transformer Models\", \n",
+    "        \"url\": \"https://docs.cohere.com/docs/transformer-models\"}   \n",
+    "]\n",
+    "\n",
+    "DEMO_CONNECTOR_API_KEY = \"YOUR_CONNECTOR_API_KEY\" # Replace with your connector API key\n",
+    "\n",
+    "headers = {\n",
+    "    \"Authorization\": f\"Bearer {DEMO_CONNECTOR_API_KEY}\"\n",
+    "}\n",
+    "\n",
+    "response = requests.post(\"YOUR_API_URL/process\", # Replace with your API's URL\n",
+    "                         json={\"sources\": sources},\n",
+    "                         headers=headers)\n",
+    "\n",
+    "print(response.json()[\"message\"])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Documents processed successfully\n"
+     ]
+    }
+   ],
+   "source": [
+    "import requests\n",
+    "\n",
+    "# Define the sources for the documents\n",
+    "# As an example, we'll use LLM University's Module 1: What are Large Language Models?\n",
+    "# https://docs.cohere.com/docs/intro-large-language-models\n",
+    "\n",
+    "sources = [\n",
+    "    {\n",
+    "        \"title\": \"Text Embeddings\", \n",
+    "        \"url\": \"https://docs.cohere.com/docs/text-embeddings\"},\n",
+    "    {\n",
+    "        \"title\": \"Similarity Between Words and Sentences\", \n",
+    "        \"url\": \"https://docs.cohere.com/docs/similarity-between-words-and-sentences\"},\n",
+    "    {\n",
+    "        \"title\": \"The Attention Mechanism\", \n",
+    "        \"url\": \"https://docs.cohere.com/docs/the-attention-mechanism\"},\n",
+    "    {\n",
+    "        \"title\": \"Transformer Models\", \n",
+    "        \"url\": \"https://docs.cohere.com/docs/transformer-models\"}   \n",
+    "]\n",
+    "\n",
+    "DEMO_CONNECTOR_API_KEY = \"YOUR_CONNECTOR_API_KEY\" # Replace with your connector API key\n",
+    "\n",
+    "headers = {\n",
+    "    \"Authorization\": f\"Bearer {DEMO_CONNECTOR_API_KEY}\"\n",
+    "}\n",
+    "\n",
+    "response = requests.post(\"https://demo-conn.onrender.com/process\", # Replace with your API's URL\n",
+    "                         json={\"sources\": sources},\n",
+    "                         headers=headers)\n",
+    "\n",
+    "print(response.json()[\"message\"])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Chatbot component"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class Chatbot:\n",
+    "    def __init__(self, connectors: List[str]):\n",
+    "        self.conversation_id = str(uuid.uuid4())\n",
+    "        self.connectors = [{\"id\": c} for c in connectors]\n",
+    "\n",
+    "\n",
+    "    def generate_response(self, message: str):\n",
+    "        response = co.chat(\n",
+    "                        message=message,\n",
+    "                        connectors=self.connectors,\n",
+    "                        conversation_id=self.conversation_id,\n",
+    "                        stream=True\n",
+    "                        )\n",
+    "\n",
+    "        for event in response:\n",
+    "                yield event"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### App component"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class App:\n",
+    "    def __init__(self, chatbot: Chatbot):\n",
+    "        self.chatbot = chatbot\n",
+    "        \n",
+    "    def run(self):\n",
+    "        while True:\n",
+    "            # Get the user message\n",
+    "            message = input(\"User: \")\n",
+    "\n",
+    "            # Typing \"quit\" ends the conversation\n",
+    "            if message.lower() == \"quit\":\n",
+    "                print(\"Ending chat.\")\n",
+    "                break\n",
+    "            else:\n",
+    "                print(f\"User: {message}\")\n",
+    "\n",
+    "            # Get the chatbot response\n",
+    "            response = self.chatbot.generate_response(message)\n",
+    "\n",
+    "            # Print the chatbot response\n",
+    "            print(\"Chatbot:\")\n",
+    "            \n",
+    "            documents = []\n",
+    "            documents_flag = False\n",
+    "            citations_flag = False\n",
+    "            \n",
+    "            for event in response:\n",
+    "                # Documents\n",
+    "                if event.event_type == \"search-results\":\n",
+    "                    documents_flag = True\n",
+    "                    documents = event.documents\n",
+    "                    \n",
+    "                # Text\n",
+    "                if event.event_type == \"text-generation\":\n",
+    "                    print(event.text, end=\"\")        \n",
+    "\n",
+    "                # Citations\n",
+    "                if event.event_type == \"citation-generation\":\n",
+    "                    if not citations_flag:\n",
+    "                        print(\"\\n\\nCITATIONS:\")\n",
+    "                        citations_flag = True\n",
+    "                    print(event.citations)\n",
+    "            \n",
+    "            if documents_flag:\n",
+    "                print(\"\\n\\nDOCUMENTS:\")\n",
+    "                for d in documents:\n",
+    "                    print(f'{d[\"title\"]} ({d[\"id\"]}). URL: {d[\"url\"]}')\n",
+    "\n",
+    "            print(f\"\\n{'-'*100}\\n\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Run chatbot"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "User: What is attention\n",
+      "Chatbot:\n",
+      "Attention is a technique used in language models to provide context to each word in a sentence or text, based on the other words. Attention plays a crucial role in transformer models, which can help improve large language models.\n",
+      "\n",
+      "CITATIONS:\n",
+      "[{'start': 60, 'end': 67, 'text': 'context', 'document_ids': ['demo-conn-tm17qr_0', 'demo-conn-tm17qr_1', 'demo-conn-tm17qr_2']}]\n",
+      "[{'start': 68, 'end': 102, 'text': 'to each word in a sentence or text', 'document_ids': ['demo-conn-tm17qr_1', 'demo-conn-tm17qr_2']}]\n",
+      "[{'start': 117, 'end': 129, 'text': 'other words.', 'document_ids': ['demo-conn-tm17qr_1']}]\n",
+      "[{'start': 148, 'end': 160, 'text': 'crucial role', 'document_ids': ['demo-conn-tm17qr_2']}]\n",
+      "[{'start': 164, 'end': 182, 'text': 'transformer models', 'document_ids': ['demo-conn-tm17qr_2']}]\n",
+      "[{'start': 199, 'end': 229, 'text': 'improve large language models.', 'document_ids': ['demo-conn-tm17qr_2']}]\n",
+      "\n",
+      "\n",
+      "DOCUMENTS:\n",
+      "Transformer Models (demo-conn-tm17qr_0). URL: https://docs.cohere.com/docs/transformer-models\n",
+      "Transformer Models (demo-conn-tm17qr_1). URL: https://docs.cohere.com/docs/transformer-models\n",
+      "Transformer Models (demo-conn-tm17qr_2). URL: https://docs.cohere.com/docs/transformer-models\n",
+      "\n",
+      "----------------------------------------------------------------------------------------------------\n",
+      "\n",
+      "Ending chat.\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Define connectors\n",
+    "connectors = [\"demo-conn-tm17qr\"]\n",
+    "\n",
+    "# Create an instance of the Chatbot class by supplying the connectors\n",
+    "chatbot = Chatbot(connectors)\n",
+    "\n",
+    "# Create an instance of the App class with the Chatbot instance\n",
+    "app = App(chatbot)\n",
+    "\n",
+    "# Run the chatbot\n",
+    "app.run()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "base",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.4"
+  },
+  "orig_nbformat": 4
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/notebooks/images/rag-chatbot-connector.png b/notebooks/images/rag-chatbot-connector.png