feat: Add more prompting options to OpenAIAnswerGenerator #4138
Changes from 1 commit
@@ -36,8 +36,6 @@

OPENAI_TIMEOUT = float(os.environ.get(HAYSTACK_REMOTE_API_TIMEOUT_SEC, 30))
OPENAI_BACKOFF = float(os.environ.get(HAYSTACK_REMOTE_API_BACKOFF_SEC, 10))
OPENAI_MAX_RETRIES = int(os.environ.get(HAYSTACK_REMOTE_API_MAX_RETRIES, 5))


class OpenAIAnswerGenerator(BaseGenerator):
@@ -52,21 +50,23 @@ class OpenAIAnswerGenerator(BaseGenerator):
    def __init__(
        self,
        api_key: str,
        model: str = "text-curie-001",
        max_tokens: int = 13,
        top_k: int = 5,
        model: str = "text-davinci-003",
        max_tokens: int = 40,
        top_k: int = 1,
        temperature: float = 0.2,
        presence_penalty: float = -2.0,
        frequency_penalty: float = -2.0,
        frequency_penalty: float = 0,
        examples_context: Optional[str] = None,
        examples: Optional[List] = None,
        instructions: Optional[str] = None,
        add_runtime_instructions: bool = False,
        stop_words: Optional[List] = None,
        progress_bar: bool = True,
    ):
        """
        :param api_key: Your API key from OpenAI. It is required for this node to work.
        :param model: ID of the engine to use for generating the answer. You can select one of `"text-ada-001"`,
            `"text-babbage-001"`, `"text-curie-001"`, or `"text-davinci-002"`
            `"text-babbage-001"`, `"text-curie-001"`, or `"text-davinci-003"`
            (from worst to best and from cheapest to most expensive). For more information about the models,
            refer to the [OpenAI Documentation](https://platform.openai.com/docs/models/gpt-3).
        :param max_tokens: The maximum number of tokens allowed for the generated Answer.
@@ -80,21 +80,20 @@ def __init__(
            verbatim.
        :param examples_context: A text snippet containing the contextual information used to generate the Answers for
            the examples you provide.
            If not supplied, the default from OpenAPI docs is used:
            "In 2017, U.S. life expectancy was 78.6 years."
        :param examples: List of (question, answer) pairs that helps steer the model towards the tone and answer
            format you'd like. We recommend adding 2 to 3 examples.
            If not supplied, the default from OpenAPI docs is used:
            [["What is human life expectancy in the United States?", "78 years."]]
            format you'd like.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. So this is basically the prompt text? Maybe rephrasing to: The text of the prompt instructing the model what you want it to do. The default prompt is: Create a concise and informative answer..". (also adding a task for Docs to create guidelines for writing prompts) |
||
        :param add_runtime_instructions: If you like to add the prompt instructions (the instructions around the question)

Review comment: Is this a boolean? If yes, I think we should say it explicitly: "Set to ... If you set it to ..."
            during querying or not. Defaults to using predefined prompt instructions.
            If you do add instructions at runtime separate instructions and question like:
            "... <instructions> ... [SEPARATOR] <question>"
            Also make sure to mention "$documents" and "$query" in the <instructions>, such
            that those will be replaced in correctly.
        :param stop_words: Up to 4 sequences where the API stops generating further tokens. The returned text does

Review comment: What's a "sequence"? Is it a string? If yes, I think it's clearer to say: ...
            not contain the stop sequence.
            If you don't provide it, the default from OpenAPI docs is used: ["\n", "<|endoftext|>"]
        """
        super().__init__(progress_bar=progress_bar)
        if not examples_context:
            examples_context = "In 2017, U.S. life expectancy was 78.6 years."
        if not examples:
            examples = [["What is human life expectancy in the United States?", "78 years."]]
        if not stop_words:
            stop_words = ["\n", "<|endoftext|>"]
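For illustration only (not part of the diff): a minimal sketch of initializing the node with custom instructions. The import path is the usual Haystack v1 one and the API key is a placeholder; as the docstring above requires, the instructions mention both "$documents" and "$query".

from haystack.nodes import OpenAIAnswerGenerator  # import path assumed from the current Haystack layout

custom_instructions = (
    "You are a support assistant. Using only $documents, answer the question: $query. "
    "Reply in one short sentence."
)

generator = OpenAIAnswerGenerator(
    api_key="sk-...",                  # placeholder key
    model="text-davinci-003",          # new default in this PR, spelled out here for clarity
    instructions=custom_instructions,  # contains $documents and $query, so no warning is logged
)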
@@ -110,6 +109,27 @@ def __init__(
        self.frequency_penalty = frequency_penalty
        self.examples_context = examples_context
        self.examples = examples
        self.add_runtime_instructions = add_runtime_instructions
        if not instructions:
            self.instructions = (
                f"Create a concise and informative answer (no more than {max_tokens} words) for a given "
                f"question based solely on the given documents. You must only use information from the given "
                f"documents. Use an unbiased and journalistic tone. Do not repeat text. Cite the documents "
                f"using Document[$number] notation. If multiple documents contain the answer, cite "
                f"each document like Document[$number], Document[$number], Document[$number] ... If "
                f"the documents do not contain the answer to the question, say that "
                f"'answering is not possible given the available information.'\n "
                f"$documents; \n Question: $query; Answer: "
            )
        else:
            if "$documents" in instructions and "$query" in instructions:
                self.instructions = instructions
            else:
                logger.warning(
                    f"Instructions do not have the right format. You need to include '$documents' and '$query'. "
                    f"You supplied: {instructions}"
                )
                self.instructions = instructions
        self.stop_words = stop_words

        tokenizer = "gpt2"
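Again purely as a sketch: with add_runtime_instructions=True, the docstring above says the instructions and the question are passed together in the query string, separated by "[SEPARATOR]". A hypothetical call (retrieved_docs is an assumed List[Document] coming from a retriever) could look like this:

generator = OpenAIAnswerGenerator(
    api_key="sk-...",               # placeholder key
    add_runtime_instructions=True,  # prompt instructions are now expected at query time
)

runtime_query = (
    "Answer $query using only $documents. Keep the answer to one sentence. "
    "[SEPARATOR] Who discovered penicillin?"
)
result = generator.predict(query=runtime_query, documents=retrieved_docs)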
@@ -127,7 +147,11 @@ def __init__(
            logger.debug("Using GPT2TokenizerFast")
            self._hf_tokenizer: PreTrainedTokenizerFast = GPT2TokenizerFast.from_pretrained(tokenizer)

    @retry_with_exponential_backoff(backoff_in_seconds=OPENAI_BACKOFF, max_retries=OPENAI_MAX_RETRIES)
    @retry_with_exponential_backoff(
        backoff_in_seconds=int(os.environ.get(HAYSTACK_REMOTE_API_BACKOFF_SEC, 1)),
        max_retries=int(os.environ.get(HAYSTACK_REMOTE_API_MAX_RETRIES, 5)),
        errors=(OpenAIRateLimitError, OpenAIError),
    )

Review comment: Adding OpenAIError here is very important since the OpenAI API breaks regularly.

    def predict(
        self,
        query: str,
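Because the decorator now reads its backoff and retry settings from environment variables, they can be tuned without touching the code. A sketch, assuming the variable names match the HAYSTACK_REMOTE_API_* constants used above and that they are set before the module is imported:

import os

# Must be set before the generator module is imported, because the decorator
# arguments are evaluated at import time.
os.environ["HAYSTACK_REMOTE_API_BACKOFF_SEC"] = "2"   # seconds of backoff between retries (default here: 1)
os.environ["HAYSTACK_REMOTE_API_MAX_RETRIES"] = "5"   # retries on OpenAIRateLimitError / OpenAIError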
@@ -216,31 +240,54 @@ def _build_prompt(self, query: str, documents: List[Document]) -> Tuple[str, List[Document]]:
        """
        Builds the prompt for the GPT-3 model so that it can generate an Answer.
        """
        example_context = f"===\nContext: {self.examples_context}\n===\n"
        example_prompts = "\n---\n".join([f"Q: {question}\nA: {answer}" for question, answer in self.examples])
        instruction = "Please answer the question according to the above context.\n" + example_context + example_prompts
        instruction = f"{instruction.strip()}\n\n"

        qa_prompt = f"Q: {query}\nA:"

        n_instruction_tokens = self._count_tokens(instruction + qa_prompt + "===\nContext: \n===\n")
        ## Clean documents
        for doc in documents:
            doc.content = self._clean_documents(doc.content)

        ## Add example prompts
        if self.examples and self.examples_context:
            example_context = f"===\nContext: {self.examples_context}\n===\n"
            example_QAs = "\n---\n".join([f"Q: {question}\nA: {answer}" for question, answer in self.examples])
            example_prompt = (
                "Please answer the question according to the above context.\n" + example_context + example_QAs
            )
            example_prompt = f"{example_prompt.strip()}\n\n"
        else:
            example_prompt = ""

        ## Compose prompt
        # Switch for adding the prompt instructions at runtime.
        if self.add_runtime_instructions:
            temp = query.split("[SEPARATOR]")
            if len(temp) != 2:
                logger.error(
                    f"Instructions given to the OpenAIAnswerGenerator were not correct, please follow the structure "
                    f"from the docstrings. You supplied: {query}"
                )
                current_prompt = ""
                query = "Say: incorrect prompt."
            else:
                current_prompt = temp[0].strip()
                query = temp[1].strip()
        else:
            current_prompt = self.instructions

        # Inserting the query into the prompt here.
        current_prompt = current_prompt.replace("$query", query)
        n_instruction_tokens = self._count_tokens(example_prompt + current_prompt)
        logger.debug("Number of tokens in instruction: %s", n_instruction_tokens)

        n_docs_tokens = [self._count_tokens(doc.content) for doc in documents]
        n_docs_tokens = [self._count_tokens(f"\nDocument[{i}]: " + doc.content) for i, doc in enumerate(documents)]
        logger.debug("Number of tokens in documents: %s", n_docs_tokens)

        # for length restrictions of prompt see: https://platform.openai.com/docs/api-reference/completions/create#completions/create-max_tokens
        # Add as many Documents as fit into the model.
        leftover_token_len = self.MAX_TOKENS_LIMIT - n_instruction_tokens - self.max_tokens

        # Add as many Documents as context as fit into the model
        input_docs = []
        input_docs_content = []
        skipped_docs = 0
        for doc, doc_token_len in zip(documents, n_docs_tokens):
        for i, (doc, doc_token_len) in enumerate(zip(documents, n_docs_tokens)):
            if doc_token_len <= leftover_token_len:
                input_docs.append(doc)
                input_docs_content.append(doc.content)
                input_docs_content.append(f"\nDocument[{i}]: " + doc.content)
                leftover_token_len -= doc_token_len
            else:
                skipped_docs += 1
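To make the budgeting above easier to follow, here is a standalone sketch of the same idea (not Haystack code; tiktoken and the 4097-token limit for text-davinci-003 are assumptions): count the tokens of the instruction prompt, reserve max_tokens for the answer, then add "Document[i]"-labelled documents while they still fit.

import tiktoken  # assumed available; the node falls back to a Hugging Face tokenizer otherwise

MAX_TOKENS_LIMIT = 4097  # assumed context size for text-davinci-003
max_answer_tokens = 40   # mirrors the new max_tokens default

enc = tiktoken.get_encoding("gpt2")

def count_tokens(text: str) -> int:
    return len(enc.encode(text))

prompt = "Answer $query using only $documents."
docs = ["First retrieved document ...", "Second retrieved document ..."]

leftover = MAX_TOKENS_LIMIT - count_tokens(prompt) - max_answer_tokens
included = []
for i, doc in enumerate(docs):
    labelled = f"\nDocument[{i}]: " + doc
    cost = count_tokens(labelled)
    if cost <= leftover:          # keep the document only while it still fits
        included.append(labelled)
        leftover -= cost

# Top-ranked documents go at the end, matching the reversal shown in the next hunk.
context = " ".join(reversed(included))
final_prompt = prompt.replace("$documents", context).replace("$query", "Who discovered penicillin?")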
@@ -258,17 +305,22 @@ def _build_prompt(self, query: str, documents: List[Document]) -> Tuple[str, List[Document]]:
            self.MAX_TOKENS_LIMIT,
        )

        # Top ranked documents should go at the end
        context = " ".join(reversed(input_docs_content))
        context = f"===\nContext: {context}\n===\n"
        # Top ranked documents should go at the end.
        context_documents = " ".join(reversed(input_docs_content))

        full_prompt = instruction + context + qa_prompt
        logger.debug("Full prompt: %s", full_prompt)
        current_prompt = current_prompt.replace("$documents", context_documents)
        logger.debug("Full prompt: %s", current_prompt)

        return full_prompt, input_docs
        return current_prompt, input_docs

    def _count_tokens(self, text: str) -> int:
        if USE_TIKTOKEN:
            return len(self._tk_tokenizer.encode(text))
        else:
            return len(self._hf_tokenizer.tokenize(text))

    def _clean_documents(self, text: str) -> str:
        to_remove = {"$documents": "#documents", "$query": "#query", "\n": " "}
        for x in to_remove.keys():
            text = text.replace(x, to_remove[x])
        return text
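A quick illustration of what _clean_documents does (behavior read directly off the loop above): placeholders are escaped so that document text cannot inject "$documents" or "$query" into the prompt, and newlines become spaces.

to_remove = {"$documents": "#documents", "$query": "#query", "\n": " "}

text = "Prices depend on $query volume.\nSee $documents for details."
for pattern, replacement in to_remove.items():
    text = text.replace(pattern, replacement)

print(text)
# Prices depend on #query volume. See #documents for details.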
Review comment: To be honest, I am not really convinced of adding those two params here. It feels like adding a piece "here and there" instead of adding the needed functionality to the design of PromptTemplate. Finding a suitable design there is not easy but worth the effort, I think.
I'd propose splitting this PR: the `instructions` part, and how that could potentially look in a PromptTemplate.
Review comment: Just as a heads up, the OpenAIError has been added to main in this PR.
Review comment: I agree, a properly designed PromptTemplate that allows for runtime instructions is what we need. When do you think we will have such a design plus the associated testing?
The OpenAIAnswerGenerator class (or its internals) should be substituted by the PromptNode + template in the future anyway. That is why I think we should experiment quickly and proceed with the current implementation. If the PromptTemplate design is just around the corner, we don't need to proceed here.
Review comment: There's already a proposal out there and it's being actively discussed. I would expect to have something implemented in the next few weeks. I would rather concentrate our efforts on finding a nice design there than on experimenting on two things in parallel.
Review comment: @Timoeller As the OpenAIError changes are already merged, I'd propose for this PR: either you reduce it to the "new stop_words param" + "clean_documents" changes, or we close it.