austintlee
diff --git a/‎plugin/src/test/java/org/opensearch/ml/rest/RestMLRAGSearchProcessorIT.java
+169-1 b/‎plugin/src/test/java/org/opensearch/ml/rest/RestMLRAGSearchProcessorIT.java
+169-1
diff --git a/‎search-processors/src/main/java/org/opensearch/searchpipelines/questionanswering/generative/GenerativeQAResponseProcessor.java
+2-1 b/‎search-processors/src/main/java/org/opensearch/searchpipelines/questionanswering/generative/GenerativeQAResponseProcessor.java
+2-1
diff --git a/‎search-processors/src/main/java/org/opensearch/searchpipelines/questionanswering/generative/ext/GenerativeQAParameters.java
+18-3 b/‎search-processors/src/main/java/org/opensearch/searchpipelines/questionanswering/generative/ext/GenerativeQAParameters.java
+18-3
diff --git a/‎search-processors/src/main/java/org/opensearch/searchpipelines/questionanswering/generative/llm/ChatCompletionInput.java
+1 b/‎search-processors/src/main/java/org/opensearch/searchpipelines/questionanswering/generative/llm/ChatCompletionInput.java
+1
diff --git a/‎search-processors/src/main/java/org/opensearch/searchpipelines/questionanswering/generative/llm/DefaultLlmImpl.java
+31-4 b/‎search-processors/src/main/java/org/opensearch/searchpipelines/questionanswering/generative/llm/DefaultLlmImpl.java
+31-4
diff --git a/‎search-processors/src/main/java/org/opensearch/searchpipelines/questionanswering/generative/llm/Llm.java
+2-1 b/‎search-processors/src/main/java/org/opensearch/searchpipelines/questionanswering/generative/llm/Llm.java
+2-1
@@ -37,6 +37,7 @@
 
 import com.google.common.collect.ImmutableList;
 import com.google.common.collect.ImmutableMap;
+import org.opensearch.searchpipelines.questionanswering.generative.llm.LlmIOUtil;
 
 public class RestMLRAGSearchProcessorIT extends RestMLRemoteInferenceIT {
 
@@ -147,6 +148,32 @@ public class RestMLRAGSearchProcessorIT extends RestMLRemoteInferenceIT {
     private static final String BEDROCK_CONNECTOR_BLUEPRINT = AWS_SESSION_TOKEN == null
         ? BEDROCK_CONNECTOR_BLUEPRINT2
         : BEDROCK_CONNECTOR_BLUEPRINT1;
+
+    private static final String COHERE_API_KEY = System.getenv("COHERE_API_KEY"); // null when the environment variable is unset; the Cohere tests return early in that case
+    private static final String COHERE_CONNECTOR_BLUEPRINT = "{\n" // JSON connector definition for Cohere's chat endpoint (https://api.cohere.ai/v1/chat), with the API key embedded as the "cohere_key" credential
+        + "    \"name\": \"Cohere Chat Model\",\n"
+        + "    \"description\": \"The connector to Cohere's public chat API\",\n"
+        + "    \"version\": \"1\",\n"
+        + "    \"protocol\": \"http\",\n"
+        + "    \"credential\": {\n"
+        + "        \"cohere_key\": \"" + COHERE_API_KEY + "\"\n"
+        + "    },\n"
+        + "    \"parameters\": {\n"
+        + "        \"model\": \"command\"\n"
+        + "    },\n"
+        + "    \"actions\": [\n"
+        + "        {\n"
+        + "            \"action_type\": \"predict\",\n"
+        + "            \"method\": \"POST\",\n"
+        + "            \"url\": \"https://api.cohere.ai/v1/chat\",\n"
+        + "            \"headers\": {\n"
+        + "                \"Authorization\": \"Bearer ${credential.cohere_key}\",\n"
+        + "                \"Request-Source\": \"unspecified:opensearch\"\n"
+        + "            },\n"
+        + "            \"request_body\": \"{ \\\"message\\\": \\\"${parameters.inputs}\\\", \\\"model\\\": \\\"${parameters.model}\\\" }\" \n"
+        // Intentionally no "post_process_function" entry on this action; the commented-out draft of a response-escaping script that used to sit here was dead code and has been dropped.
+        + "        }\n" + "    ]\n" + "}";
+
     private static final String PIPELINE_TEMPLATE = "{\n"
         + "  \"response_processors\": [\n"
         + "    {\n"
@@ -195,6 +222,23 @@ public class RestMLRAGSearchProcessorIT extends RestMLRemoteInferenceIT {
         + "  }\n"
         + "}";
 
+    private static final String BM25_SEARCH_REQUEST_WITH_LLM_RESPONSE_FIELD_TEMPLATE = "{\n" // String.format template; args in order: _source field, match field, match text, llm_model, llm_question, context_size, message_size, timeout, llm_response_field
+        + "  \"_source\": [\"%s\"],\n"
+        + "  \"query\" : {\n"
+        + "    \"match\": {\"%s\": \"%s\"}\n"
+        + "  },\n"
+        + "   \"ext\": {\n"
+        + "      \"generative_qa_parameters\": {\n"
+        + "        \"llm_model\": \"%s\",\n"
+        + "        \"llm_question\": \"%s\",\n"
+        + "        \"context_size\": %d,\n"
+        + "        \"message_size\": %d,\n"
+        + "        \"timeout\": %d,\n"
+        + "        \"llm_response_field\": \"%s\"\n"
+        + "      }\n"
+        + "  }\n"
+        + "}";
+
     private static final String OPENAI_MODEL = "gpt-3.5-turbo";
     private static final String BEDROCK_ANTHROPIC_CLAUDE = "bedrock/anthropic-claude";
     private static final String TEST_DOC_PATH = "org/opensearch/ml/rest/test_data/";
@@ -466,6 +510,111 @@ public void testBM25WithBedrockWithConversation() throws Exception {
         assertNotNull(interactionId);
     }
 
+    public void testBM25WithCohere() throws Exception {
+        // End-to-end check of the RAG search pipeline against a Cohere chat model. Returns early when COHERE_API_KEY is unset, so the test then passes trivially rather than being reported as skipped.
+        if (COHERE_API_KEY == null) {
+            return;
+        }
+        Response response = createConnector(COHERE_CONNECTOR_BLUEPRINT); // setup sequence: create connector -> register remote model -> wait for task -> deploy model -> wait for task
+        Map responseMap = parseResponseToMap(response);
+        String connectorId = (String) responseMap.get("connector_id");
+        response = registerRemoteModel("Cohere Chat Completion v1", connectorId);
+        responseMap = parseResponseToMap(response);
+        String taskId = (String) responseMap.get("task_id");
+        waitForTask(taskId, MLTaskState.COMPLETED);
+        response = getTask(taskId); // the completed registration task carries the model_id
+        responseMap = parseResponseToMap(response);
+        String modelId = (String) responseMap.get("model_id");
+        response = deployRemoteModel(modelId);
+        responseMap = parseResponseToMap(response);
+        taskId = (String) responseMap.get("task_id");
+        waitForTask(taskId, MLTaskState.COMPLETED);
+
+        PipelineParameters pipelineParameters = new PipelineParameters();
+        pipelineParameters.tag = "testBM25WithCohere";
+        pipelineParameters.description = "desc";
+        pipelineParameters.modelId = modelId;
+        pipelineParameters.systemPrompt = "You are a helpful assistant";
+        pipelineParameters.userInstructions = "none";
+        pipelineParameters.context_field = "text";
+        Response response1 = createSearchPipeline("pipeline_test", pipelineParameters);
+        assertEquals(200, response1.getStatusLine().getStatusCode());
+
+        SearchRequestParameters requestParameters = new SearchRequestParameters();
+        requestParameters.source = "text";
+        requestParameters.match = "president";
+        requestParameters.llmModel = LlmIOUtil.COHERE_PROVIDER_PREFIX + "command"; // presumably the prefix selects the Cohere provider path in the processor - confirm in LlmIOUtil
+        requestParameters.llmQuestion = "who is lincoln";
+        requestParameters.contextSize = 5;
+        requestParameters.interactionSize = 5;
+        requestParameters.timeout = 60;
+        Response response2 = performSearch(INDEX_NAME, "pipeline_test", 5, requestParameters);
+        assertEquals(200, response2.getStatusLine().getStatusCode());
+
+        Map responseMap2 = parseResponseToMap(response2);
+        Map ext = (Map) responseMap2.get("ext");
+        assertNotNull(ext);
+        Map rag = (Map) ext.get("retrieval_augmented_generation");
+        assertNotNull(rag);
+
+        // TODO handle errors such as throttling; for now only the presence of a non-null "answer" is asserted, not its content.
+        String answer = (String) rag.get("answer");
+        assertNotNull(answer);
+    }
+
+    public void testBM25WithCohereUsingLlmResponseField() throws Exception {
+        // Same end-to-end flow as testBM25WithCohere, but the request names the answer field explicitly via llmResponseField. Returns early when COHERE_API_KEY is unset, so the test then passes trivially.
+        if (COHERE_API_KEY == null) {
+            return;
+        }
+        Response response = createConnector(COHERE_CONNECTOR_BLUEPRINT); // NOTE(review): this setup sequence duplicates testBM25WithCohere; candidate for a shared helper
+        Map responseMap = parseResponseToMap(response);
+        String connectorId = (String) responseMap.get("connector_id");
+        response = registerRemoteModel("Cohere Chat Completion v1", connectorId);
+        responseMap = parseResponseToMap(response);
+        String taskId = (String) responseMap.get("task_id");
+        waitForTask(taskId, MLTaskState.COMPLETED);
+        response = getTask(taskId); // the completed registration task carries the model_id
+        responseMap = parseResponseToMap(response);
+        String modelId = (String) responseMap.get("model_id");
+        response = deployRemoteModel(modelId);
+        responseMap = parseResponseToMap(response);
+        taskId = (String) responseMap.get("task_id");
+        waitForTask(taskId, MLTaskState.COMPLETED);
+
+        PipelineParameters pipelineParameters = new PipelineParameters();
+        pipelineParameters.tag = "testBM25WithCohereLlmResponseField";
+        pipelineParameters.description = "desc";
+        pipelineParameters.modelId = modelId;
+        pipelineParameters.systemPrompt = "You are a helpful assistant";
+        pipelineParameters.userInstructions = "none";
+        pipelineParameters.context_field = "text";
+        Response response1 = createSearchPipeline("pipeline_test", pipelineParameters);
+        assertEquals(200, response1.getStatusLine().getStatusCode());
+
+        SearchRequestParameters requestParameters = new SearchRequestParameters();
+        requestParameters.source = "text";
+        requestParameters.match = "president";
+        requestParameters.llmModel = "command"; // no provider prefix here, unlike testBM25WithCohere
+        requestParameters.llmQuestion = "who is lincoln";
+        requestParameters.contextSize = 5;
+        requestParameters.interactionSize = 5;
+        requestParameters.timeout = 60;
+        requestParameters.llmResponseField = "text"; // non-null value makes performSearch use the template that sends "llm_response_field"
+        Response response2 = performSearch(INDEX_NAME, "pipeline_test", 5, requestParameters);
+        assertEquals(200, response2.getStatusLine().getStatusCode());
+
+        Map responseMap2 = parseResponseToMap(response2);
+        Map ext = (Map) responseMap2.get("ext");
+        assertNotNull(ext);
+        Map rag = (Map) ext.get("retrieval_augmented_generation");
+        assertNotNull(rag);
+
+        // TODO handle errors such as throttling; for now only the presence of a non-null "answer" is asserted, not its content.
+        String answer = (String) rag.get("answer");
+        assertNotNull(answer);
+    }
+
     private Response createSearchPipeline(String pipeline, PipelineParameters parameters) throws Exception {
         return makeRequest(
             client(),
@@ -492,7 +641,24 @@ private Response createSearchPipeline(String pipeline, PipelineParameters parame
     private Response performSearch(String indexName, String pipeline, int size, SearchRequestParameters requestParameters)
         throws Exception {
 
-        String httpEntity = (requestParameters.conversationId == null)
+        String httpEntity =
+            requestParameters.llmResponseField != null ?
+            String
+                .format(
+                    Locale.ROOT,
+                    BM25_SEARCH_REQUEST_WITH_LLM_RESPONSE_FIELD_TEMPLATE,
+                    requestParameters.source,
+                    requestParameters.source,
+                    requestParameters.match,
+                    requestParameters.llmModel,
+                    requestParameters.llmQuestion,
+                    requestParameters.contextSize,
+                    requestParameters.interactionSize,
+                    requestParameters.timeout,
+                    requestParameters.llmResponseField
+                )
+            :
+            (requestParameters.conversationId == null)
             ? String
                 .format(
                     Locale.ROOT,
@@ -560,5 +726,7 @@ static class SearchRequestParameters {
         int interactionSize;
         int timeout;
         String conversationId;
+
+        String llmResponseField;
     }
 }
@@ -143,6 +143,7 @@ public SearchResponse processResponse(SearchRequest request, SearchResponse resp
         }
         List<String> searchResults = getSearchResults(response, topN);
 
+        log.info("RAG request params: [{}]", params.getLlmResponseField());
         start = Instant.now();
         try {
             ChatCompletionOutput output = llm
@@ -155,7 +156,7 @@ public SearchResponse processResponse(SearchRequest request, SearchResponse resp
                             llmQuestion,
                             chatHistory,
                             searchResults,
-                            timeout
+                            timeout, params.getLlmResponseField()
                         )
                 );
             log.info("doChatCompletion complete. ({})", getDuration(start));
 
@@ -70,6 +70,10 @@ public class GenerativeQAParameters implements Writeable, ToXContentObject {
     // from a remote inference endpoint before timing out the request.
     private static final ParseField TIMEOUT = new ParseField("timeout");
 
+    // Optional parameter: the name of the field in the LLM response
+    // that contains the chat completion text, e.g. "answer".
+    private static final ParseField LLM_RESPONSE_FIELD = new ParseField("llm_response_field");
+
     public static final int SIZE_NULL_VALUE = -1;
 
     static {
@@ -80,6 +84,7 @@ public class GenerativeQAParameters implements Writeable, ToXContentObject {
         PARSER.declareIntOrNull(GenerativeQAParameters::setContextSize, SIZE_NULL_VALUE, CONTEXT_SIZE);
         PARSER.declareIntOrNull(GenerativeQAParameters::setInteractionSize, SIZE_NULL_VALUE, INTERACTION_SIZE);
         PARSER.declareIntOrNull(GenerativeQAParameters::setTimeout, SIZE_NULL_VALUE, TIMEOUT);
+        PARSER.declareString(GenerativeQAParameters::setLlmResponseField, LLM_RESPONSE_FIELD);
     }
 
     @Setter
@@ -106,13 +111,18 @@ public class GenerativeQAParameters implements Writeable, ToXContentObject {
     @Getter
     private Integer timeout;
 
+    @Setter
+    @Getter
+    private String llmResponseField;
+
     public GenerativeQAParameters(
         String conversationId,
         String llmModel,
         String llmQuestion,
         Integer contextSize,
         Integer interactionSize,
-        Integer timeout
+        Integer timeout,
+        String llmResponseField
     ) {
         this.conversationId = conversationId;
         this.llmModel = llmModel;
@@ -124,6 +134,7 @@ public GenerativeQAParameters(
         this.contextSize = (contextSize == null) ? SIZE_NULL_VALUE : contextSize;
         this.interactionSize = (interactionSize == null) ? SIZE_NULL_VALUE : interactionSize;
         this.timeout = (timeout == null) ? SIZE_NULL_VALUE : timeout;
+        this.llmResponseField  = llmResponseField;
     }
 
     public GenerativeQAParameters(StreamInput input) throws IOException {
@@ -133,6 +144,7 @@ public GenerativeQAParameters(StreamInput input) throws IOException {
         this.contextSize = input.readInt();
         this.interactionSize = input.readInt();
         this.timeout = input.readInt();
+        this.llmResponseField = input.readOptionalString();
     }
 
     @Override
@@ -143,7 +155,8 @@ public XContentBuilder toXContent(XContentBuilder xContentBuilder, Params params
             .field(LLM_QUESTION.getPreferredName(), this.llmQuestion)
             .field(CONTEXT_SIZE.getPreferredName(), this.contextSize)
             .field(INTERACTION_SIZE.getPreferredName(), this.interactionSize)
-            .field(TIMEOUT.getPreferredName(), this.timeout);
+            .field(TIMEOUT.getPreferredName(), this.timeout)
+            .field(LLM_RESPONSE_FIELD.getPreferredName(), this.llmResponseField);
     }
 
     @Override
@@ -156,6 +169,7 @@ public void writeTo(StreamOutput out) throws IOException {
         out.writeInt(contextSize);
         out.writeInt(interactionSize);
         out.writeInt(timeout);
+        out.writeOptionalString(llmResponseField);
     }
 
     public static GenerativeQAParameters parse(XContentParser parser) throws IOException {
@@ -177,6 +191,7 @@ public boolean equals(Object o) {
             && Objects.equals(this.llmQuestion, other.getLlmQuestion())
             && (this.contextSize == other.getContextSize())
             && (this.interactionSize == other.getInteractionSize())
-            && (this.timeout == other.getTimeout());
+            && (this.timeout == other.getTimeout())
+            && Objects.equals(this.llmResponseField, other.getLlmResponseField());
     }
 }
@@ -43,4 +43,5 @@ public class ChatCompletionInput {
     private String systemPrompt;
     private String userInstructions;
     private Llm.ModelProvider modelProvider;
+    private String llmResponseField;
 }
@@ -87,7 +87,7 @@ public ChatCompletionOutput doChatCompletion(ChatCompletionInput chatCompletionI
 
         // TODO dataAsMap can be null or can contain information such as throttling. Handle non-happy cases.
 
-        return buildChatCompletionOutput(chatCompletionInput.getModelProvider(), dataAsMap);
+        return buildChatCompletionOutput(chatCompletionInput.getModelProvider(), dataAsMap, chatCompletionInput.getLlmResponseField());
     }
 
     protected Map<String, String> getInputParameters(ChatCompletionInput chatCompletionInput) {
@@ -105,7 +105,9 @@ protected Map<String, String> getInputParameters(ChatCompletionInput chatComplet
                 );
             inputParameters.put(CONNECTOR_INPUT_PARAMETER_MESSAGES, messages);
             // log.info("Messages to LLM: {}", messages);
-        } else if (chatCompletionInput.getModelProvider() == ModelProvider.BEDROCK) {
+        } else if (chatCompletionInput.getModelProvider() == ModelProvider.BEDROCK
+            || chatCompletionInput.getModelProvider() == ModelProvider.COHERE
+            || chatCompletionInput.getLlmResponseField() != null) {
             inputParameters
                 .put(
                     "inputs",
@@ -126,12 +128,24 @@ protected Map<String, String> getInputParameters(ChatCompletionInput chatComplet
         return inputParameters;
     }
 
-    protected ChatCompletionOutput buildChatCompletionOutput(ModelProvider provider, Map<String, ?> dataAsMap) {
+    protected ChatCompletionOutput buildChatCompletionOutput(ModelProvider provider, Map<String, ?> dataAsMap, String llmResponseField) {
 
         List<Object> answers = null;
         List<String> errors = null;
 
-        if (provider == ModelProvider.OPENAI) {
+        if (llmResponseField != null) {
+            String response = (String) dataAsMap.get(llmResponseField);
+            if (response != null) {
+                answers = List.of(response);
+            } else {
+                Map error = (Map) dataAsMap.get("error");
+                if (error != null) {
+                    errors = List.of((String) error.get("message"));
+                } else {
+                    errors = List.of("Unknown error or response.");
+                }
+            }
+        } else if (provider == ModelProvider.OPENAI) {
             List choices = (List) dataAsMap.get(CONNECTOR_OUTPUT_CHOICES);
             if (choices == null) {
                 Map error = (Map) dataAsMap.get(CONNECTOR_OUTPUT_ERROR);
@@ -161,6 +175,19 @@ protected ChatCompletionOutput buildChatCompletionOutput(ModelProvider provider,
                     errors = List.of("Unknown error or response.");
                 }
             }
+        } else if (provider == ModelProvider.COHERE) {
+            String response = (String) dataAsMap.get("text");
+            if (response != null) {
+                answers = List.of(response);
+            } else {
+                Map error = (Map) dataAsMap.get("error");
+                if (error != null) {
+                    errors = List.of((String) error.get("message"));
+                } else {
+                    errors = List.of("Unknown error or response.");
+                    log.error("{}", dataAsMap);
+                }
+            }
         } else {
             throw new IllegalArgumentException("Unknown/unsupported model provider: " + provider);
         }
 
@@ -25,7 +25,8 @@ public interface Llm {
     // TODO Ensure the current implementation works with all models supported by Bedrock.
     enum ModelProvider {
         OPENAI,
-        BEDROCK
+        BEDROCK,
+        COHERE
     }
 
     ChatCompletionOutput doChatCompletion(ChatCompletionInput input);
Original file line number	Diff line number	Diff line change
`@@ -43,4 +43,5 @@ public class ChatCompletionInput {`
`43`	`43`	`private String systemPrompt;`
`44`	`44`	`private String userInstructions;`
`45`	`45`	`private Llm.ModelProvider modelProvider;`
	`46`	`+ private String llmResponseField;`
`46`	`47`	`}`
Original file line number	Diff line number	Diff line change
`@@ -25,7 +25,8 @@ public interface Llm {`
`25`	`25`	`// TODO Ensure the current implementation works with all models supported by Bedrock.`
`26`	`26`	`enum ModelProvider {`
`27`	`27`	`OPENAI,`
`28`		`- BEDROCK`
	`28`	`+ BEDROCK,`
	`29`	`+ COHERE`
`29`	`30`	`}`
`30`	`31`
`31`	`32`	`ChatCompletionOutput doChatCompletion(ChatCompletionInput input);`