From 86c7e2ab9aee8a92c84509194348f145a2f8862a Mon Sep 17 00:00:00 2001
From: Quest Henkart
Date: Mon, 15 Apr 2024 14:58:28 +0800
Subject: [PATCH] add cost saving parameters

---
 run.go | 43 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 43 insertions(+)

diff --git a/run.go b/run.go
index 1f3cb7eb7..0ef31cd9a 100644
--- a/run.go
+++ b/run.go
@@ -28,6 +28,16 @@ type Run struct {
 	Metadata       map[string]any     `json:"metadata"`
 	Usage          Usage              `json:"usage,omitempty"`
 
+	Temperature *float32 `json:"temperature,omitempty"`
+	// The maximum number of prompt tokens that may be used over the course of the run.
+	// If the run exceeds the number of prompt tokens specified, the run will end with status 'incomplete'.
+	MaxPromptTokens int `json:"max_prompt_tokens,omitempty"`
+	// The maximum number of completion tokens that may be used over the course of the run.
+	// If the run exceeds the number of completion tokens specified, the run will end with status 'incomplete'.
+	MaxCompletionTokens int `json:"max_completion_tokens,omitempty"`
+	// TruncationStrategy defines the truncation strategy to use for the thread.
+	TruncationStrategy *ThreadTruncationStrategy `json:"truncation_strategy,omitempty"`
+
 	httpHeader
 }
 
@@ -78,8 +88,41 @@ type RunRequest struct {
 	AdditionalInstructions string         `json:"additional_instructions,omitempty"`
 	Tools                  []Tool         `json:"tools,omitempty"`
 	Metadata               map[string]any `json:"metadata,omitempty"`
+
+	// What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.
+	Temperature *float32 `json:"temperature,omitempty"`
+
+	// The maximum number of prompt tokens that may be used over the course of the run.
+	// If the run exceeds the number of prompt tokens specified, the run will end with status 'incomplete'.
+	MaxPromptTokens int `json:"max_prompt_tokens,omitempty"`
+
+	// The maximum number of completion tokens that may be used over the course of the run.
+	// If the run exceeds the number of completion tokens specified, the run will end with status 'incomplete'.
+	MaxCompletionTokens int `json:"max_completion_tokens,omitempty"`
+
+	// TruncationStrategy defines the truncation strategy to use for the thread.
+	TruncationStrategy *ThreadTruncationStrategy `json:"truncation_strategy,omitempty"`
 }
 
+// ThreadTruncationStrategy defines the truncation strategy to use for the thread.
+// https://platform.openai.com/docs/assistants/how-it-works/truncation-strategy
+type ThreadTruncationStrategy struct {
+	// Type defaults to 'auto'.
+	Type TruncationStrategy `json:"type,omitempty"`
+	// LastMessages should be set when Type is TruncationStrategyLastMessages.
+	LastMessages *int `json:"last_messages,omitempty"`
+}
+
+// TruncationStrategy defines the available truncation strategies for thread management in an assistant.
+type TruncationStrategy string
+
+const (
+	// TruncationStrategyAuto: messages in the middle of the thread will be dropped to fit the context length of the model.
+	TruncationStrategyAuto = TruncationStrategy("auto")
+	// TruncationStrategyLastMessages: the thread will be truncated to the n most recent messages in the thread.
+	TruncationStrategyLastMessages = TruncationStrategy("last_messages")
+)
+
 type RunModifyRequest struct {
 	Metadata map[string]any `json:"metadata,omitempty"`
 }
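
For reviewers, a minimal sketch of how the new cost-saving parameters could be exercised once this patch lands. It assumes the library's existing NewClient constructor and CreateRun helper; the API key, thread ID, assistant ID, and all numeric limits are placeholder values chosen for illustration:

package main

import (
	"context"
	"fmt"

	openai "github.com/sashabaranov/go-openai"
)

func main() {
	client := openai.NewClient("your-api-key") // placeholder key

	temperature := float32(0.2) // low temperature => more focused, deterministic output
	lastMessages := 10          // keep only the 10 most recent messages in context

	run, err := client.CreateRun(context.Background(), "thread-id", openai.RunRequest{
		AssistantID: "assistant-id",
		// Cap token usage on both the prompt and completion side; per the
		// field comments above, exceeding either limit ends the run with
		// status 'incomplete'.
		MaxPromptTokens:     2000,
		MaxCompletionTokens: 500,
		Temperature:         &temperature,
		// Truncate the thread to the n most recent messages instead of 'auto'.
		TruncationStrategy: &openai.ThreadTruncationStrategy{
			Type:         openai.TruncationStrategyLastMessages,
			LastMessages: &lastMessages,
		},
	})
	if err != nil {
		fmt.Printf("CreateRun error: %v\n", err)
		return
	}
	fmt.Println("run started:", run.ID)
}

Temperature and LastMessages are pointers so a caller can distinguish "not set" from a deliberate zero value, which keeps the omitempty JSON tags from silently dropping legitimate settings.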