From 86c7e2ab9aee8a92c84509194348f145a2f8862a Mon Sep 17 00:00:00 2001
From: Quest Henkart
Date: Mon, 15 Apr 2024 14:58:28 +0800
Subject: [PATCH] add cost saving parameters

---
 run.go | 43 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 43 insertions(+)

diff --git a/run.go b/run.go
index 1f3cb7eb7..0ef31cd9a 100644
--- a/run.go
+++ b/run.go
@@ -28,6 +28,16 @@ type Run struct {
 	Metadata       map[string]any     `json:"metadata"`
 	Usage          Usage              `json:"usage,omitempty"`
 
+	Temperature *float32 `json:"temperature,omitempty"`
+	// The maximum number of prompt tokens that may be used over the course of the run.
+	// If the run exceeds the number of prompt tokens specified, the run will end with status 'incomplete'.
+	MaxPromptTokens int `json:"max_prompt_tokens,omitempty"`
+	// The maximum number of completion tokens that may be used over the course of the run.
+	// If the run exceeds the number of completion tokens specified, the run will end with status 'incomplete'.
+	MaxCompletionTokens int `json:"max_completion_tokens,omitempty"`
+	// TruncationStrategy defines the truncation strategy to use for the thread.
+	TruncationStrategy *ThreadTruncationStrategy `json:"truncation_strategy,omitempty"`
+
 	httpHeader
 }
 
@@ -78,8 +88,41 @@ type RunRequest struct {
 	AdditionalInstructions string         `json:"additional_instructions,omitempty"`
 	Tools                  []Tool         `json:"tools,omitempty"`
 	Metadata               map[string]any `json:"metadata,omitempty"`
+
+	// What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.
+	Temperature *float32 `json:"temperature,omitempty"`
+
+	// The maximum number of prompt tokens that may be used over the course of the run.
+	// If the run exceeds the number of prompt tokens specified, the run will end with status 'incomplete'.
+	MaxPromptTokens int `json:"max_prompt_tokens,omitempty"`
+
+	// The maximum number of completion tokens that may be used over the course of the run.
+	// If the run exceeds the number of completion tokens specified, the run will end with status 'incomplete'.
+	MaxCompletionTokens int `json:"max_completion_tokens,omitempty"`
+
+	// TruncationStrategy defines the truncation strategy to use for the thread.
+	TruncationStrategy *ThreadTruncationStrategy `json:"truncation_strategy,omitempty"`
 }
 
+// ThreadTruncationStrategy defines the truncation strategy to use for the thread.
+// https://platform.openai.com/docs/assistants/how-it-works/truncation-strategy
+type ThreadTruncationStrategy struct {
+	// Type defaults to 'auto'.
+	Type TruncationStrategy `json:"type,omitempty"`
+	// LastMessages should be set when Type is TruncationStrategyLastMessages.
+	LastMessages *int `json:"last_messages,omitempty"`
+}
+
+// TruncationStrategy defines the available truncation strategies for thread management in an assistant.
+type TruncationStrategy string
+
+const (
+	// TruncationStrategyAuto: messages in the middle of the thread will be dropped to fit the context length of the model.
+	TruncationStrategyAuto = TruncationStrategy("auto")
+	// TruncationStrategyLastMessages: the thread will be truncated to the n most recent messages in the thread.
+	TruncationStrategyLastMessages = TruncationStrategy("last_messages")
+)
+
 type RunModifyRequest struct {
 	Metadata map[string]any `json:"metadata,omitempty"`
 }
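
For reviewers, a minimal sketch of how the new cost-saving parameters could be exercised once this patch lands. It assumes the library's existing NewClient constructor and CreateRun helper; the API key, thread ID, assistant ID, and all numeric limits are placeholder values chosen for illustration:

package main

import (
	"context"
	"fmt"

	openai "github.com/sashabaranov/go-openai"
)

func main() {
	client := openai.NewClient("your-api-key") // placeholder key

	temperature := float32(0.2) // low temperature => more focused, deterministic output
	lastMessages := 10          // keep only the 10 most recent messages in context

	run, err := client.CreateRun(context.Background(), "thread-id", openai.RunRequest{
		AssistantID: "assistant-id",
		// Cap token usage on both the prompt and completion side; per the
		// field comments above, exceeding either limit ends the run with
		// status 'incomplete'.
		MaxPromptTokens:     2000,
		MaxCompletionTokens: 500,
		Temperature:         &temperature,
		// Truncate the thread to the n most recent messages instead of 'auto'.
		TruncationStrategy: &openai.ThreadTruncationStrategy{
			Type:         openai.TruncationStrategyLastMessages,
			LastMessages: &lastMessages,
		},
	})
	if err != nil {
		fmt.Printf("CreateRun error: %v\n", err)
		return
	}
	fmt.Println("run started:", run.ID)
}

Temperature and LastMessages are pointers so a caller can distinguish "not set" from a deliberate zero value, which keeps the omitempty JSON tags from silently dropping legitimate settings.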