Update RunRequest class to accept new create run parameters #360
base: main
AssistantTruncationStrategy.kt (new file):

@@ -0,0 +1,20 @@
package com.aallam.openai.api.assistant

import kotlinx.serialization.SerialName
import kotlinx.serialization.Serializable

@Serializable
public data class AssistantTruncationStrategy(
    /**
     * The truncation strategy to use for the thread.
     * The default is `auto`.
     * If set to `last_messages`, the thread will be truncated to the n most recent messages in the thread.
     * When set to `auto`, messages in the middle of the thread will be dropped to fit the context length of the model, `max_prompt_tokens`.
     */
    @SerialName("type") val type: String,

    /**
     * The number of most recent messages from the thread when constructing the context for the run.
     */
    @SerialName("last_messages") val lastMessages: Int? = null
)
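For a quick sanity check, here is a minimal sketch (values illustrative) of how this type round-trips through kotlinx.serialization; the @SerialName annotations map the Kotlin property names to the API's snake_case keys:

import com.aallam.openai.api.assistant.AssistantTruncationStrategy
import kotlinx.serialization.encodeToString
import kotlinx.serialization.json.Json

fun main() {
    val strategy = AssistantTruncationStrategy(type = "last_messages", lastMessages = 10)
    // Prints: {"type":"last_messages","last_messages":10}
    println(Json.encodeToString(strategy))
}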
RunRequest.kt:

@@ -2,7 +2,9 @@ package com.aallam.openai.api.run

import com.aallam.openai.api.BetaOpenAI
import com.aallam.openai.api.assistant.AssistantId
import com.aallam.openai.api.assistant.AssistantResponseFormat
import com.aallam.openai.api.assistant.AssistantTool
import com.aallam.openai.api.assistant.AssistantTruncationStrategy
import com.aallam.openai.api.model.ModelId
import kotlinx.serialization.SerialName
import kotlinx.serialization.Serializable
@@ -24,6 +26,7 @@ public data class RunRequest(
     * If not, the model associated with the assistant will be used.
     */
    @SerialName("model") val model: ModelId? = null,

    /**
     * Override the default system message of the assistant.
     * This is useful for modifying the behavior on a per-run basis.
@@ -48,6 +51,45 @@ public data class RunRequest(
     * Keys can be a maximum of 64 characters long, and values can be a maximum of 512 characters long.
     */
    @SerialName("metadata") val metadata: Map<String, String>? = null,

    /**
     * What sampling temperature to use, between 0 and 2.
     * Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.
     */
    @SerialName("temperature") val temperature: Int? = null,
Review comment:

Suggested change:
-    @SerialName("temperature") val temperature: Int? = null,
+    @SerialName("temperature") val temperature: Double? = null,

I believe this should be a floating number.
    /**
     * An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass.
     * So 0.1 means only the tokens comprising the top 10% probability mass are considered.
     */
    @SerialName("top_p") val topP: Int? = null,
Review comment:

Suggested change:
-    @SerialName("top_p") val topP: Int? = null,
+    @SerialName("top_p") val topP: Double? = null,

Same here :)
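To illustrate the reviewers' point with a standalone, purely hypothetical snippet: the KDoc's own example values (0.8, 0.2, 0.1) cannot be represented by Int, so a floating-point type such as Double is needed for these two fields:

import kotlinx.serialization.SerialName
import kotlinx.serialization.Serializable
import kotlinx.serialization.encodeToString
import kotlinx.serialization.json.Json

// Hypothetical standalone type, only to show the Double-typed encoding.
@Serializable
data class SamplingParams(
    @SerialName("temperature") val temperature: Double? = null,
    @SerialName("top_p") val topP: Double? = null,
)

fun main() {
    // With Int, callers could only ever send 0, 1, or 2.
    // Prints: {"temperature":0.8,"top_p":0.1}
    println(Json.encodeToString(SamplingParams(temperature = 0.8, topP = 0.1)))
}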
    /**
     * The maximum number of prompt tokens that may be used over the course of the run.
     * The run will make a best effort to use only the number of prompt tokens specified, across multiple turns of the run.
     * If the run exceeds the number of prompt tokens specified, the run will end with status `incomplete`.
     * See `incomplete_details` for more info.
     */
    @SerialName("max_prompt_tokens") val maxPromptTokens: Int? = null,
Review comment:

Suggested change:
-    @SerialName("max_prompt_tokens") val maxPromptTokens: Int? = null,
+    @SerialName("max_prompt_tokens") val maxPromptTokens: Long? = null,

I would suggest using Longs here.
    /**
     * The maximum number of completion tokens that may be used over the course of the run.
     * The run will make a best effort to use only the number of completion tokens specified, across multiple turns of the run.
     * If the run exceeds the number of completion tokens specified, the run will end with status `incomplete`.
     * See `incomplete_details` for more info.
     */
    @SerialName("max_completion_tokens") val maxCompletionTokens: Int? = null,
Review comment:

Suggested change:
-    @SerialName("max_completion_tokens") val maxCompletionTokens: Int? = null,
+    @SerialName("max_completion_tokens") val maxCompletionTokens: Long? = null,

Same suggestion here.
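For context on the Long suggestion: Kotlin's Int is signed 32-bit, so an Int-typed limit tops out at 2^31 - 1 = 2,147,483,647 tokens, while Long (signed 64-bit) raises the ceiling to 2^63 - 1. A trivial check:

fun main() {
    // Int is 32-bit signed, Long is 64-bit signed.
    check(Int.MAX_VALUE == 2_147_483_647)
    check(Long.MAX_VALUE == 9_223_372_036_854_775_807L)
}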
    /**
     * Controls for how a thread will be truncated prior to the run. Use this to control the initial context window of the run.
     */
    @SerialName("truncation_strategy") val truncationStrategy: AssistantTruncationStrategy? = null,

    /**
     * Specifies the format that the model must output.
     * Compatible with GPT-4o, GPT-4 Turbo, and all GPT-3.5 Turbo models since gpt-3.5-turbo-1106.
     */
    @SerialName("response_format") val responseFormat: AssistantResponseFormat? = null,
)
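Putting the new fields together, a minimal sketch of constructing a run request as typed in this diff; the assistantId parameter is assumed from the AssistantId import above, its value is a hypothetical placeholder, and the opt-in reflects the BetaOpenAI import:

import com.aallam.openai.api.BetaOpenAI
import com.aallam.openai.api.assistant.AssistantId
import com.aallam.openai.api.assistant.AssistantTruncationStrategy
import com.aallam.openai.api.model.ModelId
import com.aallam.openai.api.run.RunRequest

@OptIn(BetaOpenAI::class)
fun exampleRequest(): RunRequest = RunRequest(
    assistantId = AssistantId("asst_abc123"), // hypothetical id, assumed field
    model = ModelId("gpt-4o"),
    temperature = 1, // Int as declared in this diff; reviewers suggest Double
    maxPromptTokens = 2_000,
    maxCompletionTokens = 1_000,
    truncationStrategy = AssistantTruncationStrategy(type = "last_messages", lastMessages = 10),
)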
Review comment:

Maybe we can improve the API a little bit here, with something like this:
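The reviewer's snippet is not shown above; purely as a hypothetical sketch of one such improvement (not the reviewer's actual proposal), the stringly-typed `type` field could be hidden behind companion-object factories:

import kotlinx.serialization.SerialName
import kotlinx.serialization.Serializable

@Serializable
public data class AssistantTruncationStrategy internal constructor(
    @SerialName("type") val type: String,
    @SerialName("last_messages") val lastMessages: Int? = null,
) {
    public companion object {
        /** Drop messages in the middle of the thread to fit the model's context length. */
        public val Auto: AssistantTruncationStrategy = AssistantTruncationStrategy(type = "auto")

        /** Truncate the thread to the [n] most recent messages. */
        public fun lastMessages(n: Int): AssistantTruncationStrategy =
            AssistantTruncationStrategy(type = "last_messages", lastMessages = n)
    }
}

Callers would then write AssistantTruncationStrategy.lastMessages(10) instead of passing "last_messages" by hand.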