diff --git a/openai-core/src/commonMain/kotlin/com.aallam.openai.api/assistant/AssistantRequest.kt b/openai-core/src/commonMain/kotlin/com.aallam.openai.api/assistant/AssistantRequest.kt
index 26c95a0d..318315c9 100644
--- a/openai-core/src/commonMain/kotlin/com.aallam.openai.api/assistant/AssistantRequest.kt
+++ b/openai-core/src/commonMain/kotlin/com.aallam.openai.api/assistant/AssistantRequest.kt
@@ -2,6 +2,7 @@ package com.aallam.openai.api.assistant
 
 import com.aallam.openai.api.BetaOpenAI
 import com.aallam.openai.api.OpenAIDsl
+import com.aallam.openai.api.file.FileId
 import com.aallam.openai.api.model.ModelId
 import kotlinx.serialization.SerialName
 import kotlinx.serialization.Serializable
@@ -77,6 +78,8 @@ public data class AssistantRequest(
      * the conversation exceeded the max context length.
      */
     @SerialName("response_format") val responseFormat: AssistantResponseFormat? = null,
+
+    @SerialName("file_ids") val fileIds: List<FileId>? = null,
 )
 
 @BetaOpenAI
diff --git a/openai-core/src/commonMain/kotlin/com.aallam.openai.api/assistant/AssistantTruncationStrategy.kt b/openai-core/src/commonMain/kotlin/com.aallam.openai.api/assistant/AssistantTruncationStrategy.kt
new file mode 100644
index 00000000..352f4552
--- /dev/null
+++ b/openai-core/src/commonMain/kotlin/com.aallam.openai.api/assistant/AssistantTruncationStrategy.kt
@@ -0,0 +1,20 @@
+package com.aallam.openai.api.assistant
+
+import kotlinx.serialization.SerialName
+import kotlinx.serialization.Serializable
+
+@Serializable
+public data class AssistantTruncationStrategy(
+    /**
+     * The truncation strategy to use for the thread.
+     * The default is `auto`.
+     * If set to `last_messages`, the thread will be truncated to the n most recent messages in the thread.
+     * When set to `auto`, messages in the middle of the thread will be dropped to fit the context length of the model, `max_prompt_tokens`.
+     */
+    @SerialName("type") val type: String,
+
+    /**
+     * The number of most recent messages from the thread when constructing the context for the run.
+     */
+    @SerialName("last_messages") val lastMessages: Int? = null
+)
diff --git a/openai-core/src/commonMain/kotlin/com.aallam.openai.api/run/RunRequest.kt b/openai-core/src/commonMain/kotlin/com.aallam.openai.api/run/RunRequest.kt
index a41334bc..a73906e2 100644
--- a/openai-core/src/commonMain/kotlin/com.aallam.openai.api/run/RunRequest.kt
+++ b/openai-core/src/commonMain/kotlin/com.aallam.openai.api/run/RunRequest.kt
@@ -2,7 +2,9 @@ package com.aallam.openai.api.run
 
 import com.aallam.openai.api.BetaOpenAI
 import com.aallam.openai.api.assistant.AssistantId
+import com.aallam.openai.api.assistant.AssistantResponseFormat
 import com.aallam.openai.api.assistant.AssistantTool
+import com.aallam.openai.api.assistant.AssistantTruncationStrategy
 import com.aallam.openai.api.model.ModelId
 import kotlinx.serialization.SerialName
 import kotlinx.serialization.Serializable
@@ -24,6 +26,7 @@ public data class RunRequest(
      * If not, the model associated with the assistant will be used.
      */
     @SerialName("model") val model: ModelId? = null,
+
     /**
      * Override the default system message of the assistant.
      * This is useful for modifying the behavior on a per-run basis.
@@ -48,6 +51,45 @@ public data class RunRequest(
      * Keys can be a maximum of 64 characters long, and values can be a maximum of 512 characters long.
      */
     @SerialName("metadata") val metadata: Map<String, String>? = null,
+
+    /**
+     * What sampling temperature to use, between 0 and 2.
+     * Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.
+     */
+    @SerialName("temperature") val temperature: Double? = null,
+
+    /**
+     * An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass.
+     * So 0.1 means only the tokens comprising the top 10% probability mass are considered.
+     */
+    @SerialName("top_p") val topP: Double? = null,
+
+    /**
+     * The maximum number of prompt tokens that may be used over the course of the run.
+     * The run will make a best effort to use only the number of prompt tokens specified, across multiple turns of the run.
+     * If the run exceeds the number of prompt tokens specified, the run will end with status `incomplete`.
+     * See `incomplete_details` for more info.
+     */
+    @SerialName("max_prompt_tokens") val maxPromptTokens: Int? = null,
+
+    /**
+     * The maximum number of completion tokens that may be used over the course of the run.
+     * The run will make a best effort to use only the number of completion tokens specified, across multiple turns of the run.
+     * If the run exceeds the number of completion tokens specified, the run will end with status `incomplete`.
+     * See `incomplete_details` for more info.
+     */
+    @SerialName("max_completion_tokens") val maxCompletionTokens: Int? = null,
+
+    /**
+     * Controls for how a thread will be truncated prior to the run. Use this to control the initial context window of the run.
+     */
+    @SerialName("truncation_strategy") val truncationStrategy: AssistantTruncationStrategy? = null,
+
+    /**
+     * Specifies the format that the model must output.
+     * Compatible with GPT-4o, GPT-4 Turbo, and all GPT-3.5 Turbo models since gpt-3.5-turbo-1106.
+     */
+    @SerialName("response_format") val responseFormat: AssistantResponseFormat? = null,
 )
 
 /**
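For context, a minimal, hypothetical usage sketch of the new `RunRequest` fields (not part of this diff). It assumes `RunRequest` keeps its existing required `assistant_id` constructor parameter and that the new run-level overrides are passed straight through the constructor; the IDs, model name, and numeric values are placeholders.

```kotlin
import com.aallam.openai.api.BetaOpenAI
import com.aallam.openai.api.assistant.AssistantId
import com.aallam.openai.api.assistant.AssistantTruncationStrategy
import com.aallam.openai.api.model.ModelId
import com.aallam.openai.api.run.RunRequest

@OptIn(BetaOpenAI::class)
fun sampleRunRequest(): RunRequest = RunRequest(
    // Existing required field; the id value is a placeholder.
    assistantId = AssistantId("asst_abc123"),
    // Existing optional per-run model override.
    model = ModelId("gpt-4o"),
    // New sampling controls: lower temperature -> more deterministic output.
    temperature = 0.2,
    topP = 1.0,
    // New token budgets: the run ends with status `incomplete` if exceeded.
    maxPromptTokens = 2000,
    maxCompletionTokens = 1000,
    // New truncation control: keep only the 10 most recent messages when
    // constructing the run's context window.
    truncationStrategy = AssistantTruncationStrategy(
        type = "last_messages",
        lastMessages = 10,
    ),
)
```

Since `AssistantTruncationStrategy.type` is a plain `String` here, callers must pass `auto` or `last_messages` themselves; a sealed or value type could be considered as a follow-up.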