Update RunRequest class to accept new create run parameters #360
base: main
AssistantTruncationStrategy.kt (new file):

@@ -0,0 +1,20 @@
package com.aallam.openai.api.assistant

import kotlinx.serialization.SerialName
import kotlinx.serialization.Serializable

@Serializable
public data class AssistantTruncationStrategy(
    /**
     * The truncation strategy to use for the thread.
     * The default is `auto`.
     * If set to `last_messages`, the thread will be truncated to the n most recent messages in the thread.
     * When set to `auto`, messages in the middle of the thread will be dropped to fit the context length of the model, `max_prompt_tokens`.
     */
    @SerialName("type") val type: String,

    /**
     * The number of most recent messages from the thread when constructing the context for the run.
     */
    @SerialName("last_messages") val lastMessages: Int? = null
)
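For a quick sanity check, here is a minimal sketch (values illustrative) of how this type round-trips through kotlinx.serialization; the @SerialName annotations map the Kotlin property names to the API's snake_case keys:

import com.aallam.openai.api.assistant.AssistantTruncationStrategy
import kotlinx.serialization.encodeToString
import kotlinx.serialization.json.Json

fun main() {
    val strategy = AssistantTruncationStrategy(type = "last_messages", lastMessages = 10)
    // Prints: {"type":"last_messages","last_messages":10}
    println(Json.encodeToString(strategy))
}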
RunRequest.kt:

@@ -2,7 +2,9 @@ package com.aallam.openai.api.run

import com.aallam.openai.api.BetaOpenAI
import com.aallam.openai.api.assistant.AssistantId
import com.aallam.openai.api.assistant.AssistantResponseFormat
import com.aallam.openai.api.assistant.AssistantTool
import com.aallam.openai.api.assistant.AssistantTruncationStrategy
import com.aallam.openai.api.model.ModelId
import kotlinx.serialization.SerialName
import kotlinx.serialization.Serializable
@@ -24,6 +26,7 @@ public data class RunRequest(
     * If not, the model associated with the assistant will be used.
     */
    @SerialName("model") val model: ModelId? = null,

    /**
     * Override the default system message of the assistant.
     * This is useful for modifying the behavior on a per-run basis.
@@ -48,6 +51,45 @@ public data class RunRequest(
     * Keys can be a maximum of 64 characters long, and values can be a maximum of 512 characters long.
     */
    @SerialName("metadata") val metadata: Map<String, String>? = null,

    /**
     * What sampling temperature to use, between 0 and 2.
     * Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.
     */
    @SerialName("temperature") val temperature: Int? = null,
Review comment:

Suggested change:
-    @SerialName("temperature") val temperature: Int? = null,
+    @SerialName("temperature") val temperature: Double? = null,

I believe this should be a floating number.
    /**
     * An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass.
     * So 0.1 means only the tokens comprising the top 10% probability mass are considered.
     */
    @SerialName("top_p") val topP: Int? = null,
Review comment:

Suggested change:
-    @SerialName("top_p") val topP: Int? = null,
+    @SerialName("top_p") val topP: Double? = null,

Same here :)
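To illustrate the reviewers' point with a standalone, purely hypothetical snippet: the KDoc's own example values (0.8, 0.2, 0.1) cannot be represented by Int, so a floating-point type such as Double is needed for these two fields:

import kotlinx.serialization.SerialName
import kotlinx.serialization.Serializable
import kotlinx.serialization.encodeToString
import kotlinx.serialization.json.Json

// Hypothetical standalone type, only to show the Double-typed encoding.
@Serializable
data class SamplingParams(
    @SerialName("temperature") val temperature: Double? = null,
    @SerialName("top_p") val topP: Double? = null,
)

fun main() {
    // With Int, callers could only ever send 0, 1, or 2.
    // Prints: {"temperature":0.8,"top_p":0.1}
    println(Json.encodeToString(SamplingParams(temperature = 0.8, topP = 0.1)))
}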
    /**
     * The maximum number of prompt tokens that may be used over the course of the run.
     * The run will make a best effort to use only the number of prompt tokens specified, across multiple turns of the run.
     * If the run exceeds the number of prompt tokens specified, the run will end with status `incomplete`.
     * See `incomplete_details` for more info.
     */
    @SerialName("max_prompt_tokens") val maxPromptTokens: Int? = null,
Review comment:

Suggested change:
-    @SerialName("max_prompt_tokens") val maxPromptTokens: Int? = null,
+    @SerialName("max_prompt_tokens") val maxPromptTokens: Long? = null,

I would suggest using Longs here.
    /**
     * The maximum number of completion tokens that may be used over the course of the run.
     * The run will make a best effort to use only the number of completion tokens specified, across multiple turns of the run.
     * If the run exceeds the number of completion tokens specified, the run will end with status `incomplete`.
     * See `incomplete_details` for more info.
     */
    @SerialName("max_completion_tokens") val maxCompletionTokens: Int? = null,
Review comment:

Suggested change:
-    @SerialName("max_completion_tokens") val maxCompletionTokens: Int? = null,
+    @SerialName("max_completion_tokens") val maxCompletionTokens: Long? = null,

Same suggestion here.
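For context on the Long suggestion: Kotlin's Int is signed 32-bit, so an Int-typed limit tops out at 2^31 - 1 = 2,147,483,647 tokens, while Long (signed 64-bit) raises the ceiling to 2^63 - 1. A trivial check:

fun main() {
    // Int is 32-bit signed, Long is 64-bit signed.
    check(Int.MAX_VALUE == 2_147_483_647)
    check(Long.MAX_VALUE == 9_223_372_036_854_775_807L)
}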
    /**
     * Controls for how a thread will be truncated prior to the run. Use this to control the initial context window of the run.
     */
    @SerialName("truncation_strategy") val truncationStrategy: AssistantTruncationStrategy? = null,

    /**
     * Specifies the format that the model must output.
     * Compatible with GPT-4o, GPT-4 Turbo, and all GPT-3.5 Turbo models since gpt-3.5-turbo-1106.
     */
    @SerialName("response_format") val responseFormat: AssistantResponseFormat? = null,
)
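Putting the new fields together, a minimal sketch of constructing a run request as typed in this diff; the assistantId parameter is assumed from the AssistantId import above, its value is a hypothetical placeholder, and the opt-in reflects the BetaOpenAI import:

import com.aallam.openai.api.BetaOpenAI
import com.aallam.openai.api.assistant.AssistantId
import com.aallam.openai.api.assistant.AssistantTruncationStrategy
import com.aallam.openai.api.model.ModelId
import com.aallam.openai.api.run.RunRequest

@OptIn(BetaOpenAI::class)
fun exampleRequest(): RunRequest = RunRequest(
    assistantId = AssistantId("asst_abc123"), // hypothetical id, assumed field
    model = ModelId("gpt-4o"),
    temperature = 1, // Int as declared in this diff; reviewers suggest Double
    maxPromptTokens = 2_000,
    maxCompletionTokens = 1_000,
    truncationStrategy = AssistantTruncationStrategy(type = "last_messages", lastMessages = 10),
)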
Review comment:

Maybe we can improve the API a little bit here, with something like this:
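The reviewer's snippet is not shown above; purely as a hypothetical sketch of one such improvement (not the reviewer's actual proposal), the stringly-typed `type` field could be hidden behind companion-object factories:

import kotlinx.serialization.SerialName
import kotlinx.serialization.Serializable

@Serializable
public data class AssistantTruncationStrategy internal constructor(
    @SerialName("type") val type: String,
    @SerialName("last_messages") val lastMessages: Int? = null,
) {
    public companion object {
        /** Drop messages in the middle of the thread to fit the model's context length. */
        public val Auto: AssistantTruncationStrategy = AssistantTruncationStrategy(type = "auto")

        /** Truncate the thread to the [n] most recent messages. */
        public fun lastMessages(n: Int): AssistantTruncationStrategy =
            AssistantTruncationStrategy(type = "last_messages", lastMessages = n)
    }
}

Callers would then write AssistantTruncationStrategy.lastMessages(10) instead of passing "last_messages" by hand.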