From a51392199c19c4fecf5e40b84f070a9ecf025ecb Mon Sep 17 00:00:00 2001
From: John Christopher McAlpine
Date: Sat, 11 Mar 2023 15:57:59 -0500
Subject: [PATCH] add complex "one of" types and other miscellaneous type
 fixups

---
 src/models/create_answer_request.ml            |  4 +-
 src/models/create_chat_completion_request.ml   |  2 +-
 .../create_chat_completion_response_choices.ml |  9 +--
 src/models/create_completion_request.ml        | 10 ++-
 src/models/create_embedding_request.ml         |  6 +-
 src/models/create_moderation_request.ml        |  4 +-
 src/models/engine.ml                           |  4 +-
 src/models/model.ml                            |  2 +-
 src/support/multiTypes.ml                      | 76 +++++++++++++++++++
 src/support/request.ml                         |  8 +-
 10 files changed, 101 insertions(+), 24 deletions(-)
 create mode 100644 src/support/multiTypes.ml

diff --git a/src/models/create_answer_request.ml b/src/models/create_answer_request.ml
index cc69d39..bc2d7f5 100644
--- a/src/models/create_answer_request.ml
+++ b/src/models/create_answer_request.ml
@@ -28,8 +28,8 @@ type t = {
     logprobs: int32 option [@default None];
     (* The maximum number of tokens allowed for the generated answer *)
     max_tokens: int32 option [@default None];
-    (* completions_stop_description *)
-    stop: One_ofstringarray.t option [@default None];
+    (* Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence. *)
+    stop: string array option [@default None];
     (* How many answers to generate for each question. *)
     n: int32 option [@default None];
    (* If set to `true`, the returned JSON will include a \''prompt\'' field containing the final prompt that was used to request a completion. This is mainly useful for debugging purposes. *)
diff --git a/src/models/create_chat_completion_request.ml b/src/models/create_chat_completion_request.ml
index df1462c..ece4320 100644
--- a/src/models/create_chat_completion_request.ml
+++ b/src/models/create_chat_completion_request.ml
@@ -19,7 +19,7 @@ type t = {
     (* If set, partial message deltas will be sent, like in ChatGPT. Tokens will be sent as data-only [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format) as they become available, with the stream terminated by a `data: [DONE]` message. *)
     stream: bool option [@default None];
     (* Up to 4 sequences where the API will stop generating further tokens. *)
-    stop: One_ofstringarray.t option [@default None];
+    stop: string array option [@default None];
     (* The maximum number of tokens allowed for the generated answer. By default, the number of tokens the model can return will be (4096 - prompt tokens). *)
     max_tokens: int32 option [@default None];
     (* completions_presence_penalty_description *)
diff --git a/src/models/create_chat_completion_response_choices.ml b/src/models/create_chat_completion_response_choices.ml
index 76f8a96..eb9a21f 100644
--- a/src/models/create_chat_completion_response_choices.ml
+++ b/src/models/create_chat_completion_response_choices.ml
@@ -7,13 +7,6 @@

 type t = {
     index: int32 option [@default None];
-    message: Chat_completion_response_message.t option [@default None];
+    message: Chat_completion_response_message.t;
     finish_reason: string option [@default None];
 } [@@deriving yojson { strict = false }, show ];;
-
-let create () : t = {
-    index = None;
-    message = None;
-    finish_reason = None;
-}
-
diff --git a/src/models/create_completion_request.ml b/src/models/create_completion_request.ml
index 27de82d..24beb09 100644
--- a/src/models/create_completion_request.ml
+++ b/src/models/create_completion_request.ml
@@ -8,8 +8,12 @@ type t = {
    (* ID of the model to use.
    You can use the [List models](/docs/api-reference/models/list) API to see all of your available models, or see our [Model overview](/docs/models/overview) for descriptions of them. *)
     model: string;
-    (* The prompt(s) to generate completions for, encoded as a string, array of strings, array of tokens, or array of token arrays. Note that <|endoftext|> is the document separator that the model sees during training, so if a prompt is not specified the model will generate as if from the beginning of a new document. *)
-    prompt: One_ofstringarrayarrayarray.t option [@default None];
+    (**
+     * The prompt(s) to generate completions for, encoded as a string, array of strings, array of tokens, or array of token arrays.
+     *
+     * Note that <|endoftext|> is the document separator that the model sees during training, so if a prompt is not specified the model will generate as if from the beginning of a new document.
+     *)
+    prompt: MultiTypes.StringOrStringArrayOrIntArrayOrIntArrayArray.t option [@default None];
     (* The suffix that comes after a completion of inserted text. *)
     suffix: string option [@default None];
     (* The maximum number of [tokens](/tokenizer) to generate in the completion. The token count of your prompt plus `max_tokens` cannot exceed the model's context length. Most models have a context length of 2048 tokens (except for the newest models, which support 4096). *)
@@ -27,7 +31,7 @@ type t = {
     (* Echo back the prompt in addition to the completion *)
     echo: bool option [@default None];
     (* Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence. *)
-    stop: One_ofstringarray.t option [@default None];
+    stop: string array option [@default None];
     (* Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics. [See more information about frequency and presence penalties.](/docs/api-reference/parameter-details) *)
     presence_penalty: float option [@default None];
     (* Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim. [See more information about frequency and presence penalties.](/docs/api-reference/parameter-details) *)
diff --git a/src/models/create_embedding_request.ml b/src/models/create_embedding_request.ml
index fb2adf3..2fbcf41 100644
--- a/src/models/create_embedding_request.ml
+++ b/src/models/create_embedding_request.ml
@@ -6,11 +6,11 @@
  *)

 type t = {
-    (* Input text to get embeddings for, encoded as a string or array of tokens. To get embeddings for multiple inputs in a single request, pass an array of strings or array of token arrays. Each input must not exceed 8192 tokens in length. *)
-    input: One_ofstringarrayarrayarray.t;
+    (* Input text to get embeddings for, encoded as a string, array of strings, array of tokens, or array of token arrays. Each input must not exceed 8192 tokens in length. *)
+    input: MultiTypes.StringOrStringArrayOrIntArrayOrIntArrayArray.t;
 } [@@deriving yojson { strict = false }, show ];;

-let create (input : One_ofstringarrayarrayarray.t) : t = {
+let create input : t = {
     input = input;
 }

diff --git a/src/models/create_moderation_request.ml b/src/models/create_moderation_request.ml
index 447dc43..ac1ef1f 100644
--- a/src/models/create_moderation_request.ml
+++ b/src/models/create_moderation_request.ml
@@ -7,12 +7,12 @@

 type t = {
     (* The input text to classify *)
-    input: One_ofstringarray.t;
+    input: string array;
     (* Two content moderations models are available: `text-moderation-stable` and `text-moderation-latest`. The default is `text-moderation-latest` which will be automatically upgraded over time. This ensures you are always using our most accurate model. If you use `text-moderation-stable`, we will provide advanced notice before updating the model. Accuracy of `text-moderation-stable` may be slightly lower than for `text-moderation-latest`. *)
     model: string option [@default None];
 } [@@deriving yojson { strict = false }, show ];;

-let create (input : One_ofstringarray.t) : t = {
+let create input : t = {
     input = input;
     model = None;
 }
diff --git a/src/models/engine.ml b/src/models/engine.ml
index 9877675..aa9f350 100644
--- a/src/models/engine.ml
+++ b/src/models/engine.ml
@@ -7,12 +7,12 @@

 type t = {
     id: string;
-    _object: string;
+    _object: string [@key "object"];
     created: int32 option;
     ready: bool;
 } [@@deriving yojson { strict = false }, show ];;

-let create (id : string) (_object : string) (created : int32option) (ready : bool) : t = {
+let create (id : string) (_object : string) (created : int32 option) (ready : bool) : t = {
     id = id;
     _object = _object;
     created = created;
diff --git a/src/models/model.ml b/src/models/model.ml
index 97c614b..b3707a1 100644
--- a/src/models/model.ml
+++ b/src/models/model.ml
@@ -7,7 +7,7 @@

 type t = {
     id: string;
-    _object: string;
+    _object: string [@key "object"];
     created: int32;
     owned_by: string;
 } [@@deriving yojson { strict = false }, show ];;
diff --git a/src/support/multiTypes.ml b/src/support/multiTypes.ml
new file mode 100644
index 0000000..d7b7400
--- /dev/null
+++ b/src/support/multiTypes.ml
@@ -0,0 +1,76 @@
+let stringArray_of_yojson x: ([> `StringArray of string array], string) result =
+  match x with
+  | `List l ->
+    let rec aux = function
+      | [] -> Ok []
+      | `String s :: tl ->
+        Result.map (fun l -> s :: l) (aux tl)
+      | _ -> Error "stringArray_of_yojson: not a string array"
+    in
+    Result.map (fun rl -> `StringArray(Array.of_list rl)) (aux l)
+  | _ -> Error "stringArray_of_yojson: not a string array"
+
+let intArray_of_yojson x: ([> `IntArray of int array], string) result =
+  match x with
+  | `List l ->
+    let rec aux = function
+      | [] -> Ok []
+      | `Int i :: tl ->
+        Result.map (fun l -> i :: l) (aux tl)
+      | _ -> Error "intArray_of_yojson: not an int array"
+    in
+    Result.map (fun rl -> `IntArray(Array.of_list rl)) (aux l)
+  | _ -> Error "intArray_of_yojson: not an int array"
+
+(* TODO: see if there's a less awful way to organize this *)
+module StringOrStringArrayOrIntArrayOrIntArrayArray = struct
+  type t =
+    [ `String of string
+    | `StringArray of string array
+    | `IntArray of int array
+    | `IntArrayArray of int array array
+    ]
+
+  let pp ppf = function
+    | `String s -> Format.fprintf ppf "%s" s
+    | `StringArray a -> Format.fprintf ppf "%a" (Format.pp_print_list Format.pp_print_string) (Array.to_list a)
+    | `IntArray a -> Format.fprintf ppf "%a" (Format.pp_print_list Format.pp_print_int) (Array.to_list a)
+    | `IntArrayArray a ->
+      Format.fprintf
+        ppf
+        "%a"
+        (Format.pp_print_list (Format.pp_print_list Format.pp_print_int)) (Array.to_list (Array.map Array.to_list a))
+
+  let to_yojson (t: t): Yojson.Safe.t =
+    match t with
+    | `String s -> `String s
+    | `StringArray a -> `List (List.map (fun s -> `String s) (Array.to_list a))
+    | `IntArray a -> `List (List.map (fun i -> `Int i) (Array.to_list a))
+    | `IntArrayArray a -> `List (List.map (fun a -> `List (List.map (fun i -> `Int i) (Array.to_list a))) (Array.to_list a))
+
+  let of_yojson (x: Yojson.Safe.t): (t, string) result =
+    let errorMessage = "StringOrStringArrayOrIntArrayOrIntArrayArray.of_yojson: not a string or string array or int array or int array array" in
+    match x with
+    | `String s -> Ok (`String s)
+    | `List l ->
+      (* may still be a string array, int array, or int array array, we can tell by looking at the first element *)
+      begin match l with
+      | [] -> Ok(`StringArray [||])
+      | `String _ :: _ -> stringArray_of_yojson x
+      | `Int _ :: _ -> intArray_of_yojson x
+      | `List _ :: _ ->
+        (* either an int array array, or invalid *)
+        let rec aux = function
+          | [] -> Ok []
+          | `List l :: tl ->
+            let ( let* ) = Result.bind in
+            let* ia = intArray_of_yojson (`List l) in
+            let* tl' = aux tl in
+            (* explicit match: `IntArray is an open row here, so a refutable
+               let-pattern would be non-exhaustive (warning 8) *)
+            (match ia with `IntArray l' -> Ok (l' :: tl') | _ -> Error errorMessage)
+          | _ -> Error errorMessage
+        in
+        Result.map (fun rl -> `IntArrayArray(Array.of_list rl)) (aux l)
+      | _ -> Error errorMessage
+      end
+    | _ -> Error errorMessage
+end
diff --git a/src/support/request.ml b/src/support/request.ml
index a1180c2..f6a5ef8 100644
--- a/src/support/request.ml
+++ b/src/support/request.ml
@@ -1,6 +1,10 @@
-let api_key = ""
+let api_key = Option.value (Sys.getenv_opt "OPENAI_API_KEY") ~default:""
 let base_url = "https://api.openai.com/v1"
-let default_headers = Cohttp.Header.init_with "Content-Type" "application/json"
+let default_headers =
+  Cohttp.Header.add
+    (Cohttp.Header.init_with "Content-Type" "application/json")
+    "Authorization"
+    ("Bearer " ^ api_key)

 let option_fold f default o = match o with