Azure · lmazuel · Jan 28, 2023 · Jan 27, 2023 · Jan 27, 2023 · Jan 27, 2023
diff --git a/specification/cognitiveservices/OpenAI.Inference/main.cadl b/specification/cognitiveservices/OpenAI.Inference/main.cadl
@@ -31,7 +31,12 @@ https://westus.api.cognitive.microsoft.com).
 )
 @versioned(ServiceApiVersions)
 @versionedDependency(
-  [[ServiceApiVersions.v2022_06_01_preview, Azure.Core.Versions.v1_0_Preview_1]]
+  [
+    [
+      ServiceApiVersions.v2022_06_01_preview,
+      Azure.Core.Versions.v1_0_Preview_1
+    ]
+  ]
 )
 @doc("Azure OpenAI APIs for completions and search")
 namespace Azure.OpenAI;

diff --git a/specification/cognitiveservices/OpenAI.Inference/models/completions.create.cadl b/specification/cognitiveservices/OpenAI.Inference/models/completions.create.cadl
@@ -138,6 +138,8 @@ model Completions {
     "model"?: string;
     @doc("Array of choices returned containing text completions to prompts sent")
     choices?: Choice[];
+    @doc("Usage counts for tokens input using the completions API")
+    usage: CompletionsUsage;
 }
 
 @doc("Choice model within completion response")
@@ -163,3 +165,13 @@ model CompletionsLogProbsModel {
     @doc("Text offset")
     text_offset?: int32[];
 }
+
+@doc("Measurement of the amount of tokens used in this request and response")
+model CompletionsUsage {
+    @doc("Number of tokens received in the completion")
+    completion_tokens: int32,
+    @doc("Number of tokens sent in the original request")
+    prompt_tokens: int32,
+    @doc("Total number of tokens transacted in this request/response")
+    total_tokens: int32
+}
diff --git a/specification/cognitiveservices/OpenAI.Inference/models/embeddings.create.cadl b/specification/cognitiveservices/OpenAI.Inference/models/embeddings.create.cadl
@@ -24,20 +24,26 @@ model EmbeddingsOptions {
     as we have observed inferior results when newlines are present.
     """)
     input: string | string[];
+
+    @doc("Usage counts for tokens input using the embeddings API")
+    usage: EmbeddingsUsage;
 };
 
 model Embeddings {
     object: "list",
     data: EmbeddingItem[],
 }
 
-model Usage {
-    prompt_tokens: int32,
-    total_tokens: int32
-}
-
 model EmbeddingItem {
     object: "embedding",
     embedding: float32[];
     index: int32;
 }
+
+@doc("Measurement of the amount of tokens used in this request and response")
+model EmbeddingsUsage {
+    @doc("Number of tokens sent in the original request")
+    prompt_tokens: int32,
+    @doc("Total number of tokens transacted in this request/response")
+    total_tokens: int32
+}