Release 2.1.6

cartesia-ai · Feb 16, 2025 · 5073aab · 5073aab
1 parent 6e167ad
commit 5073aab
Show file tree

Hide file tree

Showing 12 changed files with 289 additions and 352 deletions.
diff --git a/package.json b/package.json
@@ -1,6 +1,6 @@
 {
     "name": "@cartesia/cartesia-js",
-    "version": "2.1.5",
+    "version": "2.1.6",
     "private": false,
     "repository": "https://github.com/cartesia-ai/cartesia-js",
     "main": "./index.js",

diff --git a/reference.md b/reference.md
@@ -98,7 +98,7 @@ await client.datasets.list();
 
 ```typescript
 await client.datasets.create({
-    name: "string",
+    name: "name",
 });
 ```
 
@@ -147,7 +147,7 @@ await client.datasets.create({
 <dd>
 
 ```typescript
-await client.datasets.listFiles("string");
+await client.datasets.listFiles("id");
 ```
 
 </dd>
@@ -182,70 +182,6 @@ await client.datasets.listFiles("string");
 </dl>
 </details>
 
-<details><summary><code>client.datasets.<a href="/src/api/resources/datasets/client/Client.ts">uploadFile</a>(file, id, { ...params }) -> void</code></summary>
-<dl>
-<dd>
-
-#### 🔌 Usage
-
-<dl>
-<dd>
-
-<dl>
-<dd>
-
-```typescript
-await client.datasets.uploadFile(fs.createReadStream("/path/to/your/file"), "string", {});
-```
-
-</dd>
-</dl>
-</dd>
-</dl>
-
-#### ⚙️ Parameters
-
-<dl>
-<dd>
-
-<dl>
-<dd>
-
-**file:** `File | fs.ReadStream | Blob`
-
-</dd>
-</dl>
-
-<dl>
-<dd>
-
-**id:** `string`
-
-</dd>
-</dl>
-
-<dl>
-<dd>
-
-**request:** `Cartesia.UploadDatasetFileRequest`
-
-</dd>
-</dl>
-
-<dl>
-<dd>
-
-**requestOptions:** `Datasets.RequestOptions`
-
-</dd>
-</dl>
-</dd>
-</dl>
-
-</dd>
-</dl>
-</details>
-
 ## Infill
 
 <details><summary><code>client.infill.<a href="/src/api/resources/infill/client/Client.ts">bytes</a>(leftAudio, rightAudio, { ...params }) -> stream.Readable</code></summary>
@@ -262,16 +198,24 @@ await client.datasets.uploadFile(fs.createReadStream("/path/to/your/file"), "str
 
 Generate audio that smoothly connects two existing audio segments. This is useful for inserting new speech between existing speech segments while maintaining natural transitions.
 
-The cost is 1 credit per character of the infill text plus a fixed cost of 300 credits.
+**The cost is 1 credit per character of the infill text plus a fixed cost of 300 credits.**
 
 Only the `sonic-preview` model is supported for infill at this time.
 
 At least one of `left_audio` or `right_audio` must be provided.
 
-</dd>
-</dl>
-</dd>
-</dl>
+As with all generative models, there's some inherent variability, but here are some tips we recommend to get the best results from infill:
+
+- Use longer infill transcripts
+    - This gives the model more flexibility to adapt to the rest of the audio
+- Target natural pauses in the audio when deciding where to clip
+    - This means you don't need word-level timestamps to be as precise
+- Clip right up to the start and end of the audio segment you want infilled, keeping as much silence in the left/right audio segments as possible
+    - This helps the model generate more natural transitions
+      </dd>
+      </dl>
+      </dd>
+      </dl>
 
 #### 🔌 Usage
 
@@ -656,10 +600,29 @@ await client.voices.list();
 </dl>
 </details>
 
-<details><summary><code>client.voices.<a href="/src/api/resources/voices/client/Client.ts">create</a>({ ...params }) -> Cartesia.Voice</code></summary>
+<details><summary><code>client.voices.<a href="/src/api/resources/voices/client/Client.ts">clone</a>(clip, { ...params }) -> Cartesia.VoiceMetadata</code></summary>
 <dl>
 <dd>
 
+#### 📝 Description
+
+<dl>
+<dd>
+
+<dl>
+<dd>
+
+Clone a voice from an audio clip. This endpoint has two modes, stability and similarity.
+
+Similarity mode clones are more similar to the source clip, but may reproduce background noise. For these, use an audio clip about 5 seconds long.
+
+Stability mode clones are more stable, but may not sound as similar to the source clip. For these, use an audio clip 10-20 seconds long.
+
+</dd>
+</dl>
+</dd>
+</dl>
+
 #### 🔌 Usage
 
 <dl>
@@ -669,18 +632,12 @@ await client.voices.list();
 <dd>
 
 ```typescript
-await client.voices.create({
-    name: "string",
-    description: "string",
-    embedding: [
-        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-        1, 1, 1, 1, 1, 1, 1,
-    ],
+await client.voices.clone(fs.createReadStream("/path/to/your/file"), {
+    name: "A high-stability cloned voice",
+    description: "Copied from Cartesia docs",
+    mode: "stability",
     language: "en",
+    enhance: true,
 });
 ```
 
@@ -697,7 +654,15 @@ await client.voices.create({
 <dl>
 <dd>
 
-**request:** `Cartesia.CreateVoiceRequest`
+**clip:** `File | fs.ReadStream | Blob`
+
+</dd>
+</dl>
+
+<dl>
+<dd>
+
+**request:** `Cartesia.CloneVoiceRequest`
 
 </dd>
 </dl>
@@ -729,7 +694,7 @@ await client.voices.create({
 <dd>
 
 ```typescript
-await client.voices.delete("string");
+await client.voices.delete("id");
 ```
 
 </dd>
@@ -777,9 +742,9 @@ await client.voices.delete("string");
 <dd>
 
 ```typescript
-await client.voices.update("string", {
-    name: "string",
-    description: "string",
+await client.voices.update("id", {
+    name: "name",
+    description: "description",
 });
 ```
 
@@ -836,7 +801,7 @@ await client.voices.update("string", {
 <dd>
 
 ```typescript
-await client.voices.get("string");
+await client.voices.get("id");
 ```
 
 </dd>
@@ -885,17 +850,10 @@ await client.voices.get("string");
 
 ```typescript
 await client.voices.localize({
-    embedding: [
-        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-        1, 1, 1, 1, 1, 1, 1,
-    ],
+    embedding: [1.1, 1.1],
     language: "en",
     originalSpeakerGender: "male",
-    dialect: "au",
+    dialect: undefined,
 });
 ```
 
@@ -947,7 +905,11 @@ await client.voices.localize({
 await client.voices.mix({
     voices: [
         {
-            id: "string",
+            id: "id",
+            weight: 1.1,
+        },
+        {
+            id: "id",
             weight: 1.1,
         },
     ],
@@ -986,7 +948,7 @@ await client.voices.mix({
 </dl>
 </details>
 
-<details><summary><code>client.voices.<a href="/src/api/resources/voices/client/Client.ts">clone</a>(clip, { ...params }) -> Cartesia.VoiceMetadata</code></summary>
+<details><summary><code>client.voices.<a href="/src/api/resources/voices/client/Client.ts">create</a>({ ...params }) -> Cartesia.Voice</code></summary>
 <dl>
 <dd>
 
@@ -998,11 +960,7 @@ await client.voices.mix({
 <dl>
 <dd>
 
-Clone a voice from an audio clip. This endpoint has two modes, stability and similarity.
-
-Similarity mode clones are more similar to the source clip, but may reproduce background noise. For these, use an audio clip about 5 seconds long.
-
-Stability mode clones are more stable, but may not sound as similar to the source clip. For these, use an audio clip 10-20 seconds long.
+Create a voice from raw features. If you'd like to clone a voice from an audio file, please use Clone Voice instead.
 
 </dd>
 </dl>
@@ -1018,12 +976,12 @@ Stability mode clones are more stable, but may not sound as similar to the sourc
 <dd>
 
 ```typescript
-await client.voices.clone(fs.createReadStream("/path/to/your/file"), {
-    name: "A high-stability cloned voice",
-    description: "Copied from Cartesia docs",
-    mode: "stability",
+await client.voices.create({
+    name: "My Custom Voice",
+    description: "A custom voice created through the API",
+    embedding: [],
     language: "en",
-    enhance: true,
+    baseVoiceId: "123e4567-e89b-12d3-a456-426614174000",
 });
 ```
 
@@ -1040,15 +998,7 @@ await client.voices.clone(fs.createReadStream("/path/to/your/file"), {
 <dl>
 <dd>
 
-**clip:** `File | fs.ReadStream | Blob`
-
-</dd>
-</dl>
-
-<dl>
-<dd>
-
-**request:** `Cartesia.CloneVoiceRequest`
+**request:** `Cartesia.CreateVoiceRequest`
 
 </dd>
 </dl>

diff --git a/src/api/resources/apiStatus/client/Client.ts b/src/api/resources/apiStatus/client/Client.ts
@@ -53,8 +53,8 @@ export class ApiStatus {
                 "Cartesia-Version": requestOptions?.cartesiaVersion ?? this._options?.cartesiaVersion ?? "2024-06-10",
                 "X-Fern-Language": "JavaScript",
                 "X-Fern-SDK-Name": "@cartesia/cartesia-js",
-                "X-Fern-SDK-Version": "2.1.5",
-                "User-Agent": "@cartesia/cartesia-js/2.1.5",
+                "X-Fern-SDK-Version": "2.1.6",
+                "User-Agent": "@cartesia/cartesia-js/2.1.6",
                 "X-Fern-Runtime": core.RUNTIME.type,
                 "X-Fern-Runtime-Version": core.RUNTIME.version,
                 ...(await this._getCustomAuthorizationHeaders()),