Updated to include chunking for longer audio files

Mossy1022 · Jun 25, 2024 · fcb2096 · fcb2096
1 parent 4c277ca
commit fcb2096
Show file tree

Hide file tree

Showing 3 changed files with 60 additions and 26 deletions.
diff --git a/main.ts b/main.ts
@@ -124,33 +124,72 @@ export default class SmartMemosPlugin extends Plugin {
 
 	async generateTranscript(audioBuffer: ArrayBuffer, filetype: string) {
         if (this.settings.apiKey.length <= 1) throw new Error('OpenAI API Key is not provided.');
-
+    
         // Reference: www.stackoverflow.com/questions/74276173/how-to-send-multipart-form-data-payload-with-typescript-obsidian-library
         const N = 16 // The length of our random boundary string
         const randomBoundryString = 'WebKitFormBoundary' + Array(N + 1).join((Math.random().toString(36) + '00000000000000000').slice(2, 18)).slice(0, N)
         const pre_string = `------${randomBoundryString}\r\nContent-Disposition: form-data; name="file"; filename="audio.mp3"\r\nContent-Type: "application/octet-stream"\r\n\r\n`;
         const post_string = `\r\n------${randomBoundryString}\r\nContent-Disposition: form-data; name="model"\r\n\r\nwhisper-1\r\n------${randomBoundryString}--\r\n`
         const pre_string_encoded = new TextEncoder().encode(pre_string);
         const post_string_encoded = new TextEncoder().encode(post_string);
-        const concatenated = await new Blob([pre_string_encoded, audioBuffer, post_string_encoded]).arrayBuffer()
-
-        const options: RequestUrlParam = {
-            url: 'https://api.openai.com/v1/audio/transcriptions',
-            method: 'POST',
-            contentType: `multipart/form-data; boundary=----${randomBoundryString}`,
-            headers: {
-                'Authorization': 'Bearer ' + this.settings.apiKey
-            },
-            body: concatenated
-        };
+
+        // Calculate the size of each chunk
+        const chunkSize = 20 * 1024 * 1024; // 20 MB
+
+        // Calculate the number of chunks
+        const numChunks = Math.ceil(audioBuffer.byteLength / chunkSize);
+
+        if (numChunks < 2) {
+            new Notice(`Transcribing audio...`);
+        } else {
+            new Notice(`Transcribing audio in ${numChunks} chunks. This may take a minute or two...`);
+        }
 
-
-        const response = await requestUrl(options).catch((error) => { 
-            if (error.message.includes('401')) throw new Error('OpenAI API Key is not valid.');
-            else throw error; 
-        });
-        if ('text' in response.json) return response.json.text;
-        else throw new Error('Error. ' + JSON.stringify(response.json));
+
+        // Create an array to store the results
+        let results = [];
+
+        // Process each chunk
+        for (let i = 0; i < numChunks; i++) {
+
+            new Notice(`Transcribing chunk #${i + 1}...`);
+
+            // Get the start and end indices for this chunk
+            const start = i * chunkSize;
+            const end = Math.min(start + chunkSize, audioBuffer.byteLength);
+
+            // Extract the chunk from the audio buffer
+            const chunk = audioBuffer.slice(start, end);
+
+            // Concatenate the chunk with the pre and post strings
+            const concatenated = await new Blob([pre_string_encoded, chunk, post_string_encoded]).arrayBuffer()
+
+            const options: RequestUrlParam = {
+                url: 'https://api.openai.com/v1/audio/transcriptions',
+                method: 'POST',
+                contentType: `multipart/form-data; boundary=----${randomBoundryString}`,
+                headers: {
+                    'Authorization': 'Bearer ' + this.settings.apiKey
+                },
+                body: concatenated
+            };
+
+            const response = await requestUrl(options).catch((error) => { 
+                if (error.message.includes('401')) throw new Error('OpenAI API Key is not valid.');
+                else throw error; 
+            });
+
+            if ('text' in response.json) {
+                // Add the result to the results array
+                results.push(response.json.text);
+            }
+            else throw new Error('Error. ' + JSON.stringify(response.json));
+
+            // Wait for 1 second before processing the next chunk
+            await new Promise(resolve => setTimeout(resolve, 1000));
+        }
+        // Return the chunk transcripts joined into a single string, separated by spaces
+        return results.join(' ');
     }
 
     async findFilePath(text: string, regex: RegExp[]) {
@@ -179,11 +218,6 @@ export default class SmartMemosPlugin extends Plugin {
         if (prompt.length < 1) throw new Error('Cannot find prompt.');
         if ( this.settings.apiKey.length <= 1) throw new Error('OpenAI API Key is not provided.');
 
-		if (prompt.length > TOKEN_LIMITS[this.settings.model]) {
-			new Notice(`shortening prompt`);
-			prompt = prompt.substring(prompt.length - (TOKEN_LIMITS[this.settings.model] + 300));
-		}
-
 		prompt = prompt + '.';
 
         let newPrompt = prompt;

diff --git a/manifest.json b/manifest.json
@@ -1,7 +1,7 @@
 {
 	"id": "smart-memos",
 	"name": "Smart Memos",
-	"version": "1.0.9",
+	"version": "1.0.10",
 	"minAppVersion": "0.15.0",
 	"description": "Create personalized and intelligent analysis, summaries, and more for audio recordings that can be imported or spoken directly into a note",
 	"author": "Evan Moscoso",

diff --git a/package.json b/package.json
@@ -1,6 +1,6 @@
 {
 	"name": "smart-memos",
-	"version": "1.0.9",
+	"version": "1.0.10",
 	"description": "Create personalized and intelligent analysis, summaries, and more for audio recordings that can be imported or spoken directly into a note",
 	"main": "main.js",
 	"scripts": {