Skip to content

Commit

Permalink
Updated to include chunking for longer audio files
Browse files Browse the repository at this point in the history
  • Loading branch information
Mossy1022 committed Jun 25, 2024
1 parent 4c277ca commit fcb2096
Show file tree
Hide file tree
Showing 3 changed files with 60 additions and 26 deletions.
82 changes: 58 additions & 24 deletions main.ts
Original file line number Diff line number Diff line change
Expand Up @@ -124,33 +124,72 @@ export default class SmartMemosPlugin extends Plugin {

/**
 * Transcribes an audio recording with OpenAI's `whisper-1` model.
 *
 * The buffer is cut into fixed-size byte ranges; each range is wrapped in a
 * hand-built multipart/form-data body and POSTed to the transcriptions
 * endpoint as its own request. The `text` of every response is collected in
 * order and joined with a single space.
 *
 * @param audioBuffer Raw bytes of the audio file to transcribe.
 * @param filetype Currently unused: every part is uploaded under the file
 *                 name `audio.mp3` regardless of this value.
 * @returns The transcripts of all chunks joined with `' '` (an empty string
 *          when `audioBuffer` is empty, because no request is sent).
 * @throws Error if no API key is configured, if a request fails with an
 *         error whose message contains `401`, or if a response carries no
 *         `text` field. Any other request error is rethrown unchanged.
 */
async generateTranscript(audioBuffer: ArrayBuffer, filetype: string): Promise<string> {
    if (this.settings.apiKey.length <= 1) throw new Error('OpenAI API Key is not provided.');

    // Reference: www.stackoverflow.com/questions/74276173/how-to-send-multipart-form-data-payload-with-typescript-obsidian-library
    const N = 16 // The length of our random boundary string
    const randomBoundryString = 'WebKitFormBoundary' + Array(N + 1).join((Math.random().toString(36) + '00000000000000000').slice(2, 18)).slice(0, N)

    // Multipart framing that surrounds the audio bytes. It is identical for
    // every chunk, so it is encoded once up front.
    const pre_string = `------${randomBoundryString}\r\nContent-Disposition: form-data; name="file"; filename="audio.mp3"\r\nContent-Type: "application/octet-stream"\r\n\r\n`;
    const post_string = `\r\n------${randomBoundryString}\r\nContent-Disposition: form-data; name="model"\r\n\r\nwhisper-1\r\n------${randomBoundryString}--\r\n`
    const pre_string_encoded = new TextEncoder().encode(pre_string);
    const post_string_encoded = new TextEncoder().encode(post_string);

    // Size of each chunk: 20 MiB.
    // NOTE(review): presumably chosen to stay under the endpoint's upload
    // size limit - confirm against the current OpenAI documentation.
    const chunkSize = 20 * 1024 * 1024;

    // Number of requests needed to cover the whole buffer.
    const numChunks = Math.ceil(audioBuffer.byteLength / chunkSize);

    if (numChunks < 2) {
        new Notice(`Transcribing audio...`);
    } else {
        new Notice(`Transcribing audio in ${numChunks} chunks. This may take a minute or two...`);
    }

    // Transcript text of each chunk, in order.
    const results: string[] = [];

    for (let i = 0; i < numChunks; i++) {

        new Notice(`Transcribing chunk #${i + 1}...`);

        // Byte range covered by this chunk; the last one may be shorter.
        const start = i * chunkSize;
        const end = Math.min(start + chunkSize, audioBuffer.byteLength);

        // NOTE(review): the cut is made at raw byte offsets, not at audio
        // frame boundaries - verify that the formats in use decode correctly
        // from a mid-file slice.
        const chunk = audioBuffer.slice(start, end);

        // Assemble the full multipart body around this chunk's bytes.
        const concatenated = await new Blob([pre_string_encoded, chunk, post_string_encoded]).arrayBuffer()

        const options: RequestUrlParam = {
            url: 'https://api.openai.com/v1/audio/transcriptions',
            method: 'POST',
            contentType: `multipart/form-data; boundary=----${randomBoundryString}`,
            headers: {
                'Authorization': 'Bearer ' + this.settings.apiKey
            },
            body: concatenated
        };

        // Translate an authentication failure into a user-readable message;
        // every other failure propagates untouched.
        const response = await requestUrl(options).catch((error) => {
            if (error.message.includes('401')) throw new Error('OpenAI API Key is not valid.');
            else throw error;
        });

        if ('text' in response.json) {
            results.push(response.json.text);
        }
        else throw new Error('Error. ' + JSON.stringify(response.json));

        // Pause for 1 second between requests; nothing follows the last
        // chunk, so no delay is needed after it.
        if (i < numChunks - 1) {
            await new Promise(resolve => setTimeout(resolve, 1000));
        }
    }

    return results.join(' ');
}

async findFilePath(text: string, regex: RegExp[]) {
Expand Down Expand Up @@ -179,11 +218,6 @@ export default class SmartMemosPlugin extends Plugin {
if (prompt.length < 1) throw new Error('Cannot find prompt.');
if ( this.settings.apiKey.length <= 1) throw new Error('OpenAI API Key is not provided.');

if (prompt.length > TOKEN_LIMITS[this.settings.model]) {
new Notice(`shortening prompt`);
prompt = prompt.substring(prompt.length - (TOKEN_LIMITS[this.settings.model] + 300));
}

prompt = prompt + '.';

let newPrompt = prompt;
Expand Down
2 changes: 1 addition & 1 deletion manifest.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"id": "smart-memos",
"name": "Smart Memos",
"version": "1.0.9",
"version": "1.0.10",
"minAppVersion": "0.15.0",
"description": "Create personalized and intelligent analysis, summaries, and more for audio recordings that can be imported or spoken directly into a note",
"author": "Evan Moscoso",
Expand Down
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "smart-memos",
"version": "1.0.9",
"version": "1.0.10",
"description": "Create personalized and intelligent analysis, summaries, and more for audio recordings that can be imported or spoken directly into a note",
"main": "main.js",
"scripts": {
Expand Down

0 comments on commit fcb2096

Please sign in to comment.