From fc66c404a6a5fd5fe6ceef4cc0b6a1794467ec73 Mon Sep 17 00:00:00 2001 From: jonmatthis Date: Wed, 24 Jan 2024 20:11:12 -0500 Subject: [PATCH] fix audio transcript splitting across messages --- .../threads/discord-attachment.service.ts | 205 +++++++++++------- .../threads/discord-message.service.ts | 37 +++- 2 files changed, 156 insertions(+), 86 deletions(-) diff --git a/src/interfaces/discord/services/threads/discord-attachment.service.ts b/src/interfaces/discord/services/threads/discord-attachment.service.ts index 5a7ddbb..0e3bb38 100644 --- a/src/interfaces/discord/services/threads/discord-attachment.service.ts +++ b/src/interfaces/discord/services/threads/discord-attachment.service.ts @@ -7,7 +7,6 @@ import { createReadStream, createWriteStream } from 'fs'; import { promisify } from 'util'; import * as stream from 'stream'; import * as fs from 'fs'; - @Injectable() export class DiscordAttachmentService { constructor( @@ -16,92 +15,148 @@ export class DiscordAttachmentService { ) {} async handleAttachment(attachment: Attachment) { - const fileType = attachment.name?.split('.').pop(); + const fileType = this.extractFileType(attachment.name); let attachmentResponse; - switch (fileType) { - case 'mp3': - case 'wav': - case 'ogg': - // Handle audio files - attachmentResponse = { - type: 'transcript', - ...(await this.handleAudioAttachment(attachment)), - }; - attachmentResponse.rawText = attachmentResponse.text; - attachmentResponse.text = `BEGIN AUDIO TRANSCRIPT: ${attachment.name}\n\n${attachmentResponse.text}\n\nEND AUDIO TRANSCRIPT: ${attachment.name}\n\n`; - break; - case 'mp4': - case 'avi': - case 'mkv': - // Handle video files - attachmentResponse = { - type: 'transcript', - ...(await this.handleAudioAttachment(attachment)), - }; - attachmentResponse.rawText = attachmentResponse.text; - attachmentResponse.text = `BEGIN VIDEO TRANSCRIPT: ${attachment.name}\n\n${attachmentResponse.text}\n\nEND VIDEO TRANSCRIPT: ${attachment.name}\n\n`; - break; - case 'txt': - case 'md': - case 'pdf': - attachmentResponse = { - type: 'file_text', - rawText: await this.handleTextAttachment(attachment), - }; - attachmentResponse.text = `BEGIN TEXT ATTACHMENT: ${attachment.name}\n\n${attachmentResponse.rawText}\n\nEND TEXT ATTACHMENT: ${attachment.name}\n\n`; - break; - case 'zip': - attachmentResponse = { - type: 'zip', - rawText: await this.handleZipAttachment(attachment), - }; - attachmentResponse.text = `BEGIN ZIP ATTACHMENT: ${attachment.name}\n\n${attachmentResponse.rawText}\n\nEND ZIP ATTACHMENT: ${attachment.name}\n\n`; - break; - default: - this._logger.log('Unsupported file type:', fileType); + + if (this.isAudio(fileType)) { + attachmentResponse = await this.processAudioAttachment( + attachment, + fileType, + ); + } else if (this.isVideo(fileType)) { + attachmentResponse = await this.processVideoAttachment( + attachment, + fileType, + ); + } else if (this.isText(fileType)) { + attachmentResponse = await this.processTextAttachment( + attachment, + fileType, + ); + } else if (fileType === 'zip') { + attachmentResponse = await this.processZipAttachment( + attachment, + fileType, + ); + } else { + this._logger.log('Unsupported file type:', fileType); + } + + return attachmentResponse + ? this.formatResponse(attachmentResponse, fileType, attachment) + : null; + } + + processAudioAttachment(attachment: Attachment, fileType: string) { + if (!this.isAudio(fileType)) { + throw new Error(`Unsupported file type: ${fileType}`); + } + return this.handleAudioAttachment(attachment).then((response) => ({ + type: 'transcript', + rawText: response.text, + Decorator: `AUDIO TRANSCRIPT: ${attachment.name}`, + })); + } + + processVideoAttachment(attachment: Attachment, fileType: string) { + if (!this.isVideo(fileType)) { + throw new Error(`Unsupported file type: ${fileType}`); + } + return this.handleAudioAttachment(attachment).then((response) => ({ + // assuming audio extraction from video + type: 'transcript', + rawText: response.text, + Decorator: `VIDEO TRANSCRIPT: ${attachment.name}`, + })); + } + + processTextAttachment(attachment: Attachment, fileType: string) { + if (!this.isText(fileType)) { + throw new Error(`Unsupported file type: ${fileType}`); + } + return this.handleTextAttachment(attachment).then((rawText) => ({ + type: 'file_text', + rawText, + Decorator: `TEXT ATTACHMENT: ${attachment.name}`, + })); + } + + processZipAttachment(attachment: Attachment, fileType: string) { + if (fileType !== 'zip') { + throw new Error(`Unsupported file type: ${fileType}`); } - return attachmentResponse; + return this.handleZipAttachment(attachment).then((rawText) => ({ + type: 'zip', + rawText, + Decorator: `ZIP ATTACHMENT: ${attachment.name}`, + })); + } + + formatResponse(response: any, fileType: string, attachment: Attachment) { + const simpleUrl = attachment.url.split('?')[0]; + return { + ...response, + text: `> File URL: ${simpleUrl}\n\n\`\`\`\n\nBEGIN ${response.Decorator}\n\n${response.rawText}\n\nEND ${response.Decorator}\n\n\`\`\``, + }; + } + + isAudio(fileType: string) { + return ['mp3', 'wav', 'ogg'].includes(fileType); + } + + isVideo(fileType: string) { + return ['mp4', 'avi', 'mkv'].includes(fileType); + } + + isText(fileType: string) { + return ['txt', 'md', 'pdf'].includes(fileType); + } + + extractFileType(filename: string | undefined): string { + return filename?.split('.').pop() || ''; } private async _downloadAttachment(attachment: Attachment): Promise { this._logger.log('Processing audio attachment:', attachment.name); + try { + // Define temp directory and file paths + const tempDirectoryPath = path.join(__dirname, 'temp'); + const tempFilePath = path.join( + tempDirectoryPath, + `tempfile-${path.basename(attachment.name)}`, + ); - // Define temp directory and file paths - const tempDirectoryPath = path.join(__dirname, 'temp'); - const tempFilePath = path.join( - tempDirectoryPath, - `tempfile-${path.basename(attachment.name)}`, - ); - - // Ensure temp directory exists - await fs.promises.mkdir(tempDirectoryPath, { recursive: true }); + // Ensure temp directory exists + await fs.promises.mkdir(tempDirectoryPath, { recursive: true }); - // Download the attachment and save to file - const response = await axios({ - method: 'get', - url: attachment.url, - responseType: 'stream', - }); + // Download the attachment and save to file + const response = await axios({ + method: 'get', + url: attachment.url, + responseType: 'stream', + }); - const writer = createWriteStream(tempFilePath); - response.data.pipe(writer); - await promisify(stream.finished)(writer); + const writer = createWriteStream(tempFilePath); + response.data.pipe(writer); + await promisify(stream.finished)(writer); - return tempFilePath; + return tempFilePath; + } catch (error) { + this._logger.error(`Error downloading attachment: ${error}`); + } } private async handleAudioAttachment(attachment: Attachment) { this._logger.log('Processing audio attachment:', attachment.name); - let tempFilePath: string; + const audioFilePath = await this._downloadAttachment(attachment); try { // Download attachment - tempFilePath = await this._downloadAttachment(attachment); // Process the downloaded file for transcription const transcriptionResponse = await this._openaiAudioService.createAudioTranscription({ - file: createReadStream(tempFilePath), + file: createReadStream(audioFilePath), model: 'whisper-1', language: 'en', response_format: 'verbose_json', @@ -109,27 +164,17 @@ export class DiscordAttachmentService { }); this._logger.log( - `Transcription: ${JSON.stringify(transcriptionResponse)}`, + `Transcription: ${JSON.stringify(transcriptionResponse, null, 2)}`, ); - return transcriptionResponse; + return { ...transcriptionResponse, audioFilePath }; } catch (error) { this._logger.error( `Error processing audio attachment: ${error.message || error}`, ); throw error; } finally { - // Clean up the temp file - if (tempFilePath) { - try { - await fs.promises.unlink(tempFilePath); - } catch (cleanupError) { - this._logger.error( - `Failed to clean up temporary file: ${ - cleanupError.message || cleanupError - }`, - ); - } - } + //delete the audio file + await fs.promises.unlink(audioFilePath); } } diff --git a/src/interfaces/discord/services/threads/discord-message.service.ts b/src/interfaces/discord/services/threads/discord-message.service.ts index 91ec79f..bd8b8f2 100644 --- a/src/interfaces/discord/services/threads/discord-message.service.ts +++ b/src/interfaces/discord/services/threads/discord-message.service.ts @@ -138,14 +138,39 @@ export class DiscordMessageService { await this._discordAttachmentService.handleAttachment(attachment); attachmentText += attachmentResponse.text; if (attachmentResponse.type === 'transcript') { - await discordMessage.reply( - `\`\`\`\n\n${attachmentResponse.text}\n\n\`\`\``, - ); + const maxMessageLength = 1800; // Reduced to 1800 to account for "message X of N" text + const fullAttachmenText = attachmentResponse.text; + const attachmentTextLength = fullAttachmenText.length; + + if (attachmentTextLength > maxMessageLength) { + const numberOfMessages = Math.ceil( + attachmentTextLength / maxMessageLength, + ); + let replyMessage: Message; + for (let i = 0; i < numberOfMessages; i++) { + const start = i * maxMessageLength; + const end = start + maxMessageLength; + const chunk = fullAttachmenText.slice(start, end); + const chunkMsg = `> Message ${ + i + 1 + } of ${numberOfMessages}\n\n${chunk}`; + replyMessage = await discordMessage.reply(chunkMsg); + } + if (replyMessage) { + await this._sendFullResponseAsAttachment( + attachmentResponse.text, + discordMessage, + replyMessage, + ); + } + } else { + await discordMessage.reply(fullAttachmenText); + } } + attachmentText += 'END TEXT FROM ATTACHMENTS'; } - attachmentText += 'END TEXT FROM ATTACHMENTS'; + return { humanInputText, attachmentText }; } - return { humanInputText, attachmentText }; } private async _sendFullResponseAsAttachment( @@ -155,7 +180,7 @@ export class DiscordMessageService { ) { // add full chunk to the message as a `.md` attachement const attachment = new AttachmentBuilder(Buffer.from(fullAiResponse), { - name: `reply_to_discordMessageId_${discordMessage.id}.md`, + name: `full_response_to_discordMessageId_${discordMessage.id}.md`, description: 'The full Ai response to message ID:${discordMessage.id}, ' + 'which was split across multiple messages so is being sent as an' +