-
-
Notifications
You must be signed in to change notification settings - Fork 1.1k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #4356 from remotion-dev/openai-whisper
- Loading branch information
Showing
29 changed files
with
942 additions
and
33 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
import React from 'react'; | ||
import {Grid} from '../../components/TableOfContents/Grid'; | ||
import {TOCItem} from '../../components/TableOfContents/TOCItem'; | ||
|
||
/**
 * Table of contents for the `@remotion/openai-whisper` docs section.
 * Renders a grid with a single entry linking to the
 * `openAiWhisperApiToCaptions()` API page.
 */
export const TableOfContents: React.FC = () => (
	<div>
		<Grid>
			<TOCItem link="/docs/openai-whisper/openai-whisper-api-to-captions">
				<strong>{'openAiWhisperApiToCaptions()'}</strong>
				<div>
					Turn OpenAI Whisper API transcriptions into an array of{' '}
					<code>Caption</code>
				</div>
			</TOCItem>
		</Grid>
	</div>
);
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
--- | ||
image: /generated/articles-docs-openai-whisper-index.png | ||
title: '@remotion/openai-whisper' | ||
crumb: 'Subtitle videos' | ||
--- | ||
|
||
_Available from v4.0.217_ | ||
|
||
The `@remotion/openai-whisper` package provides utilities for working with the OpenAI Whisper API. | ||
|
||
It provides a transformation function for converting the output of the OpenAI Whisper API into an array of [`Caption`](/docs/captions/caption) objects in order to make the captions compatible with other caption-related Remotion APIs. | ||
|
||
import {TableOfContents} from './TableOfContents'; | ||
|
||
<Installation pkg="@remotion/openai-whisper" /> | ||
|
||
## APIs | ||
|
||
<TableOfContents /> | ||
|
||
## License | ||
|
||
MIT | ||
|
||
## See also | ||
|
||
- [`@remotion/captions`](/docs/captions) | ||
- [`@remotion/install-whisper-cpp`](/docs/install-whisper-cpp) |
41 changes: 41 additions & 0 deletions
41
packages/docs/docs/openai-whisper/openai-whisper-api-to-captions.mdx
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
--- | ||
image: /generated/articles-docs-openai-whisper-openai-whisper-api-to-captions.png | ||
title: openAiWhisperApiToCaptions() | ||
crumb: '@remotion/openai-whisper' | ||
--- | ||
|
||
# openAiWhisperApiToCaptions()<AvailableFrom v="4.0.217"/> | ||
|
||
Turns the output from [`openai.audio.transcriptions.create`](https://platform.openai.com/docs/guides/speech-to-text/transcriptions) from the [openai](https://npm.im/openai) package into an array of [`Caption`](/docs/captions/caption) objects. | ||
|
||
This function post-processes the transcription in order to retain the punctuation of each word, which is not included in the word-level timestamps of the OpenAI response by default. | ||
|
||
This function can be used in any JavaScript environment, but you should not use the OpenAI API in the browser because your API key will be exposed to the browser. | ||
|
||
```tsx twoslash title="Example usage" | ||
// @module: ESNext | ||
// @target: ESNext | ||
|
||
import fs from 'fs'; | ||
import {OpenAI} from 'openai'; | ||
import {openAiWhisperApiToCaptions} from '@remotion/openai-whisper'; | ||
|
||
const openai = new OpenAI(); | ||
|
||
const transcription = await openai.audio.transcriptions.create({ | ||
file: fs.createReadStream('audio.mp3'), | ||
model: 'whisper-1', | ||
response_format: 'verbose_json', | ||
prompt: 'Hello, welcome to my lecture.', | ||
timestamp_granularities: ['word'], | ||
}); | ||
|
||
const {captions} = openAiWhisperApiToCaptions({transcription}); | ||
``` | ||
|
||
## See also | ||
|
||
- [Source code for this function](https://github.com/remotion-dev/remotion/blob/main/packages/openai-whisper/src/openai-whisper-api-to-captions.ts) | ||
- [`@remotion/openai-whisper`](/docs/openai-whisper) | ||
- [`@remotion/install-whisper-cpp`](/docs/install-whisper-cpp) | ||
- [`@remotion/captions`](/docs/captions) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Binary file added
BIN
+45.2 KB
packages/docs/static/generated/articles-docs-openai-whisper-index.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added
BIN
+48.7 KB
...tatic/generated/articles-docs-openai-whisper-openai-whisper-api-to-captions.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1 @@ | ||
{"version":1,"defects":{"Remotion\\LambdaPhp\\Tests\\PHPClientTest::testClient":5},"times":{"Remotion\\LambdaPhp\\Tests\\PHPClientTest::testClient":0.037,"Remotion\\LambdaPhp\\Tests\\PHPRenderProgressTest::testClient":0.017}} | ||
{"version":1,"defects":{"Remotion\\LambdaPhp\\Tests\\PHPClientTest::testClient":5},"times":{"Remotion\\LambdaPhp\\Tests\\PHPClientTest::testClient":0.037,"Remotion\\LambdaPhp\\Tests\\PHPRenderProgressTest::testClient":0.02}} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
{ | ||
"extends": "@jonny", | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
# @remotion/openai-whisper | ||
|
||
Work with the output of the OpenAI Whisper API | ||
|
||
[![NPM Downloads](https://img.shields.io/npm/dm/@remotion/openai-whisper.svg?style=flat&color=black&label=Downloads)](https://npmcharts.com/compare/@remotion/openai-whisper?minimal=true) | ||
|
||
## Installation | ||
|
||
```bash | ||
npm install @remotion/openai-whisper --save-exact | ||
``` | ||
|
||
When installing a Remotion package, make sure to align the version of all `remotion` and `@remotion/*` packages to the same version. | ||
Remove the `^` character from the version number to use the exact version. | ||
|
||
## Usage | ||
|
||
See the [documentation](https://www.remotion.dev/docs/openai-whisper) for more information. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
{ | ||
"repository": { | ||
"url": "https://github.com/remotion-dev/remotion/tree/main/packages/openai-whisper" | ||
}, | ||
"name": "@remotion/openai-whisper", | ||
"version": "4.0.216", | ||
"description": "Work with the output of the OpenAI Whisper API", | ||
"main": "dist/index.js", | ||
"sideEffects": false, | ||
"bugs": { | ||
"url": "https://github.com/remotion-dev/remotion/issues" | ||
}, | ||
"scripts": { | ||
"formatting": "prettier src --check", | ||
"lint": "eslint src --ext ts,tsx", | ||
"test": "bun test src" | ||
}, | ||
"files": [ | ||
"dist" | ||
], | ||
"author": "Jonny Burger <[email protected]>", | ||
"license": "MIT", | ||
"dependencies": { | ||
"@remotion/captions": "workspace:*" | ||
}, | ||
"peerDependencies": {}, | ||
"devDependencies": { | ||
"openai": "4.67.1" | ||
}, | ||
"keywords": [ | ||
"remotion" | ||
], | ||
"publishConfig": { | ||
"access": "public" | ||
}, | ||
"homepage": "https://www.remotion.dev/docs/openai-whisper" | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
// Public entry point of the package.
// Runtime value export:
export {openAiWhisperApiToCaptions} from './openai-whisper-api-to-captions';

// Type-only re-exports. These use `export type` so that single-file
// transpilers (`isolatedModules`, Babel, esbuild, Bun) can erase them
// instead of emitting a re-export of a binding that does not exist at runtime.
export type {
	OpenAiToCaptionsInput,
	OpenAiToCaptionsOutput,
} from './openai-whisper-api-to-captions';

export type {OpenAiVerboseTranscription} from './openai-format';
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
/**
 * A single word entry of a transcription, together with its timing.
 */
export interface TranscriptionWord {
	/** The text of the word. */
	word: string;
	/** Start of the word. `openAiWhisperApiToCaptions()` multiplies this by 1000 to obtain milliseconds. */
	start: number;
	/** End of the word. `openAiWhisperApiToCaptions()` multiplies this by 1000 to obtain milliseconds. */
	end: number;
}
|
||
/**
 * A segment entry of a transcription.
 *
 * NOTE(review): presumably mirrors the segment object of the OpenAI
 * `verbose_json` transcription response - confirm field semantics against
 * the OpenAI API reference.
 */
export interface TranscriptionSegment {
	id: number;
	seek: number;

	// Timing and content of the segment
	start: number;
	end: number;
	text: string;
	tokens: number[];

	// Numeric metadata reported alongside the segment
	temperature: number;
	avg_logprob: number;
	compression_ratio: number;
	no_speech_prob: number;
}
|
||
/**
 * Shape of the transcription object accepted by
 * `openAiWhisperApiToCaptions()`.
 *
 * NOTE(review): presumably matches the `verbose_json` response of the OpenAI
 * transcription endpoint - confirm against the `openai` package typings.
 */
export interface OpenAiVerboseTranscription {
	/** Full transcribed text. */
	text: string;
	language: string;
	duration: number | string;
	task?: string;
	/**
	 * Word-level entries. `openAiWhisperApiToCaptions()` throws if this is
	 * missing.
	 */
	words?: TranscriptionWord[];
	segments?: TranscriptionSegment[];
}
47 changes: 47 additions & 0 deletions
47
packages/openai-whisper/src/openai-whisper-api-to-captions.ts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
import type {Caption} from '@remotion/captions'; | ||
import type {OpenAiVerboseTranscription} from './openai-format'; | ||
|
||
/**
 * Argument object of `openAiWhisperApiToCaptions()`.
 */
export type OpenAiToCaptionsInput = {
	/** The transcription to convert; it must contain `words`. */
	transcription: OpenAiVerboseTranscription;
};

/**
 * Return value of `openAiWhisperApiToCaptions()`.
 */
export type OpenAiToCaptionsOutput = {
	/** One caption per entry of `transcription.words`, in the same order. */
	captions: Array<Caption>;
};
|
||
/**
 * Converts a word-level OpenAI Whisper transcription into an array of
 * `Caption` objects.
 *
 * The entries in `transcription.words` are matched one after another against
 * `transcription.text`, so that each caption also carries the leading
 * characters (e.g. a space) and the trailing punctuation (`?`, `,`, `.`)
 * found in the full text.
 *
 * @param input.transcription The transcription; `words` must be present.
 * @returns An object with one caption per word. `startMs` / `endMs` are the
 * word's `start` / `end` multiplied by 1000, `timestampMs` is their midpoint
 * and `confidence` is always `null`.
 * @throws Error if `transcription.words` is missing, or if a word cannot be
 * located at the beginning of the not-yet-consumed text.
 */
export const openAiWhisperApiToCaptions = ({
	transcription,
}: OpenAiToCaptionsInput): OpenAiToCaptionsOutput => {
	if (!transcription.words) {
		throw new Error(
			'The transcription needs to have been generated with `timestamp_granularities: ["word"]`',
		);
	}

	const captions: Caption[] = [];
	let remainingText = transcription.text;

	for (const word of transcription.words) {
		// The word is arbitrary text: escape regex metacharacters so that
		// words such as "C++" or "(a" neither throw nor match something else.
		const escapedWord = word.word.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');

		// Allow up to 4 leading characters before the word. The quantifier is
		// lazy so the *first* occurrence is taken; a greedy prefix would swallow
		// a repeated short word ("a a a") and make the next lookup fail.
		const match = new RegExp(
			`^(.{0,4}?)${escapedWord}([\\?,\\.]{0,3})?`,
		).exec(remainingText);
		if (!match) {
			throw new Error(
				`Unable to parse punctuation from OpenAI Whisper output. Could not find word "${word.word}" in text "${remainingText.slice(0, 100)}". File an issue under https://remotion.dev/issue to ask for a fix.`,
			);
		}

		const foundText = match[0];
		// Consume the matched part so the next word is searched after it.
		remainingText = remainingText.slice(foundText.length);

		captions.push({
			confidence: null,
			endMs: word.end * 1000,
			startMs: word.start * 1000,
			text: foundText,
			timestampMs: ((word.start + word.end) / 2) * 1000,
		});
	}

	return {captions};
};
Oops, something went wrong.