Skip to content

Commit

Permalink
Merge pull request #4356 from remotion-dev/openai-whisper
Browse files Browse the repository at this point in the history
  • Loading branch information
JonnyBurger authored Oct 4, 2024
2 parents 687c235 + 244c55d commit dac638c
Show file tree
Hide file tree
Showing 29 changed files with 942 additions and 33 deletions.
1 change: 1 addition & 0 deletions packages/STATS.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ Monthly downloads of Remotion packages
[![NPM Downloads](https://img.shields.io/npm/dm/@remotion/media-utils.svg?style=flat&color=black&label=@remotion/media-utils)](https://npmcharts.com/compare/@remotion/media-utils?minimal=true)
[![NPM Downloads](https://img.shields.io/npm/dm/@remotion/motion-blur.svg?style=flat&color=black&label=@remotion/motion-blur)](https://npmcharts.com/compare/@remotion/motion-blur?minimal=true)
[![NPM Downloads](https://img.shields.io/npm/dm/@remotion/noise.svg?style=flat&color=black&label=@remotion/noise)](https://npmcharts.com/compare/@remotion/noise?minimal=true)
[![NPM Downloads](https://img.shields.io/npm/dm/@remotion/openai-whisper.svg?style=flat&color=black&label=@remotion/openai-whisper)](https://npmcharts.com/compare/@remotion/openai-whisper?minimal=true)
[![NPM Downloads](https://img.shields.io/npm/dm/@remotion/paths.svg?style=flat&color=black&label=@remotion/paths)](https://npmcharts.com/compare/@remotion/paths?minimal=true)
[![NPM Downloads](https://img.shields.io/npm/dm/@remotion/player.svg?style=flat&color=black&label=@remotion/player)](https://npmcharts.com/compare/@remotion/player?minimal=true)
[![NPM Downloads](https://img.shields.io/npm/dm/@remotion/preload.svg?style=flat&color=black&label=@remotion/preload)](https://npmcharts.com/compare/@remotion/preload?minimal=true)
Expand Down
2 changes: 1 addition & 1 deletion packages/ai-improvements/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
},
"name": "ai-improvements",
"dependencies": {
"openai": "^4.18.0"
"openai": "4.67.1"
},
"private": true,
"version": "4.0.216"
Expand Down
1 change: 1 addition & 0 deletions packages/cli/src/list-of-remotion-packages.ts
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ export const listOfRemotionPackages = [
'@remotion/tailwind',
'@remotion/transitions',
'@remotion/install-whisper-cpp',
'@remotion/openai-whisper',
'@remotion/captions',
'@remotion/animation-utils',
'@remotion/animated-emoji',
Expand Down
1 change: 1 addition & 0 deletions packages/create-video/src/list-of-remotion-packages.ts
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ export const listOfRemotionPackages = [
'@remotion/tailwind',
'@remotion/transitions',
'@remotion/install-whisper-cpp',
'@remotion/openai-whisper',
'@remotion/captions',
'@remotion/animation-utils',
'@remotion/animated-emoji',
Expand Down
4 changes: 4 additions & 0 deletions packages/docs/components/TableOfContents/api.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import {TableOfContents as EnableScssTableOfContents} from '../../docs/enable-sc
import {TableOfContents as FontsTableOfContents} from '../../docs/fonts-api/TableOfContents';
import {TableOfContents as InstallWhisperCppTableOfContents} from '../../docs/install-whisper-cpp/install-whisper-cpp';
import {TableOfContents as MediaParserTableOfContents} from '../../docs/media-parser/TableOfContents';
import {TableOfContents as OpenAiWhisperTableOfContents} from '../../docs/openai-whisper/TableOfContents';
import {TableOfContents as SkiaTableOfContents} from '../../docs/skia/TableOfContents';
import {TableOfContents as StudioTableOfContents} from '../../docs/studio/TableOfContents';
import {TableOfContents as TailwindTableOfContents} from '../../docs/tailwind/TableOfContents';
Expand Down Expand Up @@ -121,6 +122,9 @@ export const TableOfContents: React.FC = () => {
<h2>@remotion/install-whisper-cpp</h2>
<p>Whisper.cpp installation and transcription</p>
<InstallWhisperCppTableOfContents />
<h2>@remotion/openai-whisper</h2>
<p>Work with transcriptions from OpenAI Whisper</p>
<OpenAiWhisperTableOfContents />
<h2>@remotion/animated-emoji</h2>
<p>Google Fonts Animated Emojis as Remotion Components</p>
<AnimatedEmojiTableOfContents />
Expand Down
12 changes: 6 additions & 6 deletions packages/docs/docs/captions/caption.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -9,16 +9,16 @@ crumb: '@remotion/captions'
This is a simple data structure for a caption.

```tsx twoslash
import {Caption} from '@remotion/captions';
// ^?
import type {Caption} from '@remotion/captions';
// ^?
```

By establishing a standard data structure, we allow many operations that involve captions to be interoperable:

- Transcribing (using the [`@remotion/install-whisper-cpp`](/docs/install-whisper-cpp) package)
- Formatting (for example, creating pages using [`createTikTokStyleCaptions()`](/docs/captions/create-tiktok-style-captions))
- Parsing (using the [`parseSrt()`](/docs/captions/parse-srt) function)
- Serializing (for example to a .srt file)
- **Transcribing**: Using the [`@remotion/install-whisper-cpp`](/docs/install-whisper-cpp) or [`@remotion/openai-whisper`](/docs/openai-whisper) packages
- **Formatting**: For example, creating pages using [`createTikTokStyleCaptions()`](/docs/captions/create-tiktok-style-captions)
- **Parsing**: Using the [`parseSrt()`](/docs/captions/parse-srt) function
- **Serializing**: For example to a `.srt` file using [`serializeSrt()`](/docs/captions/serialize-srt)

## Fields

Expand Down
19 changes: 19 additions & 0 deletions packages/docs/docs/openai-whisper/TableOfContents.tsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
import React from 'react';
import {Grid} from '../../components/TableOfContents/Grid';
import {TOCItem} from '../../components/TableOfContents/TOCItem';

/**
 * Table of contents for the `@remotion/openai-whisper` documentation section.
 * Renders a grid with a single entry that links to the
 * `openAiWhisperApiToCaptions()` page.
 */
export const TableOfContents: React.FC = () => (
	<div>
		<Grid>
			<TOCItem link="/docs/openai-whisper/openai-whisper-api-to-captions">
				<strong>{'openAiWhisperApiToCaptions()'}</strong>
				<div>
					Turn OpenAI Whisper API transcriptions into an array of{' '}
					<code>Caption</code>
				</div>
			</TOCItem>
		</Grid>
	</div>
);
28 changes: 28 additions & 0 deletions packages/docs/docs/openai-whisper/index.mdx
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
---
image: /generated/articles-docs-openai-whisper-index.png
title: '@remotion/openai-whisper'
crumb: 'Subtitle videos'
---

_Available from v4.0.217_

The `@remotion/openai-whisper` package provides utilities for working with the OpenAI Whisper API.

It provides a transformation function for converting the output of the OpenAI Whisper API into an array of [`Caption`](/docs/captions/caption) objects in order to make the captions compatible with other caption-related Remotion APIs.

import {TableOfContents} from './TableOfContents';

<Installation pkg="@remotion/openai-whisper" />

## APIs

<TableOfContents />

## License

MIT

## See also

- [`@remotion/captions`](/docs/captions)
- [`@remotion/install-whisper-cpp`](/docs/install-whisper-cpp)
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
---
image: /generated/articles-docs-openai-whisper-openai-whisper-api-to-captions.png
title: openAiWhisperApiToCaptions()
crumb: '@remotion/openai-whisper'
---

# openAiWhisperApiToCaptions()<AvailableFrom v="4.0.217"/>

Turns the output from [`openai.audio.transcriptions.create`](https://platform.openai.com/docs/guides/speech-to-text/transcriptions) from the [openai](https://npm.im/openai) package into an array of [`Caption`](/docs/captions/caption) objects.

This function processes the transcription in order to retain the punctuation in the words, which the OpenAI response does not include by default.

This function can be used in any JavaScript environment, but you should not use the OpenAI API in the browser because your API key will be exposed to the browser.

```tsx twoslash title="Example usage"
// @module: ESNext
// @target: ESNext

import fs from 'fs';
import {OpenAI} from 'openai';
import {openAiWhisperApiToCaptions} from '@remotion/openai-whisper';

const openai = new OpenAI();

const transcription = await openai.audio.transcriptions.create({
file: fs.createReadStream('audio.mp3'),
model: 'whisper-1',
response_format: 'verbose_json',
prompt: 'Hello, welcome to my lecture.',
timestamp_granularities: ['word'],
});

const {captions} = openAiWhisperApiToCaptions({transcription});
```

## See also

- [Source code for this function](https://github.com/remotion-dev/remotion/blob/main/packages/openai-whisper/src/openai-whisper-api-to-captions.ts)
- [`@remotion/openai-whisper`](/docs/openai-whisper)
- [`@remotion/install-whisper-cpp`](/docs/install-whisper-cpp)
- [`@remotion/captions`](/docs/captions)
4 changes: 3 additions & 1 deletion packages/docs/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@
"@remotion/noise": "workspace:*",
"@remotion/paths": "workspace:*",
"@remotion/player": "workspace:*",
"@remotion/openai-whisper": "workspace:*",
"@remotion/preload": "workspace:*",
"@remotion/renderer": "workspace:*",
"@remotion/rive": "workspace:*",
Expand Down Expand Up @@ -94,6 +95,7 @@
"@types/unist": "^2.0.0",
"@types/node": "20.12.14",
"lodash.sortby": "^4.7.0",
"url-loader": "^4.1.1"
"url-loader": "^4.1.1",
"openai": "4.67.1"
}
}
9 changes: 9 additions & 0 deletions packages/docs/sidebars.js
Original file line number Diff line number Diff line change
Expand Up @@ -596,6 +596,15 @@ module.exports = {
'install-whisper-cpp/convert-to-captions',
],
},
{
type: 'category',
label: '@remotion/openai-whisper',
link: {
type: 'doc',
id: 'openai-whisper/index',
},
items: ['openai-whisper/openai-whisper-api-to-captions'],
},
{
type: 'category',
label: '@remotion/fonts',
Expand Down
28 changes: 21 additions & 7 deletions packages/docs/src/data/articles.ts
Original file line number Diff line number Diff line change
Expand Up @@ -398,6 +398,13 @@ export const articles = [
compId: 'articles-docs-renderer-render-frames',
crumb: '@remotion/renderer',
},
{
id: 'metadata',
title: 'Setting video metadata',
relativePath: 'docs/metadata.mdx',
compId: 'articles-docs-metadata',
crumb: 'Techniques',
},
{
id: 'bundle',
title: 'bundle()',
Expand Down Expand Up @@ -790,6 +797,20 @@ export const articles = [
compId: 'articles-docs-get-image-dimensions',
crumb: '@remotion/media-utils',
},
{
id: 'openai-whisper/index',
title: '@remotion/openai-whisper',
relativePath: 'docs/openai-whisper/index.mdx',
compId: 'articles-docs-openai-whisper-index',
crumb: 'Subtitle videos',
},
{
id: 'openai-whisper/openai-whisper-api-to-captions',
title: 'openAiWhisperApiToCaptions()',
relativePath: 'docs/openai-whisper/openai-whisper-api-to-captions.mdx',
compId: 'articles-docs-openai-whisper-openai-whisper-api-to-captions',
crumb: '@remotion/openai-whisper',
},
{
id: 'cancel-render',
title: 'cancelRender()',
Expand Down Expand Up @@ -1633,13 +1654,6 @@ export const articles = [
compId: 'articles-docs-miscellaneous-chrome-headless-shell',
crumb: 'FAQ',
},
{
id: 'miscellaneous/render-in-browser',
title: 'Can I render videos in the browser?',
relativePath: 'docs/miscellaneous/render-in-browser.mdx',
compId: 'articles-docs-miscellaneous-render-in-browser',
crumb: 'Techniques',
},
{
id: 'miscellaneous/render-in-browser',
title: 'Can I render videos in the browser?',
Expand Down
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
2 changes: 1 addition & 1 deletion packages/lambda-php/.phpunit.result.cache
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"version":1,"defects":{"Remotion\\LambdaPhp\\Tests\\PHPClientTest::testClient":5},"times":{"Remotion\\LambdaPhp\\Tests\\PHPClientTest::testClient":0.037,"Remotion\\LambdaPhp\\Tests\\PHPRenderProgressTest::testClient":0.017}}
{"version":1,"defects":{"Remotion\\LambdaPhp\\Tests\\PHPClientTest::testClient":5},"times":{"Remotion\\LambdaPhp\\Tests\\PHPClientTest::testClient":0.037,"Remotion\\LambdaPhp\\Tests\\PHPRenderProgressTest::testClient":0.02}}
3 changes: 3 additions & 0 deletions packages/openai-whisper/.eslintrc
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{
"extends": "@jonny",
}
18 changes: 18 additions & 0 deletions packages/openai-whisper/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# @remotion/openai-whisper

Work with the output of the OpenAI Whisper API

[![NPM Downloads](https://img.shields.io/npm/dm/@remotion/openai-whisper.svg?style=flat&color=black&label=Downloads)](https://npmcharts.com/compare/@remotion/openai-whisper?minimal=true)

## Installation

```bash
npm install @remotion/openai-whisper --save-exact
```

When installing a Remotion package, make sure to align the version of all `remotion` and `@remotion/*` packages to the same version.
Remove the `^` character from the version number to use the exact version.

## Usage

See the [documentation](https://www.remotion.dev/docs/openai-whisper) for more information.
37 changes: 37 additions & 0 deletions packages/openai-whisper/package.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
{
"repository": {
"url": "https://github.com/remotion-dev/remotion/tree/main/packages/openai-whisper"
},
"name": "@remotion/openai-whisper",
"version": "4.0.216",
"description": "Work with the output of the OpenAI Whisper API",
"main": "dist/index.js",
"sideEffects": false,
"bugs": {
"url": "https://github.com/remotion-dev/remotion/issues"
},
"scripts": {
"formatting": "prettier src --check",
"lint": "eslint src --ext ts,tsx",
"test": "bun test src"
},
"files": [
"dist"
],
"author": "Jonny Burger <[email protected]>",
"license": "MIT",
"dependencies": {
"@remotion/captions": "workspace:*"
},
"peerDependencies": {},
"devDependencies": {
"openai": "4.67.1"
},
"keywords": [
"remotion"
],
"publishConfig": {
"access": "public"
},
"homepage": "https://www.remotion.dev/docs/openai-whisper"
}
7 changes: 7 additions & 0 deletions packages/openai-whisper/src/index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
export {
OpenAiToCaptionsInput,
OpenAiToCaptionsOutput,
openAiWhisperApiToCaptions,
} from './openai-whisper-api-to-captions';

export {OpenAiVerboseTranscription} from './openai-format';
27 changes: 27 additions & 0 deletions packages/openai-whisper/src/openai-format.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
/**
 * One word-level entry of a transcription, i.e. the element type of
 * `OpenAiVerboseTranscription.words`.
 */
export interface TranscriptionWord {
/** End of the word in seconds; `openAiWhisperApiToCaptions()` multiplies it by 1000 to obtain `endMs`. */
end: number;
/** Start of the word in seconds; `openAiWhisperApiToCaptions()` multiplies it by 1000 to obtain `startMs`. */
start: number;
/** Text of the word; `openAiWhisperApiToCaptions()` searches for it in the transcription's full `text`. */
word: string;
}

/**
 * Element type of `OpenAiVerboseTranscription.segments`.
 *
 * NOTE(review): the field names look like they mirror the segment objects of
 * OpenAI's verbose JSON transcription response - confirm against the API
 * reference before relying on the meaning or units of any individual field.
 * `openAiWhisperApiToCaptions()` does not read segments.
 */
export interface TranscriptionSegment {
id: number;
avg_logprob: number;
compression_ratio: number;
end: number;
no_speech_prob: number;
seek: number;
start: number;
temperature: number;
text: string;
tokens: Array<number>;
}

/**
 * Shape of the transcription object accepted by `openAiWhisperApiToCaptions()`.
 *
 * NOTE(review): presumably matches the `verbose_json` response of
 * `openai.audio.transcriptions.create` - confirm against the `openai` package types.
 */
export interface OpenAiVerboseTranscription {
// NOTE(review): typed as number or string; unit is not established in this file - confirm.
duration: number | string;
task?: string;
language: string;
/** Full transcript text; `openAiWhisperApiToCaptions()` matches each word against it to build the caption text. */
text: string;
/** Optional segment-level entries; not read by `openAiWhisperApiToCaptions()`. */
segments?: Array<TranscriptionSegment>;
/** Word-level entries. Optional in the type, but `openAiWhisperApiToCaptions()` throws when it is missing. */
words?: Array<TranscriptionWord>;
}
47 changes: 47 additions & 0 deletions packages/openai-whisper/src/openai-whisper-api-to-captions.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
import type {Caption} from '@remotion/captions';
import type {OpenAiVerboseTranscription} from './openai-format';

/** Argument object of `openAiWhisperApiToCaptions()`. */
export type OpenAiToCaptionsInput = {
/** Transcription to convert; it must contain `words`, otherwise the conversion throws. */
transcription: OpenAiVerboseTranscription;
};

/** Return value of `openAiWhisperApiToCaptions()`. */
export type OpenAiToCaptionsOutput = {
/** One caption per entry of `transcription.words`, in the same order. */
captions: Caption[];
};

/**
 * Escapes every character that has a special meaning in a regular expression,
 * so that `literal` can be embedded in a pattern and is matched verbatim.
 */
const escapeForRegExp = (literal: string): string => {
	return literal.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
};

/**
 * Converts a verbose OpenAI Whisper transcription into an array of `Caption`s,
 * one per entry of `transcription.words`.
 *
 * The word entries only provide timing; the caption text is taken from
 * `transcription.text` so that the punctuation around a word is retained.
 * The full text is consumed from left to right. For every word, the following
 * is cut off the front of the remaining text and becomes the caption text:
 * as few leading characters as possible (at most 4, e.g. whitespace or an
 * opening bracket), the word itself, and up to 3 trailing punctuation
 * characters out of `? , . ! ; :`.
 *
 * @param options.transcription Transcription that includes word-level entries.
 * @returns `captions` in word order. `startMs` / `endMs` are the word's
 * `start` / `end` multiplied by 1000, `timestampMs` is the midpoint of the
 * two, and `confidence` is always `null`.
 * @throws Error if `transcription.words` is missing.
 * @throws Error if a word cannot be found within the first characters of the
 * remaining text.
 */
export const openAiWhisperApiToCaptions = ({
	transcription,
}: OpenAiToCaptionsInput): OpenAiToCaptionsOutput => {
	if (!transcription.words) {
		throw new Error(
			'The transcription needs to have been generated with `timestamp_granularities: ["word"]`',
		);
	}

	const captions: Caption[] = [];
	let remainingText = transcription.text;

	for (const word of transcription.words) {
		// The word is escaped because it is arbitrary text ("C++", "$5", ...).
		// The leading filler is lazy so that the earliest occurrence of the word
		// wins; a greedy filler would swallow a repeated short word ("a a b").
		// `[\s\S]` is used instead of `.` so that a line break in the text is
		// tolerated as well.
		const pattern = new RegExp(
			`^[\\s\\S]{0,4}?${escapeForRegExp(word.word)}[?,.!;:]{0,3}`,
		);
		const match = pattern.exec(remainingText);
		if (!match) {
			throw new Error(
				`Unable to parse punctuation from OpenAI Whisper output. Could not find word "${word.word}" in text "${remainingText.slice(0, 100)}". File an issue under https://remotion.dev/issue to ask for a fix.`,
			);
		}

		const foundText = match[0];
		// Drop what was just consumed so that the next word is matched at the front.
		remainingText = remainingText.slice(foundText.length);

		captions.push({
			confidence: null,
			endMs: word.end * 1000,
			startMs: word.start * 1000,
			text: foundText,
			timestampMs: ((word.start + word.end) / 2) * 1000,
		});
	}

	return {captions};
};
Loading

0 comments on commit dac638c

Please sign in to comment.