Skip to content

Commit

Permalink
Add placeholder code for speech search.
Browse files Browse the repository at this point in the history
  • Loading branch information
rotemdan committed Nov 22, 2024
1 parent bef0c32 commit bb548b8
Show file tree
Hide file tree
Showing 2 changed files with 69 additions and 0 deletions.
12 changes: 12 additions & 0 deletions src/api/SpeechSearch.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
import { AudioSourceParam } from "../audio/AudioUtilities.js";

/**
 * Placeholder entry point for speech search.
 *
 * None of the arguments are read yet; the returned promise always
 * resolves to an empty result object.
 *
 * @param inputAudio - Audio source argument (currently unused).
 * @param text - Text argument (currently unused).
 * @param options - Options argument (currently unused).
 * @returns A promise that resolves to an empty `SpeechSearchResult`.
 */
export async function searchSpeech(inputAudio: AudioSourceParam, text: string, options: SpeechSearchOptions): Promise<SpeechSearchResult> {
	const emptyResult: SpeechSearchResult = {}

	return emptyResult
}

/**
 * Options accepted by `searchSpeech`.
 *
 * Placeholder: no option fields are declared yet.
 */
export interface SpeechSearchOptions {

}

/**
 * Result type resolved by `searchSpeech`.
 *
 * Placeholder: no result fields are declared yet.
 */
export interface SpeechSearchResult {
}
57 changes: 57 additions & 0 deletions src/speech-search/DTWSpeechSearch.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
import { DtwGranularity, getMfccOptionsForGranularity } from "../alignment/SpeechAlignment.js";
import { RawAudio } from "../audio/AudioUtilities.js";
import { computeMFCCs, extendDefaultMfccOptions, MfccOptions } from "../dsp/MFCC.js";
import { euclideanDistance13Dim } from "../math/VectorMath.js";
import { Logger } from "../utilities/Logger.js";

/**
 * Work-in-progress speech search over MFCC features.
 *
 * Computes MFCC features for the query and for the source audio using the
 * 'low' granularity options (with `zeroFirstCoefficient` enabled), builds a
 * cost matrix between them with `euclideanDistance13Dim`, and then iterates
 * over candidate start rows. The search loop body is not implemented yet,
 * so nothing is returned.
 *
 * @param sourceRawAudio - Audio whose features index the cost matrix rows.
 * @param queryRawAudio - Audio whose features index the cost matrix columns.
 */
export async function searchSpeech(sourceRawAudio: RawAudio, queryRawAudio: RawAudio) {
	const log = new Logger()

	const granularity: DtwGranularity = 'low'

	// Same feature options are used for both the query and the source.
	const granularityOptions = getMfccOptionsForGranularity(granularity)
	const featureOptions = extendDefaultMfccOptions({ ...granularityOptions, zeroFirstCoefficient: true }) as MfccOptions

	log.start('Compute query MFCC features')
	const queryFeatures = await computeMFCCs(queryRawAudio, featureOptions)

	log.start('Compute source MFCC features')
	const sourceFeatures = await computeMFCCs(sourceRawAudio, featureOptions)

	log.start('Compute cost matrix')
	const costColumns = computeCostMatrix(sourceFeatures, queryFeatures, euclideanDistance13Dim)

	log.start('Search')

	const sourceRowCount = sourceFeatures.length
	const queryColumnCount = queryFeatures.length

	// A candidate window spans at most twice the query length.
	const windowLimit = queryColumnCount * 2

	for (let windowStart = 0; windowStart < sourceRowCount; windowStart++) {
		// Clamp the window end to the last source row.
		const windowEnd = Math.min(windowStart + windowLimit, sourceRowCount)

		// TODO: search within [windowStart, windowEnd) is not implemented yet.
	}
}

/**
 * Computes the full pairwise cost matrix between two sequences.
 *
 * The matrix is stored column-major: the returned array holds one
 * `Float32Array` per element of `sequence2`, and entry `rowIndex` of that
 * column is `costFunction(sequence1[rowIndex], sequence2[columnIndex])`.
 * Costs are stored with 32-bit float precision.
 *
 * @param sequence1 - Sequence indexing the rows of the matrix.
 * @param sequence2 - Sequence indexing the columns of the matrix.
 * @param costFunction - Cost between one element of each sequence.
 * @returns `sequence2.length` columns, each of length `sequence1.length`.
 */
function computeCostMatrix<T, U>(sequence1: T[], sequence2: U[], costFunction: (a: T, b: U) => number): Float32Array[] {
	const rowCount = sequence1.length
	const columnCount = sequence2.length

	const costMatrixColumns: Float32Array[] = []

	for (let columnIndex = 0; columnIndex < columnCount; columnIndex++) {
		const column = new Float32Array(rowCount)
		const columnElement = sequence2[columnIndex]

		for (let rowIndex = 0; rowIndex < rowCount; rowIndex++) {
			// Index by row: indexing by column would overwrite a single slot
			// repeatedly and leave the rest of the column at zero.
			column[rowIndex] = costFunction(sequence1[rowIndex], columnElement)
		}

		costMatrixColumns.push(column)
	}

	return costMatrixColumns
}

0 comments on commit bb548b8

Please sign in to comment.