Skip to content

Commit

Permalink
add jsdoc for processors.js
Browse files Browse the repository at this point in the history
  • Loading branch information
chelouche9 committed Mar 22, 2023
1 parent d0d7824 commit b2cb4e6
Show file tree
Hide file tree
Showing 2 changed files with 165 additions and 8 deletions.
34 changes: 29 additions & 5 deletions dist/types/processors.d.ts
Original file line number Diff line number Diff line change
@@ -1,11 +1,35 @@
/**
* A helper class to determine model type from config.
*/
export class AutoProcessor {
static from_pretrained(modelPath: any, progressCallback?: any): Promise<WhisperProcessor>;
}
declare class WhisperProcessor extends Processor {
/**
* Returns a new instance of a Processor with a feature extractor
* based on the configuration file located at `modelPath`.
*
* @param {string} modelPath - The path to the model directory.
* @param {function} progressCallback - A callback function to track the loading progress (optional).
* @returns {Promise<Processor>} A Promise that resolves with a new instance of a Processor.
* @throws {Error} If the feature extractor type specified in the configuration file is unknown.
*/
static from_pretrained(modelPath: string, progressCallback?: Function): Promise<Processor>;
}
/**
* Represents a Processor that extracts features from an input.
* @extends Callable
*/
declare class Processor extends Callable {
constructor(feature_extractor: any);
feature_extractor: any;
/**
* Creates a new Processor with the given feature extractor.
* @param {function} feature_extractor - The function used to extract features from the input.
*/
constructor(feature_extractor: Function);
feature_extractor: Function;
/**
* Calls the feature_extractor function with the given input.
* @param {any} input - The input to extract features from.
* @returns {Promise<any>} A Promise that resolves with the extracted features.
* @async
*/
_call(input: any): Promise<any>;
}
import { Callable } from "./utils.js";
Expand Down
139 changes: 136 additions & 3 deletions src/processors.js
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,20 @@ const Jimp = (typeof self !== 'undefined') ? (self.Jimp || _Jimp) : _Jimp;

const B64_STRING = /^data:image\/\w+;base64,/;

/**
* A helper class to determine model type from config.
*/
class AutoProcessor {
// Helper class to determine model type from config

/**
* Returns a new instance of a Processor with a feature extractor
* based on the configuration file located at `modelPath`.
*
* @param {string} modelPath - The path to the model directory.
* @param {function} progressCallback - A callback function to track the loading progress (optional).
* @returns {Promise<Processor>} A Promise that resolves with a new instance of a Processor.
* @throws {Error} If the feature extractor type specified in the configuration file is unknown.
*/
static async from_pretrained(modelPath, progressCallback = null) {

let preprocessorConfig = await fetchJSON(modelPath, 'preprocessor_config.json', progressCallback)
Expand Down Expand Up @@ -57,14 +68,41 @@ class AutoProcessor {
}
}

/**
 * Common parent of every feature extractor. It does nothing beyond
 * remembering the configuration object it was constructed with.
 *
 * @extends Callable
 */
class FeatureExtractor extends Callable {
    /**
     * Builds a feature extractor around the supplied configuration.
     *
     * @param {object} config - Settings kept on the instance as `this.config` for subclasses to read.
     */
    constructor(config) {
        super();
        this.config = config;
    }
}

/**
* Feature extractor for Vision Transformer (ViT) models.
*
* @extends FeatureExtractor
*/
class ViTFeatureExtractor extends FeatureExtractor {

/**
* Constructs a new ViTFeatureExtractor instance.
*
* @param {object} config - The configuration for the feature extractor.
* @param {number[]} config.image_mean - The mean values for image normalization.
* @param {number[]} config.image_std - The standard deviation values for image normalization.
* @param {boolean} config.do_rescale - Whether to rescale the image pixel values to the [0,1] range.
* @param {boolean} config.do_normalize - Whether to normalize the image pixel values.
* @param {boolean} config.do_resize - Whether to resize the image.
* @param {number} config.size - The size to resize the image to.
*/
constructor(config) {
super(config);

Expand All @@ -85,9 +123,17 @@ class ViTFeatureExtractor extends FeatureExtractor {
this.size = this.config.size;
}


/**
* Preprocesses the given image.
*
* @param {string} url - The URL of the image to preprocess.
* @returns {Promise<any>} The preprocessed image as a Tensor.
*/
async preprocess(url) {

/**
* @type {string | Buffer | Uint8Array}
*/
let imgToLoad = url;
if (B64_STRING.test(url)) {
imgToLoad = imgToLoad.replace(B64_STRING, '');
Expand Down Expand Up @@ -143,12 +189,22 @@ class ViTFeatureExtractor extends FeatureExtractor {
return transposed;
}

/**
* Calls the feature extraction process on an array of image
* URLs, preprocesses each image, and concatenates the resulting
* features into a single Tensor.
* @param {string|string[]} urls - The URL(s) of the image(s) to extract features from.
* @returns {Promise<Object>} An object containing the concatenated pixel values of the preprocessed images.
*/
async _call(urls) {
if (!Array.isArray(urls)) {
urls = [urls];
}

// Convert any non-images to images
/**
* @type {any[] | Tensor}
*/
let images = await Promise.all(urls.map(x => this.preprocess(x)));

images.forEach(x => x.dims = [1, ...x.dims]) // add batch dimension
Expand All @@ -161,12 +217,30 @@ class ViTFeatureExtractor extends FeatureExtractor {
}

}

/**
* Feature extractor for Whisper speech models; turns raw audio into log-Mel spectrogram features.
* @extends FeatureExtractor
*/
class WhisperFeatureExtractor extends FeatureExtractor {

/**
* Calculates the index offset for a given index and window size.
* @param {number} i - The index.
* @param {number} w - The window size.
* @returns {number} The index offset.
*/
calcOffset(i, w) {
return Math.abs((i + w) % (2 * w) - w);
}

/**
* Pads an array with a reflected version of itself on both ends.
* @param {Float32Array} array - The array to pad.
* @param {number} left - The amount of padding to add to the left.
* @param {number} right - The amount of padding to add to the right.
* @returns {Float32Array} The padded array.
*/
padReflect(array, left, right) {
const padded = new Float32Array(array.length + left + right);
const w = array.length - 1;
Expand All @@ -186,6 +260,15 @@ class WhisperFeatureExtractor extends FeatureExtractor {
return padded;
}

/**
* Calculates the complex Short-Time Fourier Transform (STFT) of the given framed signal.
*
* @param {Array<Array<number>>} frames - A 2D array representing the signal frames.
* @param {Array<number>} window - A 1D array representing the window to be applied to the frames.
* @returns {Object} An object with the following properties:
* - data: A 1D array representing the complex STFT of the signal.
* - dims: An array representing the dimensions of the STFT data, i.e. [num_frames, num_fft_bins].
*/
stft(frames, window) {
// Calculates the complex Short-Time Fourier Transform (STFT) of the given framed signal.
//
Expand Down Expand Up @@ -285,6 +368,14 @@ class WhisperFeatureExtractor extends FeatureExtractor {
dims: [frames.length, num_fft_bins] // [3001, 402]
};
}

/**
* Creates an array of frames from a given waveform.
*
* @param {Float32Array} waveform - The waveform to create frames from.
* @param {boolean} [center=true] - Whether to center the frames on their corresponding positions in the waveform. Defaults to true.
* @returns {Array} An array of frames.
*/
fram_wave(waveform, center = true) {
const frames = [];
const half_window = Math.floor((this.config.n_fft - 1) / 2) + 1;
Expand Down Expand Up @@ -336,6 +427,12 @@ class WhisperFeatureExtractor extends FeatureExtractor {
return frames;
}

/**
* Generates a Hanning window of length M.
*
* @param {number} M - The length of the Hanning window to generate.
* @returns {Float32Array|Array} - The generated Hanning window, or an empty plain array when M < 1.
*/
hanning(M) {
if (M < 1) {
return [];
Expand All @@ -351,6 +448,12 @@ class WhisperFeatureExtractor extends FeatureExtractor {
}
return cos_vals;
}

/**
* Computes the log-Mel spectrogram of the provided audio waveform.
* @param {Float32Array} waveform - The audio waveform to process.
* @returns {{data: Float32Array, dims: number[]}} An object containing the log-Mel spectrogram data as a Float32Array and its dimensions as an array of numbers.
*/
_extract_fbank_features(waveform) {
// Compute the log-Mel spectrogram of the provided audio

Expand Down Expand Up @@ -428,6 +531,12 @@ class WhisperFeatureExtractor extends FeatureExtractor {
};
}

/**
* Asynchronously extracts features from a given audio using the provided configuration.
* @param {Float32Array} audio - The audio data as a Float32Array.
* @returns {Promise<{ input_features: Tensor }>} - A Promise resolving to an object containing the extracted input features as a Tensor.
* @async
*/
async _call(audio) {
// audio is a float32array

Expand All @@ -451,19 +560,43 @@ class WhisperFeatureExtractor extends FeatureExtractor {
}
}

/**
 * Thin wrapper that owns a feature extractor and forwards inputs to it.
 * @extends Callable
 */
class Processor extends Callable {
    /**
     * Stores the feature extractor that later calls are delegated to.
     * @param {function} feature_extractor - Callable invoked with the raw input by `_call`.
     */
    constructor(feature_extractor) {
        super();
        // TODO use tokenizer here?
        this.feature_extractor = feature_extractor;
    }

    /**
     * Hands `input` to the stored feature extractor and waits for its result.
     * @param {any} input - The value to extract features from; passed through unchanged.
     * @returns {Promise<any>} Resolves with whatever the feature extractor produces.
     * @async
     */
    async _call(input) {
        const features = await this.feature_extractor(input);
        return features;
    }
}


/**
* Represents a WhisperProcessor that extracts features from an audio input.
* @extends Processor
*/
class WhisperProcessor extends Processor {
/**
* Calls the feature_extractor function with the given audio input.
* @param {any} audio - The audio input to extract features from.
* @returns {Promise<any>} A Promise that resolves with the extracted features.
* @async
*/
async _call(audio) {
return await this.feature_extractor(audio)
}
Expand Down

0 comments on commit b2cb4e6

Please sign in to comment.