add jsdoc for processors.js

huggingface · Mar 22, 2023 · b2cb4e6 · b2cb4e6
1 parent d0d7824
commit b2cb4e6
Show file tree

Hide file tree

Showing 2 changed files with 165 additions and 8 deletions.
diff --git a/dist/types/processors.d.ts b/dist/types/processors.d.ts
@@ -1,11 +1,35 @@
+/**
+ * A helper class to determine model type from config.
+ */
 export class AutoProcessor {
-    static from_pretrained(modelPath: any, progressCallback?: any): Promise<WhisperProcessor>;
-}
-declare class WhisperProcessor extends Processor {
+    /**
+     * Returns a new instance of a Processor with a feature extractor
+     * based on the configuration file located at `modelPath`.
+     *
+     * @param {string} modelPath - The path to the model directory.
+     * @param {function} [progressCallback=null] - A callback function to track the loading progress.
+     * @returns {Promise<Processor>} A Promise that resolves with a new instance of a Processor.
+     * @throws {Error} If the feature extractor type specified in the configuration file is unknown.
+     */
+    static from_pretrained(modelPath: string, progressCallback?: Function): Promise<Processor>;
 }
+/**
+ * Represents a Processor that extracts features from an input.
+ * @extends Callable
+ */
 declare class Processor extends Callable {
-    constructor(feature_extractor: any);
-    feature_extractor: any;
+    /**
+     * Creates a new Processor with the given feature extractor.
+     * @param {function} feature_extractor - The function used to extract features from the input.
+     */
+    constructor(feature_extractor: Function);
+    feature_extractor: Function;
+    /**
+     * Calls the feature_extractor function with the given input.
+     * @param {any} input - The input to extract features from.
+     * @returns {Promise<any>} A Promise that resolves with the extracted features.
+     * @async
+     */
     _call(input: any): Promise<any>;
 }
 import { Callable } from "./utils.js";

diff --git a/src/processors.js b/src/processors.js
@@ -15,9 +15,20 @@ const Jimp = (typeof self !== 'undefined') ? (self.Jimp || _Jimp) : _Jimp;
 
 const B64_STRING = /^data:image\/\w+;base64,/;
 
+/**
+ * A helper class to determine model type from config.
+ */
 class AutoProcessor {
     // Helper class to determine model type from config
-
+    /**
+     * Returns a new instance of a Processor with a feature extractor
+     * based on the configuration file located at `modelPath`.
+     *
+     * @param {string} modelPath - The path to the model directory.
+     * @param {function} [progressCallback=null] - A callback function to track the loading progress.
+     * @returns {Promise<Processor>} A Promise that resolves with a new instance of a Processor.
+     * @throws {Error} If the feature extractor type specified in the configuration file is unknown.
+     */
     static async from_pretrained(modelPath, progressCallback = null) {
 
         let preprocessorConfig = await fetchJSON(modelPath, 'preprocessor_config.json', progressCallback)
@@ -57,14 +68,41 @@ class AutoProcessor {
     }
 }
 
+/**
+ * Base class for feature extractors.
+ *
+ * @extends Callable
+ */
 class FeatureExtractor extends Callable {
+    /**
+     * Constructs a new FeatureExtractor instance.
+     *
+     * @param {object} config - The configuration for the feature extractor.
+     */
     constructor(config) {
         super();
         this.config = config
     }
 }
+
+/**
+ * Feature extractor for Vision Transformer (ViT) models.
+ *
+ * @extends FeatureExtractor
+ */
 class ViTFeatureExtractor extends FeatureExtractor {
 
+    /**
+     * Constructs a new ViTFeatureExtractor instance.
+     *
+     * @param {object} config - The configuration for the feature extractor.
+     * @param {number[]} config.image_mean - The mean values for image normalization.
+     * @param {number[]} config.image_std - The standard deviation values for image normalization.
+     * @param {boolean} config.do_rescale - Whether to rescale the image pixel values to the [0,1] range.
+     * @param {boolean} config.do_normalize - Whether to normalize the image pixel values.
+     * @param {boolean} config.do_resize - Whether to resize the image.
+     * @param {number} config.size - The size to resize the image to.
+     */
     constructor(config) {
         super(config);
 
@@ -85,9 +123,17 @@ class ViTFeatureExtractor extends FeatureExtractor {
         this.size = this.config.size;
     }
 
-
+    /**
+     * Preprocesses the given image.
+     *
+     * @param {string} url - The URL of the image to preprocess.
+     * @returns {Promise<any>} The preprocessed image as a Tensor.
+     */
     async preprocess(url) {
 
+        /**
+         * @type {string | Buffer | Uint8Array}
+         */
         let imgToLoad = url;
         if (B64_STRING.test(url)) {
             imgToLoad = imgToLoad.replace(B64_STRING, '');
@@ -143,12 +189,22 @@ class ViTFeatureExtractor extends FeatureExtractor {
         return transposed;
     }
 
+    /**
+     * Runs feature extraction on one or more image URLs (a single
+     * URL is wrapped in an array), preprocesses each image, and
+     * concatenates the resulting features into a single Tensor.
+     * @param {string|string[]} urls - The URL(s) of the image(s) to extract features from.
+     * @returns {Promise<Object>} An object containing the concatenated pixel values of the preprocessed images.
+     */
     async _call(urls) {
         if (!Array.isArray(urls)) {
             urls = [urls];
         }
 
         // Convert any non-images to images
+        /**
+         * @type {any[] | Tensor}
+         */
         let images = await Promise.all(urls.map(x => this.preprocess(x)));
 
         images.forEach(x => x.dims = [1, ...x.dims]) // add batch dimension
@@ -161,12 +217,30 @@ class ViTFeatureExtractor extends FeatureExtractor {
     }
 
 }
+
+/**
+ * Feature extractor for Whisper speech-recognition models.
+ * @extends FeatureExtractor
+ */
 class WhisperFeatureExtractor extends FeatureExtractor {
 
+    /**
+     * Calculates the index offset for a given index and window size.
+     * @param {number} i - The index.
+     * @param {number} w - The window size.
+     * @returns {number} The index offset.
+     */
     calcOffset(i, w) {
         return Math.abs((i + w) % (2 * w) - w);
     }
 
+    /**
+     * Pads an array with a reflected version of itself on both ends.
+     * @param {Float32Array} array - The array to pad.
+     * @param {number} left - The amount of padding to add to the left.
+     * @param {number} right - The amount of padding to add to the right.
+     * @returns {Float32Array} The padded array.
+     */
     padReflect(array, left, right) {
         const padded = new Float32Array(array.length + left + right);
         const w = array.length - 1;
@@ -186,6 +260,15 @@ class WhisperFeatureExtractor extends FeatureExtractor {
         return padded;
     }
 
+    /**
+     * Calculates the complex Short-Time Fourier Transform (STFT) of the given framed signal.
+     * 
+     * @param {Array<Array<number>>} frames - A 2D array representing the signal frames.
+     * @param {Array<number>} window - A 1D array representing the window to be applied to the frames.
+     * @returns {Object} An object with the following properties:
+     * - data: A 1D array representing the complex STFT of the signal.
+     * - dims: An array representing the dimensions of the STFT data, i.e. [num_frames, num_fft_bins].
+     */
     stft(frames, window) {
         // Calculates the complex Short-Time Fourier Transform (STFT) of the given framed signal.
         // 
@@ -285,6 +368,14 @@ class WhisperFeatureExtractor extends FeatureExtractor {
             dims: [frames.length, num_fft_bins] // [3001, 402]
         };
     }
+
+    /**
+     * Creates an array of frames from a given waveform.
+     *
+     * @param {Float32Array} waveform - The waveform to create frames from.
+     * @param {boolean} [center=true] - Whether to center the frames on their corresponding positions in the waveform. Defaults to true.
+     * @returns {Array} An array of frames.
+     */
     fram_wave(waveform, center = true) {
         const frames = [];
         const half_window = Math.floor((this.config.n_fft - 1) / 2) + 1;
@@ -336,6 +427,12 @@ class WhisperFeatureExtractor extends FeatureExtractor {
         return frames;
     }
 
+    /**
+     * Generates a Hanning window of length M.
+     *
+     * @param {number} M - The length of the Hanning window to generate.
+     * @returns {Float32Array|Array} The generated Hanning window, or an empty array when M < 1.
+     */
     hanning(M) {
         if (M < 1) {
             return [];
@@ -351,6 +448,12 @@ class WhisperFeatureExtractor extends FeatureExtractor {
         }
         return cos_vals;
     }
+
+    /**
+     * Computes the log-Mel spectrogram of the provided audio waveform.
+     * @param {Float32Array} waveform - The audio waveform to process.
+     * @returns {{data: Float32Array, dims: number[]}} An object containing the log-Mel spectrogram data as a Float32Array and its dimensions as an array of numbers.
+     */
     _extract_fbank_features(waveform) {
         // Compute the log-Mel spectrogram of the provided audio
 
@@ -428,6 +531,12 @@ class WhisperFeatureExtractor extends FeatureExtractor {
         };
     }
 
+    /**
+     * Asynchronously extracts features from a given audio using the provided configuration.
+     * @param {Float32Array} audio - The audio data as a Float32Array.
+     * @returns {Promise<{ input_features: Tensor }>} - A Promise resolving to an object containing the extracted input features as a Tensor.
+     * @async
+     */
     async _call(audio) {
         // audio is a float32array
 
@@ -451,19 +560,43 @@ class WhisperFeatureExtractor extends FeatureExtractor {
     }
 }
 
+/**
+ * Represents a Processor that extracts features from an input.
+ * @extends Callable
+ */
 class Processor extends Callable {
+    /**
+     * Creates a new Processor with the given feature extractor.
+     * @param {function} feature_extractor - The function used to extract features from the input.
+     */
     constructor(feature_extractor) {
         super();
         this.feature_extractor = feature_extractor;
         // TODO use tokenizer here?
     }
+
+    /**
+     * Calls the feature_extractor function with the given input.
+     * @param {any} input - The input to extract features from.
+     * @returns {Promise<any>} A Promise that resolves with the extracted features.
+     * @async
+     */
     async _call(input) {
         return await this.feature_extractor(input);
     }
 }
 
-
+/**
+ * Represents a WhisperProcessor that extracts features from an audio input.
+ * @extends Processor
+ */
 class WhisperProcessor extends Processor {
+    /**
+     * Calls the feature_extractor function with the given audio input.
+     * @param {any} audio - The audio input to extract features from.
+     * @returns {Promise<any>} A Promise that resolves with the extracted features.
+     * @async
+     */
     async _call(audio) {
         return await this.feature_extractor(audio)
     }