From fc88b258c98f39ffb2f5074c6e637f4dfe9a9410 Mon Sep 17 00:00:00 2001 From: WFH Brian Date: Tue, 3 Dec 2024 07:56:23 -0500 Subject: [PATCH] cleanup --- smart-embed-model/adapters/transformers.js | 2 +- .../connectors/transformers_iframe.js | 2 +- .../connectors/transformers_worker.js | 418 +++++++++++------- smart-environment/smart_env.js | 1 - 4 files changed, 251 insertions(+), 172 deletions(-) diff --git a/smart-embed-model/adapters/transformers.js b/smart-embed-model/adapters/transformers.js index 14254c47..5f50e4d1 100644 --- a/smart-embed-model/adapters/transformers.js +++ b/smart-embed-model/adapters/transformers.js @@ -284,7 +284,7 @@ export const transformers_settings_config = { description: "Number of embeddings to process per batch on GPU. Use 0 to disable GPU.", placeholder: "Enter number ex. 10", }, - "legacy_transformers": { + "[ADAPTER].legacy_transformers": { name: 'Legacy Transformers (no GPU)', type: "toggle", description: "Use legacy transformers (v2) instead of v3.", diff --git a/smart-embed-model/connectors/transformers_iframe.js b/smart-embed-model/connectors/transformers_iframe.js index 23cb2944..a8017f90 100644 --- a/smart-embed-model/connectors/transformers_iframe.js +++ b/smart-embed-model/connectors/transformers_iframe.js @@ -1 +1 @@ -export const transformers_connector = "var __defProp = Object.defineProperty;\nvar __defNormalProp = (obj, key, value) => key in obj ? __defProp(obj, key, { enumerable: true, configurable: true, writable: true, value }) : obj[key] = value;\nvar __publicField = (obj, key, value) => __defNormalProp(obj, typeof key !== \"symbol\" ? key + \"\" : key, value);\n\n// ../smart-model/smart_model.js\nvar SmartModel = class {\n /**\n * Create a SmartModel instance.\n * @param {Object} opts - Configuration options\n * @param {Object} opts.adapters - Map of adapter names to adapter classes\n * @param {Object} opts.settings - Model settings configuration\n * @param {Object} opts.model_config - Model-specific configuration\n * @param {string} opts.model_config.adapter - Name of the adapter to use\n * @param {string} [opts.model_key] - Optional model identifier to override settings\n * @throws {Error} If required options are missing\n */\n constructor(opts = {}) {\n this.opts = opts;\n this.validate_opts(opts);\n this.state = \"unloaded\";\n this._adapter = null;\n }\n /**\n * Initialize the model by loading the configured adapter.\n * @async\n * @returns {Promise}\n */\n async initialize() {\n this.load_adapter(this.adapter_name);\n await this.load();\n }\n /**\n * Validate required options.\n * @param {Object} opts - Configuration options\n */\n validate_opts(opts) {\n if (!opts.adapters) throw new Error(\"opts.adapters is required\");\n if (!opts.settings) throw new Error(\"opts.settings is required\");\n }\n /**\n * Get the current settings\n * @returns {Object} Current settings\n */\n get settings() {\n if (!this.opts.settings) this.opts.settings = {\n ...this.constructor.defaults\n };\n return this.opts.settings;\n }\n /**\n * Get the current adapter name\n * @returns {string} Current adapter name\n */\n get adapter_name() {\n const adapter_key = this.models[this.model_key]?.adapter;\n if (!adapter_key || !this.adapters[adapter_key]) throw new Error(`Platform \"${adapter_key}\" not supported`);\n return adapter_key;\n }\n /**\n * Get adapter-specific settings.\n * @returns {Object} Settings for current adapter\n */\n get adapter_settings() {\n if (!this.settings[this.adapter_name]) this.settings[this.adapter_name] = {};\n return this.settings[this.adapter_name];\n }\n get adapter_config() {\n const base_config = this.adapters[this.adapter_name]?.defaults || {};\n return {\n ...base_config,\n ...this.adapter_settings,\n ...this.opts.adapter_config\n };\n }\n /**\n * Get the default model key to use\n * @returns {string} Default model identifier\n */\n get default_model_key() {\n throw new Error(\"default_model_key must be overridden in sub-class\");\n }\n /**\n * Get available models configuration\n * @returns {Object} Map of model configurations\n */\n get models() {\n }\n /**\n * Get the current model key\n * @returns {string} Current model key\n */\n get model_key() {\n return this.opts.model_key || this.settings.model_key || this.adapter_config.model_key || this.default_model_key;\n }\n /**\n * Get the current model configuration\n * @returns {Object} Combined base and custom model configuration\n */\n get model_config() {\n const model_key = this.model_key;\n const base_model_config = this.models[model_key] || {};\n return {\n ...this.adapter_config,\n ...base_model_config,\n ...this.opts.model_config\n };\n }\n get model_settings() {\n if (!this.settings[this.model_key]) this.settings[this.model_key] = {};\n return this.settings[this.model_key];\n }\n /**\n * Load the current adapter and transition to loaded state.\n * @async\n * @returns {Promise}\n */\n async load() {\n this.set_state(\"loading\");\n if (!this.adapter?.loaded) {\n await this.invoke_adapter_method(\"load\");\n }\n this.set_state(\"loaded\");\n }\n /**\n * Unload the current adapter and transition to unloaded state.\n * @async\n * @returns {Promise}\n */\n async unload() {\n if (this.adapter?.loaded) {\n this.set_state(\"unloading\");\n await this.invoke_adapter_method(\"unload\");\n this.set_state(\"unloaded\");\n }\n }\n /**\n * Set the model's state.\n * @param {('unloaded'|'loading'|'loaded'|'unloading')} new_state - The new state\n * @throws {Error} If the state is invalid\n */\n set_state(new_state) {\n const valid_states = [\"unloaded\", \"loading\", \"loaded\", \"unloading\"];\n if (!valid_states.includes(new_state)) {\n throw new Error(`Invalid state: ${new_state}`);\n }\n this.state = new_state;\n }\n get is_loading() {\n return this.state === \"loading\";\n }\n get is_loaded() {\n return this.state === \"loaded\";\n }\n get is_unloading() {\n return this.state === \"unloading\";\n }\n get is_unloaded() {\n return this.state === \"unloaded\";\n }\n // ADAPTERS\n /**\n * Get the map of available adapters\n * @returns {Object} Map of adapter names to adapter classes\n */\n get adapters() {\n return this.opts.adapters || {};\n }\n /**\n * Load a specific adapter by name.\n * @async\n * @param {string} adapter_name - Name of the adapter to load\n * @throws {Error} If adapter not found or loading fails\n * @returns {Promise}\n */\n async load_adapter(adapter_name) {\n this.set_adapter(adapter_name);\n if (!this._adapter.loaded) {\n this.set_state(\"loading\");\n try {\n await this.invoke_adapter_method(\"load\");\n this.set_state(\"loaded\");\n } catch (err) {\n this.set_state(\"unloaded\");\n throw new Error(`Failed to load adapter: ${err.message}`);\n }\n }\n }\n /**\n * Set an adapter instance by name without loading it.\n * @param {string} adapter_name - Name of the adapter to set\n * @throws {Error} If adapter not found\n */\n set_adapter(adapter_name) {\n const AdapterClass = this.adapters[adapter_name];\n if (!AdapterClass) {\n throw new Error(`Adapter \"${adapter_name}\" not found.`);\n }\n if (this._adapter?.constructor.name.toLowerCase() === adapter_name.toLowerCase()) {\n return;\n }\n this._adapter = new AdapterClass(this);\n }\n /**\n * Get the current active adapter instance\n * @returns {Object} The active adapter instance\n * @throws {Error} If adapter not found\n */\n get adapter() {\n const adapter_name = this.adapter_name;\n if (!adapter_name) {\n throw new Error(`Adapter not set for model.`);\n }\n if (!this._adapter) {\n this.load_adapter(adapter_name);\n }\n return this._adapter;\n }\n /**\n * Ensure the adapter is ready to execute a method.\n * @param {string} method - Name of the method to check\n * @throws {Error} If adapter not loaded or method not implemented\n */\n ensure_adapter_ready(method) {\n if (!this.adapter) {\n throw new Error(\"No adapter loaded.\");\n }\n if (typeof this.adapter[method] !== \"function\") {\n throw new Error(`Adapter does not implement method: ${method}`);\n }\n }\n /**\n * Invoke a method on the current adapter.\n * @async\n * @param {string} method - Name of the method to call\n * @param {...any} args - Arguments to pass to the method\n * @returns {Promise} Result from the adapter method\n * @throws {Error} If adapter not ready or method fails\n */\n async invoke_adapter_method(method, ...args) {\n this.ensure_adapter_ready(method);\n return await this.adapter[method](...args);\n }\n // SETTINGS\n /**\n * Get the settings configuration schema\n * @returns {Object} Settings configuration object\n */\n get settings_config() {\n return this.process_settings_config({\n // SETTINGS GO HERE\n });\n }\n /**\n * Process settings configuration with conditionals and prefixes.\n * @param {Object} _settings_config - Raw settings configuration\n * @param {string} [prefix] - Optional prefix for setting keys\n * @returns {Object} Processed settings configuration\n */\n process_settings_config(_settings_config, prefix = null) {\n return Object.entries(_settings_config).reduce((acc, [key, val]) => {\n if (val.conditional) {\n if (!val.conditional(this)) return acc;\n delete val.conditional;\n }\n const new_key = (prefix ? prefix + \".\" : \"\") + this.process_setting_key(key);\n acc[new_key] = val;\n return acc;\n }, {});\n }\n /**\n * Process an individual setting key.\n * @param {string} key - Setting key to process\n * @returns {string} Processed setting key\n */\n process_setting_key(key) {\n return key;\n }\n // override in sub-class if needed for prefixes and variable replacements\n};\n__publicField(SmartModel, \"defaults\", {\n // override in sub-class if needed\n});\n\n// models.json\nvar models_default = {\n \"TaylorAI/bge-micro-v2\": {\n id: \"TaylorAI/bge-micro-v2\",\n batch_size: 1,\n dims: 384,\n max_tokens: 512,\n name: \"BGE-micro-v2\",\n description: \"Local, 512 tokens, 384 dim (recommended)\",\n adapter: \"transformers\"\n },\n \"TaylorAI/gte-tiny\": {\n id: \"TaylorAI/gte-tiny\",\n batch_size: 1,\n dims: 384,\n max_tokens: 512,\n name: \"GTE-tiny\",\n description: \"Local, 512 tokens, 384 dim\",\n adapter: \"transformers\"\n },\n \"Mihaiii/Ivysaur\": {\n id: \"Mihaiii/Ivysaur\",\n batch_size: 1,\n dims: 384,\n max_tokens: 512,\n name: \"Ivysaur\",\n description: \"Local, 512 tokens, 384 dim\",\n adapter: \"transformers\"\n },\n \"andersonbcdefg/bge-small-4096\": {\n id: \"andersonbcdefg/bge-small-4096\",\n batch_size: 1,\n dims: 384,\n max_tokens: 4096,\n name: \"BGE-small-4K\",\n description: \"Local, 4,096 tokens, 384 dim\",\n adapter: \"transformers\"\n },\n \"Xenova/jina-embeddings-v2-base-zh\": {\n id: \"Xenova/jina-embeddings-v2-base-zh\",\n batch_size: 1,\n dims: 512,\n max_tokens: 8192,\n name: \"Jina-v2-base-zh-8K\",\n description: \"Local, 8,192 tokens, 512 dim, Chinese/English bilingual\",\n adapter: \"transformers\"\n },\n \"text-embedding-3-small\": {\n id: \"text-embedding-3-small\",\n batch_size: 50,\n dims: 1536,\n max_tokens: 8191,\n name: \"OpenAI Text-3 Small\",\n description: \"API, 8,191 tokens, 1,536 dim\",\n endpoint: \"https://api.openai.com/v1/embeddings\",\n adapter: \"openai\"\n },\n \"text-embedding-3-large\": {\n id: \"text-embedding-3-large\",\n batch_size: 50,\n dims: 3072,\n max_tokens: 8191,\n name: \"OpenAI Text-3 Large\",\n description: \"API, 8,191 tokens, 3,072 dim\",\n endpoint: \"https://api.openai.com/v1/embeddings\",\n adapter: \"openai\"\n },\n \"text-embedding-3-small-512\": {\n id: \"text-embedding-3-small\",\n batch_size: 50,\n dims: 512,\n max_tokens: 8191,\n name: \"OpenAI Text-3 Small - 512\",\n description: \"API, 8,191 tokens, 512 dim\",\n endpoint: \"https://api.openai.com/v1/embeddings\",\n adapter: \"openai\"\n },\n \"text-embedding-3-large-256\": {\n id: \"text-embedding-3-large\",\n batch_size: 50,\n dims: 256,\n max_tokens: 8191,\n name: \"OpenAI Text-3 Large - 256\",\n description: \"API, 8,191 tokens, 256 dim\",\n endpoint: \"https://api.openai.com/v1/embeddings\",\n adapter: \"openai\"\n },\n \"text-embedding-ada-002\": {\n id: \"text-embedding-ada-002\",\n batch_size: 50,\n dims: 1536,\n max_tokens: 8191,\n name: \"OpenAI Ada\",\n description: \"API, 8,191 tokens, 1,536 dim\",\n endpoint: \"https://api.openai.com/v1/embeddings\",\n adapter: \"openai\"\n },\n \"Xenova/jina-embeddings-v2-small-en\": {\n id: \"Xenova/jina-embeddings-v2-small-en\",\n batch_size: 1,\n dims: 512,\n max_tokens: 8192,\n name: \"Jina-v2-small-en\",\n description: \"Local, 8,192 tokens, 512 dim\",\n adapter: \"transformers\"\n },\n \"nomic-ai/nomic-embed-text-v1.5\": {\n id: \"nomic-ai/nomic-embed-text-v1.5\",\n batch_size: 1,\n dims: 256,\n max_tokens: 8192,\n name: \"Nomic-embed-text-v1.5\",\n description: \"Local, 8,192 tokens, 256 dim\",\n adapter: \"transformers\"\n },\n \"Xenova/bge-small-en-v1.5\": {\n id: \"Xenova/bge-small-en-v1.5\",\n batch_size: 1,\n dims: 384,\n max_tokens: 512,\n name: \"BGE-small\",\n description: \"Local, 512 tokens, 384 dim\",\n adapter: \"transformers\"\n },\n \"nomic-ai/nomic-embed-text-v1\": {\n id: \"nomic-ai/nomic-embed-text-v1\",\n batch_size: 1,\n dims: 768,\n max_tokens: 2048,\n name: \"Nomic-embed-text\",\n description: \"Local, 2,048 tokens, 768 dim\",\n adapter: \"transformers\"\n }\n};\n\n// smart_embed_model.js\nvar SmartEmbedModel = class extends SmartModel {\n /**\n * Create a SmartEmbedModel instance\n * @param {Object} opts - Configuration options\n * @param {Object} [opts.adapters] - Map of available adapter implementations\n * @param {boolean} [opts.use_gpu] - Whether to enable GPU acceleration\n * @param {number} [opts.gpu_batch_size] - Batch size when using GPU\n * @param {number} [opts.batch_size] - Default batch size for processing\n * @param {Object} [opts.model_config] - Model-specific configuration\n * @param {string} [opts.model_config.adapter] - Override adapter type\n * @param {number} [opts.model_config.dims] - Embedding dimensions\n * @param {number} [opts.model_config.max_tokens] - Maximum tokens to process\n * @param {Object} [opts.settings] - User settings\n * @param {string} [opts.settings.api_key] - API key for remote models\n * @param {number} [opts.settings.min_chars] - Minimum text length to embed\n */\n constructor(opts = {}) {\n super(opts);\n }\n /**\n * Count tokens in an input string\n * @param {string} input - Text to tokenize\n * @returns {Promise} Token count result\n * @property {number} tokens - Number of tokens in input\n * \n * @example\n * ```javascript\n * const result = await model.count_tokens(\"Hello world\");\n * console.log(result.tokens); // 2\n * ```\n */\n async count_tokens(input) {\n return await this.invoke_adapter_method(\"count_tokens\", input);\n }\n /**\n * Generate embeddings for a single input\n * @param {string|Object} input - Text or object with embed_input property\n * @returns {Promise} Embedding result\n * @property {number[]} vec - Embedding vector\n * @property {number} tokens - Token count\n * \n * @example\n * ```javascript\n * const result = await model.embed(\"Hello world\");\n * console.log(result.vec); // [0.1, 0.2, ...]\n * ```\n */\n async embed(input) {\n if (typeof input === \"string\") input = { embed_input: input };\n return (await this.embed_batch([input]))[0];\n }\n /**\n * Generate embeddings for multiple inputs in batch\n * @param {Array} inputs - Array of texts or objects with embed_input\n * @returns {Promise>} Array of embedding results\n * @property {number[]} vec - Embedding vector for each input\n * @property {number} tokens - Token count for each input\n * \n * @example\n * ```javascript\n * const results = await model.embed_batch([\n * { embed_input: \"First text\" },\n * { embed_input: \"Second text\" }\n * ]);\n * ```\n */\n async embed_batch(inputs) {\n return await this.invoke_adapter_method(\"embed_batch\", inputs);\n }\n /**\n * Get the current batch size based on GPU settings\n * @returns {number} Current batch size for processing\n */\n get batch_size() {\n return this.adapter.batch_size || 1;\n }\n /** @returns {Object} Map of available embedding models */\n get models() {\n return models_default;\n }\n /** @returns {string} Default model key if none specified */\n get default_model_key() {\n return \"TaylorAI/bge-micro-v2\";\n }\n /**\n * Get settings configuration schema\n * @returns {Object} Settings configuration object\n */\n get settings_config() {\n const _settings_config = {\n model_key: {\n name: \"Embedding Model\",\n type: \"dropdown\",\n description: \"Select an embedding model.\",\n options_callback: \"embed_model.get_embedding_model_options\",\n callback: \"embed_model_changed\",\n default: \"TaylorAI/bge-micro-v2\"\n },\n \"[EMBED_MODEL].min_chars\": {\n name: \"Minimum Embedding Length\",\n type: \"number\",\n description: \"Minimum length of note to embed.\",\n placeholder: \"Enter number ex. 300\"\n },\n ...this.adapter.settings_config || {}\n };\n return this.process_settings_config(_settings_config, \"embed_model\");\n }\n process_setting_key(key) {\n return key.replace(/\\[EMBED_MODEL\\]/g, this.model_key);\n }\n /**\n * Get available embedding model options\n * @returns {Array} Array of model options with value and name\n */\n get_embedding_model_options() {\n return Object.entries(this.models).map(([key, model2]) => ({ value: key, name: key }));\n }\n /**\n * Get embedding model options including 'None' option\n * @returns {Array} Array of model options with value and name\n */\n get_block_embedding_model_options() {\n const options = this.get_embedding_model_options();\n options.unshift({ value: \"None\", name: \"None\" });\n return options;\n }\n};\n__publicField(SmartEmbedModel, \"defaults\", {\n model_key: \"TaylorAI/bge-micro-v2\"\n});\n\n// ../smart-model/adapters/_adapter.js\nvar SmartModelAdapter = class {\n /**\n * Create a SmartModelAdapter instance.\n * @param {SmartModel} model - The parent SmartModel instance\n */\n constructor(model2) {\n this.model = model2;\n this.state = \"unloaded\";\n }\n /**\n * Load the adapter.\n * @async\n * @returns {Promise}\n */\n async load() {\n this.set_state(\"loaded\");\n }\n /**\n * Unload the adapter.\n * @returns {void}\n */\n unload() {\n this.set_state(\"unloaded\");\n }\n /**\n * Get all settings.\n * @returns {Object} All settings\n */\n get settings() {\n return this.model.settings;\n }\n /**\n * Get the current model key.\n * @returns {string} Current model identifier\n */\n get model_key() {\n return this.model.model_key;\n }\n /**\n * Get the current model configuration.\n * @returns {Object} Model configuration\n */\n get model_config() {\n return this.model.model_config;\n }\n /**\n * Get model-specific settings.\n * @returns {Object} Settings for current model\n */\n get model_settings() {\n return this.model.model_settings;\n }\n /**\n * Get adapter-specific configuration.\n * @returns {Object} Adapter configuration\n */\n get adapter_config() {\n return this.model.adapter_config;\n }\n /**\n * Get adapter-specific settings.\n * @returns {Object} Adapter settings\n */\n get adapter_settings() {\n return this.model.adapter_settings;\n }\n /**\n * Set the adapter's state.\n * @param {('unloaded'|'loading'|'loaded'|'unloading')} new_state - The new state\n * @throws {Error} If the state is invalid\n */\n set_state(new_state) {\n const valid_states = [\"unloaded\", \"loading\", \"loaded\", \"unloading\"];\n if (!valid_states.includes(new_state)) {\n throw new Error(`Invalid state: ${new_state}`);\n }\n this.state = new_state;\n }\n // Replace individual state getters/setters with a unified state management\n get is_loading() {\n return this.state === \"loading\";\n }\n get is_loaded() {\n return this.state === \"loaded\";\n }\n get is_unloading() {\n return this.state === \"unloading\";\n }\n get is_unloaded() {\n return this.state === \"unloaded\";\n }\n};\n\n// adapters/_adapter.js\nvar SmartEmbedAdapter = class extends SmartModelAdapter {\n /**\n * Create adapter instance\n * @param {SmartEmbedModel} model - Parent model instance\n */\n constructor(model2) {\n super(model2);\n this.smart_embed = model2;\n }\n /**\n * Count tokens in input text\n * @abstract\n * @param {string} input - Text to tokenize\n * @returns {Promise} Token count result\n * @property {number} tokens - Number of tokens in input\n * @throws {Error} If not implemented by subclass\n */\n async count_tokens(input) {\n throw new Error(\"count_tokens method not implemented\");\n }\n /**\n * Generate embeddings for single input\n * @abstract\n * @param {string|Object} input - Text to embed\n * @returns {Promise} Embedding result\n * @property {number[]} vec - Embedding vector\n * @property {number} tokens - Number of tokens in input\n * @throws {Error} If not implemented by subclass\n */\n async embed(input) {\n throw new Error(\"embed method not implemented\");\n }\n /**\n * Generate embeddings for multiple inputs\n * @abstract\n * @param {Array} inputs - Texts to embed\n * @returns {Promise>} Array of embedding results\n * @property {number[]} vec - Embedding vector for each input\n * @property {number} tokens - Number of tokens in each input\n * @throws {Error} If not implemented by subclass\n */\n async embed_batch(inputs) {\n throw new Error(\"embed_batch method not implemented\");\n }\n get dims() {\n return this.model_config.dims;\n }\n get max_tokens() {\n return this.model_config.max_tokens;\n }\n // get batch_size() { return this.model_config.batch_size; }\n get use_gpu() {\n if (typeof this._use_gpu === \"undefined\") {\n if (typeof this.model.opts.use_gpu !== \"undefined\") this._use_gpu = this.model.opts.use_gpu;\n else this._use_gpu = typeof navigator !== \"undefined\" && !!navigator?.gpu && this.model_settings.gpu_batch_size !== 0;\n }\n return this._use_gpu;\n }\n set use_gpu(value) {\n this._use_gpu = value;\n }\n get batch_size() {\n if (this.use_gpu && this.model_settings?.gpu_batch_size) return this.model_settings.gpu_batch_size;\n return this.model.opts.batch_size || this.model_config.batch_size || 1;\n }\n};\n\n// adapters/transformers.js\nvar SmartEmbedTransformersAdapter = class extends SmartEmbedAdapter {\n /**\n * Create transformers adapter instance\n * @param {SmartEmbedModel} model - Parent model instance\n */\n constructor(model2) {\n super(model2);\n this.pipeline = null;\n this.tokenizer = null;\n }\n /**\n * Load model and tokenizer\n * @returns {Promise}\n */\n async load() {\n await this.load_transformers();\n this.loaded = true;\n }\n /**\n * Unload model and free resources\n * @returns {Promise}\n */\n async unload() {\n if (this.pipeline) {\n if (this.pipeline.destroy) await this.pipeline.destroy();\n this.pipeline = null;\n }\n if (this.tokenizer) {\n this.tokenizer = null;\n }\n this.loaded = false;\n }\n /**\n * Initialize transformers pipeline and tokenizer\n * @private\n * @returns {Promise}\n */\n async load_transformers() {\n const { pipeline, env, AutoTokenizer } = await import(\"@xenova/transformers\");\n env.allowLocalModels = false;\n const pipeline_opts = {\n quantized: true\n };\n if (this.use_gpu) {\n console.log(\"[Transformers] Using GPU\");\n pipeline_opts.device = \"webgpu\";\n pipeline_opts.dtype = \"fp32\";\n } else {\n console.log(\"[Transformers] Using CPU\");\n env.backends.onnx.wasm.numThreads = 8;\n }\n this.pipeline = await pipeline(\"feature-extraction\", this.model_key, pipeline_opts);\n this.tokenizer = await AutoTokenizer.from_pretrained(this.model_key);\n }\n /**\n * Count tokens in input text\n * @param {string} input - Text to tokenize\n * @returns {Promise} Token count result\n */\n async count_tokens(input) {\n if (!this.tokenizer) await this.load();\n const { input_ids } = await this.tokenizer(input);\n return { tokens: input_ids.data.length };\n }\n /**\n * Generate embeddings for multiple inputs\n * @param {Array} inputs - Array of input objects\n * @returns {Promise>} Processed inputs with embeddings\n */\n async embed_batch(inputs) {\n if (!this.pipeline) await this.load();\n const filtered_inputs = inputs.filter((item) => item.embed_input?.length > 0);\n if (!filtered_inputs.length) return [];\n if (filtered_inputs.length > this.batch_size) {\n console.log(`Processing ${filtered_inputs.length} inputs in batches of ${this.batch_size}`);\n const results = [];\n for (let i = 0; i < filtered_inputs.length; i += this.batch_size) {\n const batch = filtered_inputs.slice(i, i + this.batch_size);\n const batch_results = await this._process_batch(batch);\n results.push(...batch_results);\n }\n return results;\n }\n return await this._process_batch(filtered_inputs);\n }\n /**\n * Process a single batch of inputs\n * @private\n * @param {Array} batch_inputs - Batch of inputs to process\n * @returns {Promise>} Processed batch results\n */\n async _process_batch(batch_inputs) {\n const tokens = await Promise.all(batch_inputs.map((item) => this.count_tokens(item.embed_input)));\n const embed_inputs = await Promise.all(batch_inputs.map(async (item, i) => {\n if (tokens[i].tokens < this.max_tokens) return item.embed_input;\n let token_ct = tokens[i].tokens;\n let truncated_input = item.embed_input;\n while (token_ct > this.max_tokens) {\n const pct = this.max_tokens / token_ct;\n const max_chars = Math.floor(truncated_input.length * pct * 0.9);\n truncated_input = truncated_input.substring(0, max_chars) + \"...\";\n token_ct = (await this.count_tokens(truncated_input)).tokens;\n }\n tokens[i].tokens = token_ct;\n return truncated_input;\n }));\n try {\n const resp = await this.pipeline(embed_inputs, { pooling: \"mean\", normalize: true });\n return batch_inputs.map((item, i) => {\n item.vec = Array.from(resp[i].data).map((val) => Math.round(val * 1e8) / 1e8);\n item.tokens = tokens[i].tokens;\n return item;\n });\n } catch (err) {\n console.error(\"error_processing_batch\", err);\n return Promise.all(batch_inputs.map(async (item) => {\n try {\n const result = await this.pipeline(item.embed_input, { pooling: \"mean\", normalize: true });\n item.vec = Array.from(result[0].data).map((val) => Math.round(val * 1e8) / 1e8);\n item.tokens = (await this.count_tokens(item.embed_input)).tokens;\n return item;\n } catch (single_err) {\n console.error(\"error_processing_single_item\", single_err);\n return {\n ...item,\n vec: [],\n tokens: 0,\n error: single_err.message\n };\n }\n }));\n }\n }\n /** @returns {Object} Settings configuration for transformers adapter */\n get settings_config() {\n return transformers_settings_config;\n }\n};\nvar transformers_settings_config = {\n \"[EMBED_MODEL].gpu_batch_size\": {\n name: \"GPU Batch Size\",\n type: \"number\",\n description: \"Number of embeddings to process per batch on GPU. Use 0 to disable GPU.\",\n placeholder: \"Enter number ex. 10\"\n },\n \"legacy_transformers\": {\n name: \"Legacy Transformers (no GPU)\",\n type: \"toggle\",\n description: \"Use legacy transformers (v2) instead of v3.\",\n callback: \"embed_model_changed\",\n default: true\n }\n};\n\n// build/transformers_iframe_script.js\nvar model = null;\nasync function process_message(data) {\n const { method, params, id, iframe_id } = data;\n try {\n let result;\n switch (method) {\n case \"init\":\n console.log(\"init\");\n break;\n case \"load\":\n console.log(\"load\", params);\n model = new SmartEmbedModel({\n ...params,\n adapters: { transformers: SmartEmbedTransformersAdapter },\n adapter: \"transformers\",\n settings: {}\n });\n await model.load();\n result = { model_loaded: true };\n break;\n case \"embed_batch\":\n if (!model) throw new Error(\"Model not loaded\");\n result = await model.embed_batch(params.inputs);\n break;\n case \"count_tokens\":\n if (!model) throw new Error(\"Model not loaded\");\n result = await model.count_tokens(params);\n break;\n default:\n throw new Error(`Unknown method: ${method}`);\n }\n return { id, result, iframe_id };\n } catch (error) {\n console.error(\"Error processing message:\", error);\n return { id, error: error.message, iframe_id };\n }\n}\nprocess_message({ method: \"init\" });\n"; \ No newline at end of file +export const transformers_connector = "var __defProp = Object.defineProperty;\nvar __defNormalProp = (obj, key, value) => key in obj ? __defProp(obj, key, { enumerable: true, configurable: true, writable: true, value }) : obj[key] = value;\nvar __publicField = (obj, key, value) => __defNormalProp(obj, typeof key !== \"symbol\" ? key + \"\" : key, value);\n\n// ../smart-model/smart_model.js\nvar SmartModel = class {\n /**\n * Create a SmartModel instance.\n * @param {Object} opts - Configuration options\n * @param {Object} opts.adapters - Map of adapter names to adapter classes\n * @param {Object} opts.settings - Model settings configuration\n * @param {Object} opts.model_config - Model-specific configuration\n * @param {string} opts.model_config.adapter - Name of the adapter to use\n * @param {string} [opts.model_key] - Optional model identifier to override settings\n * @throws {Error} If required options are missing\n */\n constructor(opts = {}) {\n __publicField(this, \"scope_name\", \"smart_model\");\n this.opts = opts;\n this.validate_opts(opts);\n this.state = \"unloaded\";\n this._adapter = null;\n }\n /**\n * Initialize the model by loading the configured adapter.\n * @async\n * @returns {Promise}\n */\n async initialize() {\n this.load_adapter(this.adapter_name);\n await this.load();\n }\n /**\n * Validate required options.\n * @param {Object} opts - Configuration options\n */\n validate_opts(opts) {\n if (!opts.adapters) throw new Error(\"opts.adapters is required\");\n if (!opts.settings) throw new Error(\"opts.settings is required\");\n }\n /**\n * Get the current settings\n * @returns {Object} Current settings\n */\n get settings() {\n if (!this.opts.settings) this.opts.settings = {\n ...this.constructor.defaults\n };\n return this.opts.settings;\n }\n /**\n * Get the current adapter name\n * @returns {string} Current adapter name\n */\n get adapter_name() {\n const adapter_key = this.opts.model_config?.adapter || this.opts.adapter || this.settings.adapter || Object.keys(this.adapters)[0];\n if (!adapter_key || !this.adapters[adapter_key]) throw new Error(`Platform \"${adapter_key}\" not supported`);\n return adapter_key;\n }\n /**\n * Get adapter-specific settings.\n * @returns {Object} Settings for current adapter\n */\n get adapter_settings() {\n if (!this.settings[this.adapter_name]) this.settings[this.adapter_name] = {};\n return this.settings[this.adapter_name];\n }\n get adapter_config() {\n const base_config = this.adapters[this.adapter_name]?.defaults || {};\n return {\n ...base_config,\n ...this.adapter_settings,\n ...this.opts.adapter_config\n };\n }\n /**\n * Get available models.\n * @returns {Object} Map of model objects\n */\n get models() {\n return this.adapter.models;\n }\n /**\n * Get the default model key to use\n * @returns {string} Default model identifier\n */\n get default_model_key() {\n throw new Error(\"default_model_key must be overridden in sub-class\");\n }\n /**\n * Get the current model key\n * @returns {string} Current model key\n */\n get model_key() {\n return this.opts.model_key || this.adapter_config.model_key || this.settings.model_key || this.default_model_key;\n }\n /**\n * Get the current model configuration\n * @returns {Object} Combined base and custom model configuration\n */\n get model_config() {\n const model_key = this.model_key;\n const base_model_config = this.models[model_key] || {};\n return {\n ...this.adapter_config,\n ...base_model_config,\n ...this.opts.model_config\n };\n }\n get model_settings() {\n if (!this.settings[this.model_key]) this.settings[this.model_key] = {};\n return this.settings[this.model_key];\n }\n /**\n * Load the current adapter and transition to loaded state.\n * @async\n * @returns {Promise}\n */\n async load() {\n this.set_state(\"loading\");\n if (!this.adapter?.loaded) {\n await this.invoke_adapter_method(\"load\");\n }\n this.set_state(\"loaded\");\n }\n /**\n * Unload the current adapter and transition to unloaded state.\n * @async\n * @returns {Promise}\n */\n async unload() {\n if (this.adapter?.loaded) {\n this.set_state(\"unloading\");\n await this.invoke_adapter_method(\"unload\");\n this.set_state(\"unloaded\");\n }\n }\n /**\n * Set the model's state.\n * @param {('unloaded'|'loading'|'loaded'|'unloading')} new_state - The new state\n * @throws {Error} If the state is invalid\n */\n set_state(new_state) {\n const valid_states = [\"unloaded\", \"loading\", \"loaded\", \"unloading\"];\n if (!valid_states.includes(new_state)) {\n throw new Error(`Invalid state: ${new_state}`);\n }\n this.state = new_state;\n }\n get is_loading() {\n return this.state === \"loading\";\n }\n get is_loaded() {\n return this.state === \"loaded\";\n }\n get is_unloading() {\n return this.state === \"unloading\";\n }\n get is_unloaded() {\n return this.state === \"unloaded\";\n }\n // ADAPTERS\n /**\n * Get the map of available adapters\n * @returns {Object} Map of adapter names to adapter classes\n */\n get adapters() {\n return this.opts.adapters || {};\n }\n /**\n * Load a specific adapter by name.\n * @async\n * @param {string} adapter_name - Name of the adapter to load\n * @throws {Error} If adapter not found or loading fails\n * @returns {Promise}\n */\n async load_adapter(adapter_name) {\n this.set_adapter(adapter_name);\n if (!this._adapter.loaded) {\n this.set_state(\"loading\");\n try {\n await this.invoke_adapter_method(\"load\");\n this.set_state(\"loaded\");\n } catch (err) {\n this.set_state(\"unloaded\");\n throw new Error(`Failed to load adapter: ${err.message}`);\n }\n }\n }\n /**\n * Set an adapter instance by name without loading it.\n * @param {string} adapter_name - Name of the adapter to set\n * @throws {Error} If adapter not found\n */\n set_adapter(adapter_name) {\n const AdapterClass = this.adapters[adapter_name];\n if (!AdapterClass) {\n throw new Error(`Adapter \"${adapter_name}\" not found.`);\n }\n if (this._adapter?.constructor.name.toLowerCase() === adapter_name.toLowerCase()) {\n return;\n }\n this._adapter = new AdapterClass(this);\n }\n /**\n * Get the current active adapter instance\n * @returns {Object} The active adapter instance\n * @throws {Error} If adapter not found\n */\n get adapter() {\n const adapter_name = this.adapter_name;\n if (!adapter_name) {\n throw new Error(`Adapter not set for model.`);\n }\n if (!this._adapter) {\n this.load_adapter(adapter_name);\n }\n return this._adapter;\n }\n /**\n * Ensure the adapter is ready to execute a method.\n * @param {string} method - Name of the method to check\n * @throws {Error} If adapter not loaded or method not implemented\n */\n ensure_adapter_ready(method) {\n if (!this.adapter) {\n throw new Error(\"No adapter loaded.\");\n }\n if (typeof this.adapter[method] !== \"function\") {\n throw new Error(`Adapter does not implement method: ${method}`);\n }\n }\n /**\n * Invoke a method on the current adapter.\n * @async\n * @param {string} method - Name of the method to call\n * @param {...any} args - Arguments to pass to the method\n * @returns {Promise} Result from the adapter method\n * @throws {Error} If adapter not ready or method fails\n */\n async invoke_adapter_method(method, ...args) {\n this.ensure_adapter_ready(method);\n return await this.adapter[method](...args);\n }\n /**\n * Get platforms as dropdown options.\n * @returns {Array} Array of {value, name} option objects\n */\n get_platforms_as_options() {\n console.log(\"get_platforms_as_options\", this.adapters);\n return Object.entries(this.adapters).map(([key, AdapterClass]) => ({ value: key, name: AdapterClass.defaults.description || key }));\n }\n // SETTINGS\n /**\n * Get the settings configuration schema\n * @returns {Object} Settings configuration object\n */\n get settings_config() {\n return this.process_settings_config({\n adapter: {\n name: \"Model Platform\",\n type: \"dropdown\",\n description: \"Select a model platform to use with Smart Model.\",\n options_callback: \"get_platforms_as_options\",\n is_scope: true,\n // trigger re-render of settings when changed\n callback: \"adapter_changed\",\n default: \"default\"\n }\n });\n }\n /**\n * Process settings configuration with conditionals and prefixes.\n * @param {Object} _settings_config - Raw settings configuration\n * @param {string} [prefix] - Optional prefix for setting keys\n * @returns {Object} Processed settings configuration\n */\n process_settings_config(_settings_config, prefix = null) {\n return Object.entries(_settings_config).reduce((acc, [key, val]) => {\n if (val.conditional) {\n if (!val.conditional(this)) return acc;\n delete val.conditional;\n }\n const new_key = (prefix ? prefix + \".\" : \"\") + this.process_setting_key(key);\n acc[new_key] = val;\n return acc;\n }, {});\n }\n /**\n * Process an individual setting key.\n * @param {string} key - Setting key to process\n * @returns {string} Processed setting key\n */\n process_setting_key(key) {\n return key;\n }\n // override in sub-class if needed for prefixes and variable replacements\n re_render_settings() {\n if (typeof this.opts.re_render_settings === \"function\") this.opts.re_render_settings();\n else console.warn(\"re_render_settings is not a function (must be passed in model opts)\");\n }\n /**\n * Reload model.\n */\n reload_model() {\n console.log(\"reload_model\", this.opts);\n if (typeof this.opts.reload_model === \"function\") this.opts.reload_model();\n else console.warn(\"reload_model is not a function (must be passed in model opts)\");\n }\n adapter_changed() {\n this.reload_model();\n this.re_render_settings();\n }\n model_changed() {\n this.reload_model();\n this.re_render_settings();\n }\n // /**\n // * Render settings.\n // * @param {HTMLElement} [container] - Container element\n // * @param {Object} [opts] - Render options\n // * @returns {Promise} Container element\n // */\n // async render_settings(container=this.settings_container, opts = {}) {\n // if(!this.settings_container || container !== this.settings_container) this.settings_container = container;\n // const model_type = this.constructor.name.toLowerCase().replace('smart', '').replace('model', '');\n // let model_settings_container;\n // if(this.settings_container) {\n // const container_id = `#${model_type}-model-settings-container`;\n // model_settings_container = this.settings_container.querySelector(container_id);\n // if(!model_settings_container) {\n // model_settings_container = document.createElement('div');\n // model_settings_container.id = container_id;\n // this.settings_container.appendChild(model_settings_container);\n // }\n // model_settings_container.innerHTML = '
Loading ' + this.adapter_name + ' settings...
';\n // }\n // const frag = await this.render_settings_component(this, opts);\n // if(model_settings_container) {\n // model_settings_container.innerHTML = '';\n // model_settings_container.appendChild(frag);\n // this.smart_view.on_open_overlay(model_settings_container);\n // }\n // return frag;\n // }\n};\n__publicField(SmartModel, \"defaults\", {\n // override in sub-class if needed\n});\n\n// smart_embed_model.js\nvar SmartEmbedModel = class extends SmartModel {\n /**\n * Create a SmartEmbedModel instance\n * @param {Object} opts - Configuration options\n * @param {Object} [opts.adapters] - Map of available adapter implementations\n * @param {boolean} [opts.use_gpu] - Whether to enable GPU acceleration\n * @param {number} [opts.gpu_batch_size] - Batch size when using GPU\n * @param {number} [opts.batch_size] - Default batch size for processing\n * @param {Object} [opts.model_config] - Model-specific configuration\n * @param {string} [opts.model_config.adapter] - Override adapter type\n * @param {number} [opts.model_config.dims] - Embedding dimensions\n * @param {number} [opts.model_config.max_tokens] - Maximum tokens to process\n * @param {Object} [opts.settings] - User settings\n * @param {string} [opts.settings.api_key] - API key for remote models\n * @param {number} [opts.settings.min_chars] - Minimum text length to embed\n */\n constructor(opts = {}) {\n super(opts);\n __publicField(this, \"scope_name\", \"smart_embed_model\");\n }\n /**\n * Count tokens in an input string\n * @param {string} input - Text to tokenize\n * @returns {Promise} Token count result\n * @property {number} tokens - Number of tokens in input\n * \n * @example\n * ```javascript\n * const result = await model.count_tokens(\"Hello world\");\n * console.log(result.tokens); // 2\n * ```\n */\n async count_tokens(input) {\n return await this.invoke_adapter_method(\"count_tokens\", input);\n }\n /**\n * Generate embeddings for a single input\n * @param {string|Object} input - Text or object with embed_input property\n * @returns {Promise} Embedding result\n * @property {number[]} vec - Embedding vector\n * @property {number} tokens - Token count\n * \n * @example\n * ```javascript\n * const result = await model.embed(\"Hello world\");\n * console.log(result.vec); // [0.1, 0.2, ...]\n * ```\n */\n async embed(input) {\n if (typeof input === \"string\") input = { embed_input: input };\n return (await this.embed_batch([input]))[0];\n }\n /**\n * Generate embeddings for multiple inputs in batch\n * @param {Array} inputs - Array of texts or objects with embed_input\n * @returns {Promise>} Array of embedding results\n * @property {number[]} vec - Embedding vector for each input\n * @property {number} tokens - Token count for each input\n * \n * @example\n * ```javascript\n * const results = await model.embed_batch([\n * { embed_input: \"First text\" },\n * { embed_input: \"Second text\" }\n * ]);\n * ```\n */\n async embed_batch(inputs) {\n return await this.invoke_adapter_method(\"embed_batch\", inputs);\n }\n /**\n * Get the current batch size based on GPU settings\n * @returns {number} Current batch size for processing\n */\n get batch_size() {\n return this.adapter.batch_size || 1;\n }\n /**\n * Get settings configuration schema\n * @returns {Object} Settings configuration object\n */\n get settings_config() {\n const _settings_config = {\n adapter: {\n name: \"Embedding Model Platform\",\n type: \"dropdown\",\n description: \"Select an embedding model platform.\",\n options_callback: \"get_platforms_as_options\",\n callback: \"adapter_changed\",\n default: this.constructor.defaults.adapter\n },\n ...this.adapter.settings_config || {}\n };\n return this.process_settings_config(_settings_config);\n }\n process_setting_key(key) {\n return key.replace(/\\[ADAPTER\\]/g, this.adapter_name);\n }\n /**\n * Get available embedding model options\n * @returns {Array} Array of model options with value and name\n */\n get_embedding_model_options() {\n return Object.entries(this.models).map(([key, model2]) => ({ value: key, name: key }));\n }\n /**\n * Get embedding model options including 'None' option\n * @returns {Array} Array of model options with value and name\n */\n get_block_embedding_model_options() {\n const options = this.get_embedding_model_options();\n options.unshift({ value: \"None\", name: \"None\" });\n return options;\n }\n};\n__publicField(SmartEmbedModel, \"defaults\", {\n adapter: \"transformers\"\n});\n\n// ../smart-model/adapters/_adapter.js\nvar SmartModelAdapter = class {\n /**\n * Create a SmartModelAdapter instance.\n * @param {SmartModel} model - The parent SmartModel instance\n */\n constructor(model2) {\n this.model = model2;\n this.state = \"unloaded\";\n }\n /**\n * Load the adapter.\n * @async\n * @returns {Promise}\n */\n async load() {\n this.set_state(\"loaded\");\n }\n /**\n * Unload the adapter.\n * @returns {void}\n */\n unload() {\n this.set_state(\"unloaded\");\n }\n /**\n * Get all settings.\n * @returns {Object} All settings\n */\n get settings() {\n return this.model.settings;\n }\n /**\n * Get the current model key.\n * @returns {string} Current model identifier\n */\n get model_key() {\n return this.model.model_key;\n }\n /**\n * Get the current model configuration.\n * @returns {Object} Model configuration\n */\n get model_config() {\n return this.model.model_config;\n }\n /**\n * Get model-specific settings.\n * @returns {Object} Settings for current model\n */\n get model_settings() {\n return this.model.model_settings;\n }\n /**\n * Get adapter-specific configuration.\n * @returns {Object} Adapter configuration\n */\n get adapter_config() {\n return this.model.adapter_config;\n }\n /**\n * Get adapter-specific settings.\n * @returns {Object} Adapter settings\n */\n get adapter_settings() {\n return this.model.adapter_settings;\n }\n /**\n * Get the models.\n * @returns {Object} Map of model objects\n */\n get models() {\n if (typeof this.adapter_config.models === \"object\" && Object.keys(this.adapter_config.models || {}).length > 0) return this.adapter_config.models;\n else {\n return {};\n }\n }\n /**\n * Get available models from the API.\n * @abstract\n * @param {boolean} [refresh=false] - Whether to refresh cached models\n * @returns {Promise} Map of model objects\n */\n async get_models(refresh = false) {\n throw new Error(\"get_models not implemented\");\n }\n /**\n * Validate the parameters for get_models.\n * @returns {boolean|Array} True if parameters are valid, otherwise an array of error objects\n */\n validate_get_models_params() {\n return true;\n }\n /**\n * Get available models as dropdown options synchronously.\n * @returns {Array} Array of model options.\n */\n get_models_as_options_sync() {\n const models = this.models;\n const params_valid = this.validate_get_models_params();\n if (params_valid !== true) return params_valid;\n if (!Object.keys(models || {}).length) {\n this.get_models(true);\n return [{ value: \"\", name: \"No models currently available\" }];\n }\n return Object.values(models).map((model2) => ({ value: model2.id, name: model2.name || model2.id })).sort((a, b) => a.name.localeCompare(b.name));\n }\n /**\n * Set the adapter's state.\n * @param {('unloaded'|'loading'|'loaded'|'unloading')} new_state - The new state\n * @throws {Error} If the state is invalid\n */\n set_state(new_state) {\n const valid_states = [\"unloaded\", \"loading\", \"loaded\", \"unloading\"];\n if (!valid_states.includes(new_state)) {\n throw new Error(`Invalid state: ${new_state}`);\n }\n this.state = new_state;\n }\n // Replace individual state getters/setters with a unified state management\n get is_loading() {\n return this.state === \"loading\";\n }\n get is_loaded() {\n return this.state === \"loaded\";\n }\n get is_unloading() {\n return this.state === \"unloading\";\n }\n get is_unloaded() {\n return this.state === \"unloaded\";\n }\n};\n\n// adapters/_adapter.js\nvar SmartEmbedAdapter = class extends SmartModelAdapter {\n /**\n * Create adapter instance\n * @param {SmartEmbedModel} model - Parent model instance\n */\n constructor(model2) {\n super(model2);\n this.smart_embed = model2;\n }\n /**\n * Count tokens in input text\n * @abstract\n * @param {string} input - Text to tokenize\n * @returns {Promise} Token count result\n * @property {number} tokens - Number of tokens in input\n * @throws {Error} If not implemented by subclass\n */\n async count_tokens(input) {\n throw new Error(\"count_tokens method not implemented\");\n }\n /**\n * Generate embeddings for single input\n * @abstract\n * @param {string|Object} input - Text to embed\n * @returns {Promise} Embedding result\n * @property {number[]} vec - Embedding vector\n * @property {number} tokens - Number of tokens in input\n * @throws {Error} If not implemented by subclass\n */\n async embed(input) {\n throw new Error(\"embed method not implemented\");\n }\n /**\n * Generate embeddings for multiple inputs\n * @abstract\n * @param {Array} inputs - Texts to embed\n * @returns {Promise>} Array of embedding results\n * @property {number[]} vec - Embedding vector for each input\n * @property {number} tokens - Number of tokens in each input\n * @throws {Error} If not implemented by subclass\n */\n async embed_batch(inputs) {\n throw new Error(\"embed_batch method not implemented\");\n }\n get settings_config() {\n return {\n \"[ADAPTER].model_key\": {\n name: \"Embedding Model\",\n type: \"dropdown\",\n description: \"Select an embedding model.\",\n options_callback: \"adapter.get_models_as_options_sync\",\n callback: \"model_changed\",\n default: this.constructor.defaults.default_model\n }\n };\n }\n get dims() {\n return this.model_config.dims;\n }\n get max_tokens() {\n return this.model_config.max_tokens;\n }\n // get batch_size() { return this.model_config.batch_size; }\n get use_gpu() {\n if (typeof this._use_gpu === \"undefined\") {\n if (typeof this.model.opts.use_gpu !== \"undefined\") this._use_gpu = this.model.opts.use_gpu;\n else this._use_gpu = typeof navigator !== \"undefined\" && !!navigator?.gpu && this.model_settings.gpu_batch_size !== 0;\n }\n return this._use_gpu;\n }\n set use_gpu(value) {\n this._use_gpu = value;\n }\n get batch_size() {\n if (this.use_gpu && this.model_settings?.gpu_batch_size) return this.model_settings.gpu_batch_size;\n return this.model.opts.batch_size || this.model_config.batch_size || 1;\n }\n};\n/**\n * @override in sub-class with adapter-specific default configurations\n * @property {string} id - The adapter identifier\n * @property {string} description - Human-readable description\n * @property {string} type - Adapter type (\"API\")\n * @property {string} endpoint - API endpoint\n * @property {string} adapter - Adapter identifier\n * @property {string} default_model - Default model to use\n */\n__publicField(SmartEmbedAdapter, \"defaults\", {});\n\n// adapters/transformers.js\nvar transformers_defaults = {\n adapter: \"transformers\",\n description: \"Transformers\",\n default_model: \"TaylorAI/bge-micro-v2\"\n};\nvar SmartEmbedTransformersAdapter = class extends SmartEmbedAdapter {\n /**\n * Create transformers adapter instance\n * @param {SmartEmbedModel} model - Parent model instance\n */\n constructor(model2) {\n super(model2);\n this.pipeline = null;\n this.tokenizer = null;\n }\n /**\n * Load model and tokenizer\n * @returns {Promise}\n */\n async load() {\n await this.load_transformers();\n this.loaded = true;\n }\n /**\n * Unload model and free resources\n * @returns {Promise}\n */\n async unload() {\n if (this.pipeline) {\n if (this.pipeline.destroy) await this.pipeline.destroy();\n this.pipeline = null;\n }\n if (this.tokenizer) {\n this.tokenizer = null;\n }\n this.loaded = false;\n }\n /**\n * Initialize transformers pipeline and tokenizer\n * @private\n * @returns {Promise}\n */\n async load_transformers() {\n const { pipeline, env, AutoTokenizer } = await import(\"@xenova/transformers\");\n env.allowLocalModels = false;\n const pipeline_opts = {\n quantized: true\n };\n if (this.use_gpu) {\n console.log(\"[Transformers] Using GPU\");\n pipeline_opts.device = \"webgpu\";\n pipeline_opts.dtype = \"fp32\";\n } else {\n console.log(\"[Transformers] Using CPU\");\n env.backends.onnx.wasm.numThreads = 8;\n }\n this.pipeline = await pipeline(\"feature-extraction\", this.model_key, pipeline_opts);\n this.tokenizer = await AutoTokenizer.from_pretrained(this.model_key);\n }\n /**\n * Count tokens in input text\n * @param {string} input - Text to tokenize\n * @returns {Promise} Token count result\n */\n async count_tokens(input) {\n if (!this.tokenizer) await this.load();\n const { input_ids } = await this.tokenizer(input);\n return { tokens: input_ids.data.length };\n }\n /**\n * Generate embeddings for multiple inputs\n * @param {Array} inputs - Array of input objects\n * @returns {Promise>} Processed inputs with embeddings\n */\n async embed_batch(inputs) {\n if (!this.pipeline) await this.load();\n const filtered_inputs = inputs.filter((item) => item.embed_input?.length > 0);\n if (!filtered_inputs.length) return [];\n if (filtered_inputs.length > this.batch_size) {\n console.log(`Processing ${filtered_inputs.length} inputs in batches of ${this.batch_size}`);\n const results = [];\n for (let i = 0; i < filtered_inputs.length; i += this.batch_size) {\n const batch = filtered_inputs.slice(i, i + this.batch_size);\n const batch_results = await this._process_batch(batch);\n results.push(...batch_results);\n }\n return results;\n }\n return await this._process_batch(filtered_inputs);\n }\n /**\n * Process a single batch of inputs\n * @private\n * @param {Array} batch_inputs - Batch of inputs to process\n * @returns {Promise>} Processed batch results\n */\n async _process_batch(batch_inputs) {\n const tokens = await Promise.all(batch_inputs.map((item) => this.count_tokens(item.embed_input)));\n const embed_inputs = await Promise.all(batch_inputs.map(async (item, i) => {\n if (tokens[i].tokens < this.max_tokens) return item.embed_input;\n let token_ct = tokens[i].tokens;\n let truncated_input = item.embed_input;\n while (token_ct > this.max_tokens) {\n const pct = this.max_tokens / token_ct;\n const max_chars = Math.floor(truncated_input.length * pct * 0.9);\n truncated_input = truncated_input.substring(0, max_chars) + \"...\";\n token_ct = (await this.count_tokens(truncated_input)).tokens;\n }\n tokens[i].tokens = token_ct;\n return truncated_input;\n }));\n try {\n const resp = await this.pipeline(embed_inputs, { pooling: \"mean\", normalize: true });\n return batch_inputs.map((item, i) => {\n item.vec = Array.from(resp[i].data).map((val) => Math.round(val * 1e8) / 1e8);\n item.tokens = tokens[i].tokens;\n return item;\n });\n } catch (err) {\n console.error(\"error_processing_batch\", err);\n return Promise.all(batch_inputs.map(async (item) => {\n try {\n const result = await this.pipeline(item.embed_input, { pooling: \"mean\", normalize: true });\n item.vec = Array.from(result[0].data).map((val) => Math.round(val * 1e8) / 1e8);\n item.tokens = (await this.count_tokens(item.embed_input)).tokens;\n return item;\n } catch (single_err) {\n console.error(\"error_processing_single_item\", single_err);\n return {\n ...item,\n vec: [],\n tokens: 0,\n error: single_err.message\n };\n }\n }));\n }\n }\n /** @returns {Object} Settings configuration for transformers adapter */\n get settings_config() {\n return transformers_settings_config;\n }\n /**\n * Get available models (hardcoded list)\n * @returns {Promise} Map of model objects\n */\n get_models() {\n return Promise.resolve(this.models);\n }\n get models() {\n return transformers_models;\n }\n};\n__publicField(SmartEmbedTransformersAdapter, \"defaults\", transformers_defaults);\nvar transformers_models = {\n \"TaylorAI/bge-micro-v2\": {\n \"id\": \"TaylorAI/bge-micro-v2\",\n \"batch_size\": 1,\n \"dims\": 384,\n \"max_tokens\": 512,\n \"name\": \"BGE-micro-v2\",\n \"description\": \"Local, 512 tokens, 384 dim (recommended)\",\n \"adapter\": \"transformers\"\n },\n \"TaylorAI/gte-tiny\": {\n \"id\": \"TaylorAI/gte-tiny\",\n \"batch_size\": 1,\n \"dims\": 384,\n \"max_tokens\": 512,\n \"name\": \"GTE-tiny\",\n \"description\": \"Local, 512 tokens, 384 dim\",\n \"adapter\": \"transformers\"\n },\n \"Mihaiii/Ivysaur\": {\n \"id\": \"Mihaiii/Ivysaur\",\n \"batch_size\": 1,\n \"dims\": 384,\n \"max_tokens\": 512,\n \"name\": \"Ivysaur\",\n \"description\": \"Local, 512 tokens, 384 dim\",\n \"adapter\": \"transformers\"\n },\n \"andersonbcdefg/bge-small-4096\": {\n \"id\": \"andersonbcdefg/bge-small-4096\",\n \"batch_size\": 1,\n \"dims\": 384,\n \"max_tokens\": 4096,\n \"name\": \"BGE-small-4K\",\n \"description\": \"Local, 4,096 tokens, 384 dim\",\n \"adapter\": \"transformers\"\n },\n \"Xenova/jina-embeddings-v2-base-zh\": {\n \"id\": \"Xenova/jina-embeddings-v2-base-zh\",\n \"batch_size\": 1,\n \"dims\": 512,\n \"max_tokens\": 8192,\n \"name\": \"Jina-v2-base-zh-8K\",\n \"description\": \"Local, 8,192 tokens, 512 dim, Chinese/English bilingual\",\n \"adapter\": \"transformers\"\n },\n \"Xenova/jina-embeddings-v2-small-en\": {\n \"id\": \"Xenova/jina-embeddings-v2-small-en\",\n \"batch_size\": 1,\n \"dims\": 512,\n \"max_tokens\": 8192,\n \"name\": \"Jina-v2-small-en\",\n \"description\": \"Local, 8,192 tokens, 512 dim\",\n \"adapter\": \"transformers\"\n },\n \"nomic-ai/nomic-embed-text-v1.5\": {\n \"id\": \"nomic-ai/nomic-embed-text-v1.5\",\n \"batch_size\": 1,\n \"dims\": 256,\n \"max_tokens\": 8192,\n \"name\": \"Nomic-embed-text-v1.5\",\n \"description\": \"Local, 8,192 tokens, 256 dim\",\n \"adapter\": \"transformers\"\n },\n \"Xenova/bge-small-en-v1.5\": {\n \"id\": \"Xenova/bge-small-en-v1.5\",\n \"batch_size\": 1,\n \"dims\": 384,\n \"max_tokens\": 512,\n \"name\": \"BGE-small\",\n \"description\": \"Local, 512 tokens, 384 dim\",\n \"adapter\": \"transformers\"\n },\n \"nomic-ai/nomic-embed-text-v1\": {\n \"id\": \"nomic-ai/nomic-embed-text-v1\",\n \"batch_size\": 1,\n \"dims\": 768,\n \"max_tokens\": 2048,\n \"name\": \"Nomic-embed-text\",\n \"description\": \"Local, 2,048 tokens, 768 dim\",\n \"adapter\": \"transformers\"\n }\n};\nvar transformers_settings_config = {\n \"[ADAPTER].gpu_batch_size\": {\n name: \"GPU Batch Size\",\n type: \"number\",\n description: \"Number of embeddings to process per batch on GPU. Use 0 to disable GPU.\",\n placeholder: \"Enter number ex. 10\"\n },\n \"[ADAPTER].legacy_transformers\": {\n name: \"Legacy Transformers (no GPU)\",\n type: \"toggle\",\n description: \"Use legacy transformers (v2) instead of v3.\",\n callback: \"embed_model_changed\",\n default: true\n }\n};\n\n// build/transformers_iframe_script.js\nvar model = null;\nasync function process_message(data) {\n const { method, params, id, iframe_id } = data;\n try {\n let result;\n switch (method) {\n case \"init\":\n console.log(\"init\");\n break;\n case \"load\":\n console.log(\"load\", params);\n model = new SmartEmbedModel({\n ...params,\n adapters: { transformers: SmartEmbedTransformersAdapter },\n adapter: \"transformers\",\n settings: {}\n });\n await model.load();\n result = { model_loaded: true };\n break;\n case \"embed_batch\":\n if (!model) throw new Error(\"Model not loaded\");\n result = await model.embed_batch(params.inputs);\n break;\n case \"count_tokens\":\n if (!model) throw new Error(\"Model not loaded\");\n result = await model.count_tokens(params);\n break;\n default:\n throw new Error(`Unknown method: ${method}`);\n }\n return { id, result, iframe_id };\n } catch (error) {\n console.error(\"Error processing message:\", error);\n return { id, error: error.message, iframe_id };\n }\n}\nprocess_message({ method: \"init\" });\n"; \ No newline at end of file diff --git a/smart-embed-model/connectors/transformers_worker.js b/smart-embed-model/connectors/transformers_worker.js index 237646bb..c017f445 100644 --- a/smart-embed-model/connectors/transformers_worker.js +++ b/smart-embed-model/connectors/transformers_worker.js @@ -15,6 +15,7 @@ var SmartModel = class { * @throws {Error} If required options are missing */ constructor(opts = {}) { + __publicField(this, "scope_name", "smart_model"); this.opts = opts; this.validate_opts(opts); this.state = "unloaded"; @@ -52,7 +53,7 @@ var SmartModel = class { * @returns {string} Current adapter name */ get adapter_name() { - const adapter_key = this.models[this.model_key]?.adapter; + const adapter_key = this.opts.model_config?.adapter || this.opts.adapter || this.settings.adapter || Object.keys(this.adapters)[0]; if (!adapter_key || !this.adapters[adapter_key]) throw new Error(`Platform "${adapter_key}" not supported`); return adapter_key; } @@ -72,6 +73,13 @@ var SmartModel = class { ...this.opts.adapter_config }; } + /** + * Get available models. + * @returns {Object} Map of model objects + */ + get models() { + return this.adapter.models; + } /** * Get the default model key to use * @returns {string} Default model identifier @@ -79,18 +87,12 @@ var SmartModel = class { get default_model_key() { throw new Error("default_model_key must be overridden in sub-class"); } - /** - * Get available models configuration - * @returns {Object} Map of model configurations - */ - get models() { - } /** * Get the current model key * @returns {string} Current model key */ get model_key() { - return this.opts.model_key || this.settings.model_key || this.adapter_config.model_key || this.default_model_key; + return this.opts.model_key || this.adapter_config.model_key || this.settings.model_key || this.default_model_key; } /** * Get the current model configuration @@ -240,6 +242,14 @@ var SmartModel = class { this.ensure_adapter_ready(method); return await this.adapter[method](...args); } + /** + * Get platforms as dropdown options. + * @returns {Array} Array of {value, name} option objects + */ + get_platforms_as_options() { + console.log("get_platforms_as_options", this.adapters); + return Object.entries(this.adapters).map(([key, AdapterClass]) => ({ value: key, name: AdapterClass.defaults.description || key })); + } // SETTINGS /** * Get the settings configuration schema @@ -247,7 +257,16 @@ var SmartModel = class { */ get settings_config() { return this.process_settings_config({ - // SETTINGS GO HERE + adapter: { + name: "Model Platform", + type: "dropdown", + description: "Select a model platform to use with Smart Model.", + options_callback: "get_platforms_as_options", + is_scope: true, + // trigger re-render of settings when changed + callback: "adapter_changed", + default: "default" + } }); } /** @@ -276,146 +295,59 @@ var SmartModel = class { return key; } // override in sub-class if needed for prefixes and variable replacements + re_render_settings() { + if (typeof this.opts.re_render_settings === "function") this.opts.re_render_settings(); + else console.warn("re_render_settings is not a function (must be passed in model opts)"); + } + /** + * Reload model. + */ + reload_model() { + console.log("reload_model", this.opts); + if (typeof this.opts.reload_model === "function") this.opts.reload_model(); + else console.warn("reload_model is not a function (must be passed in model opts)"); + } + adapter_changed() { + this.reload_model(); + this.re_render_settings(); + } + model_changed() { + this.reload_model(); + this.re_render_settings(); + } + // /** + // * Render settings. + // * @param {HTMLElement} [container] - Container element + // * @param {Object} [opts] - Render options + // * @returns {Promise} Container element + // */ + // async render_settings(container=this.settings_container, opts = {}) { + // if(!this.settings_container || container !== this.settings_container) this.settings_container = container; + // const model_type = this.constructor.name.toLowerCase().replace('smart', '').replace('model', ''); + // let model_settings_container; + // if(this.settings_container) { + // const container_id = `#${model_type}-model-settings-container`; + // model_settings_container = this.settings_container.querySelector(container_id); + // if(!model_settings_container) { + // model_settings_container = document.createElement('div'); + // model_settings_container.id = container_id; + // this.settings_container.appendChild(model_settings_container); + // } + // model_settings_container.innerHTML = '
Loading ' + this.adapter_name + ' settings...
'; + // } + // const frag = await this.render_settings_component(this, opts); + // if(model_settings_container) { + // model_settings_container.innerHTML = ''; + // model_settings_container.appendChild(frag); + // this.smart_view.on_open_overlay(model_settings_container); + // } + // return frag; + // } }; __publicField(SmartModel, "defaults", { // override in sub-class if needed }); -// models.json -var models_default = { - "TaylorAI/bge-micro-v2": { - id: "TaylorAI/bge-micro-v2", - batch_size: 1, - dims: 384, - max_tokens: 512, - name: "BGE-micro-v2", - description: "Local, 512 tokens, 384 dim (recommended)", - adapter: "transformers" - }, - "TaylorAI/gte-tiny": { - id: "TaylorAI/gte-tiny", - batch_size: 1, - dims: 384, - max_tokens: 512, - name: "GTE-tiny", - description: "Local, 512 tokens, 384 dim", - adapter: "transformers" - }, - "Mihaiii/Ivysaur": { - id: "Mihaiii/Ivysaur", - batch_size: 1, - dims: 384, - max_tokens: 512, - name: "Ivysaur", - description: "Local, 512 tokens, 384 dim", - adapter: "transformers" - }, - "andersonbcdefg/bge-small-4096": { - id: "andersonbcdefg/bge-small-4096", - batch_size: 1, - dims: 384, - max_tokens: 4096, - name: "BGE-small-4K", - description: "Local, 4,096 tokens, 384 dim", - adapter: "transformers" - }, - "Xenova/jina-embeddings-v2-base-zh": { - id: "Xenova/jina-embeddings-v2-base-zh", - batch_size: 1, - dims: 512, - max_tokens: 8192, - name: "Jina-v2-base-zh-8K", - description: "Local, 8,192 tokens, 512 dim, Chinese/English bilingual", - adapter: "transformers" - }, - "text-embedding-3-small": { - id: "text-embedding-3-small", - batch_size: 50, - dims: 1536, - max_tokens: 8191, - name: "OpenAI Text-3 Small", - description: "API, 8,191 tokens, 1,536 dim", - endpoint: "https://api.openai.com/v1/embeddings", - adapter: "openai" - }, - "text-embedding-3-large": { - id: "text-embedding-3-large", - batch_size: 50, - dims: 3072, - max_tokens: 8191, - name: "OpenAI Text-3 Large", - description: "API, 8,191 tokens, 3,072 dim", - endpoint: "https://api.openai.com/v1/embeddings", - adapter: "openai" - }, - "text-embedding-3-small-512": { - id: "text-embedding-3-small", - batch_size: 50, - dims: 512, - max_tokens: 8191, - name: "OpenAI Text-3 Small - 512", - description: "API, 8,191 tokens, 512 dim", - endpoint: "https://api.openai.com/v1/embeddings", - adapter: "openai" - }, - "text-embedding-3-large-256": { - id: "text-embedding-3-large", - batch_size: 50, - dims: 256, - max_tokens: 8191, - name: "OpenAI Text-3 Large - 256", - description: "API, 8,191 tokens, 256 dim", - endpoint: "https://api.openai.com/v1/embeddings", - adapter: "openai" - }, - "text-embedding-ada-002": { - id: "text-embedding-ada-002", - batch_size: 50, - dims: 1536, - max_tokens: 8191, - name: "OpenAI Ada", - description: "API, 8,191 tokens, 1,536 dim", - endpoint: "https://api.openai.com/v1/embeddings", - adapter: "openai" - }, - "Xenova/jina-embeddings-v2-small-en": { - id: "Xenova/jina-embeddings-v2-small-en", - batch_size: 1, - dims: 512, - max_tokens: 8192, - name: "Jina-v2-small-en", - description: "Local, 8,192 tokens, 512 dim", - adapter: "transformers" - }, - "nomic-ai/nomic-embed-text-v1.5": { - id: "nomic-ai/nomic-embed-text-v1.5", - batch_size: 1, - dims: 256, - max_tokens: 8192, - name: "Nomic-embed-text-v1.5", - description: "Local, 8,192 tokens, 256 dim", - adapter: "transformers" - }, - "Xenova/bge-small-en-v1.5": { - id: "Xenova/bge-small-en-v1.5", - batch_size: 1, - dims: 384, - max_tokens: 512, - name: "BGE-small", - description: "Local, 512 tokens, 384 dim", - adapter: "transformers" - }, - "nomic-ai/nomic-embed-text-v1": { - id: "nomic-ai/nomic-embed-text-v1", - batch_size: 1, - dims: 768, - max_tokens: 2048, - name: "Nomic-embed-text", - description: "Local, 2,048 tokens, 768 dim", - adapter: "transformers" - } -}; - // smart_embed_model.js var SmartEmbedModel = class extends SmartModel { /** @@ -435,6 +367,7 @@ var SmartEmbedModel = class extends SmartModel { */ constructor(opts = {}) { super(opts); + __publicField(this, "scope_name", "smart_embed_model"); } /** * Count tokens in an input string @@ -493,40 +426,26 @@ var SmartEmbedModel = class extends SmartModel { get batch_size() { return this.adapter.batch_size || 1; } - /** @returns {Object} Map of available embedding models */ - get models() { - return models_default; - } - /** @returns {string} Default model key if none specified */ - get default_model_key() { - return "TaylorAI/bge-micro-v2"; - } /** * Get settings configuration schema * @returns {Object} Settings configuration object */ get settings_config() { const _settings_config = { - model_key: { - name: "Embedding Model", + adapter: { + name: "Embedding Model Platform", type: "dropdown", - description: "Select an embedding model.", - options_callback: "embed_model.get_embedding_model_options", - callback: "embed_model_changed", - default: "TaylorAI/bge-micro-v2" - }, - "[EMBED_MODEL].min_chars": { - name: "Minimum Embedding Length", - type: "number", - description: "Minimum length of note to embed.", - placeholder: "Enter number ex. 300" + description: "Select an embedding model platform.", + options_callback: "get_platforms_as_options", + callback: "adapter_changed", + default: this.constructor.defaults.adapter }, ...this.adapter.settings_config || {} }; - return this.process_settings_config(_settings_config, "embed_model"); + return this.process_settings_config(_settings_config); } process_setting_key(key) { - return key.replace(/\[EMBED_MODEL\]/g, this.model_key); + return key.replace(/\[ADAPTER\]/g, this.adapter_name); } /** * Get available embedding model options @@ -546,7 +465,7 @@ var SmartEmbedModel = class extends SmartModel { } }; __publicField(SmartEmbedModel, "defaults", { - model_key: "TaylorAI/bge-micro-v2" + adapter: "transformers" }); // ../smart-model/adapters/_adapter.js @@ -616,6 +535,46 @@ var SmartModelAdapter = class { get adapter_settings() { return this.model.adapter_settings; } + /** + * Get the models. + * @returns {Object} Map of model objects + */ + get models() { + if (typeof this.adapter_config.models === "object" && Object.keys(this.adapter_config.models || {}).length > 0) return this.adapter_config.models; + else { + return {}; + } + } + /** + * Get available models from the API. + * @abstract + * @param {boolean} [refresh=false] - Whether to refresh cached models + * @returns {Promise} Map of model objects + */ + async get_models(refresh = false) { + throw new Error("get_models not implemented"); + } + /** + * Validate the parameters for get_models. + * @returns {boolean|Array} True if parameters are valid, otherwise an array of error objects + */ + validate_get_models_params() { + return true; + } + /** + * Get available models as dropdown options synchronously. + * @returns {Array} Array of model options. + */ + get_models_as_options_sync() { + const models = this.models; + const params_valid = this.validate_get_models_params(); + if (params_valid !== true) return params_valid; + if (!Object.keys(models || {}).length) { + this.get_models(true); + return [{ value: "", name: "No models currently available" }]; + } + return Object.values(models).map((model2) => ({ value: model2.id, name: model2.name || model2.id })).sort((a, b) => a.name.localeCompare(b.name)); + } /** * Set the adapter's state. * @param {('unloaded'|'loading'|'loaded'|'unloading')} new_state - The new state @@ -688,6 +647,18 @@ var SmartEmbedAdapter = class extends SmartModelAdapter { async embed_batch(inputs) { throw new Error("embed_batch method not implemented"); } + get settings_config() { + return { + "[ADAPTER].model_key": { + name: "Embedding Model", + type: "dropdown", + description: "Select an embedding model.", + options_callback: "adapter.get_models_as_options_sync", + callback: "model_changed", + default: this.constructor.defaults.default_model + } + }; + } get dims() { return this.model_config.dims; } @@ -710,8 +681,23 @@ var SmartEmbedAdapter = class extends SmartModelAdapter { return this.model.opts.batch_size || this.model_config.batch_size || 1; } }; +/** + * @override in sub-class with adapter-specific default configurations + * @property {string} id - The adapter identifier + * @property {string} description - Human-readable description + * @property {string} type - Adapter type ("API") + * @property {string} endpoint - API endpoint + * @property {string} adapter - Adapter identifier + * @property {string} default_model - Default model to use + */ +__publicField(SmartEmbedAdapter, "defaults", {}); // adapters/transformers.js +var transformers_defaults = { + adapter: "transformers", + description: "Transformers", + default_model: "TaylorAI/bge-micro-v2" +}; var SmartEmbedTransformersAdapter = class extends SmartEmbedAdapter { /** * Create transformers adapter instance @@ -849,15 +835,109 @@ var SmartEmbedTransformersAdapter = class extends SmartEmbedAdapter { get settings_config() { return transformers_settings_config; } + /** + * Get available models (hardcoded list) + * @returns {Promise} Map of model objects + */ + get_models() { + return Promise.resolve(this.models); + } + get models() { + return transformers_models; + } +}; +__publicField(SmartEmbedTransformersAdapter, "defaults", transformers_defaults); +var transformers_models = { + "TaylorAI/bge-micro-v2": { + "id": "TaylorAI/bge-micro-v2", + "batch_size": 1, + "dims": 384, + "max_tokens": 512, + "name": "BGE-micro-v2", + "description": "Local, 512 tokens, 384 dim (recommended)", + "adapter": "transformers" + }, + "TaylorAI/gte-tiny": { + "id": "TaylorAI/gte-tiny", + "batch_size": 1, + "dims": 384, + "max_tokens": 512, + "name": "GTE-tiny", + "description": "Local, 512 tokens, 384 dim", + "adapter": "transformers" + }, + "Mihaiii/Ivysaur": { + "id": "Mihaiii/Ivysaur", + "batch_size": 1, + "dims": 384, + "max_tokens": 512, + "name": "Ivysaur", + "description": "Local, 512 tokens, 384 dim", + "adapter": "transformers" + }, + "andersonbcdefg/bge-small-4096": { + "id": "andersonbcdefg/bge-small-4096", + "batch_size": 1, + "dims": 384, + "max_tokens": 4096, + "name": "BGE-small-4K", + "description": "Local, 4,096 tokens, 384 dim", + "adapter": "transformers" + }, + "Xenova/jina-embeddings-v2-base-zh": { + "id": "Xenova/jina-embeddings-v2-base-zh", + "batch_size": 1, + "dims": 512, + "max_tokens": 8192, + "name": "Jina-v2-base-zh-8K", + "description": "Local, 8,192 tokens, 512 dim, Chinese/English bilingual", + "adapter": "transformers" + }, + "Xenova/jina-embeddings-v2-small-en": { + "id": "Xenova/jina-embeddings-v2-small-en", + "batch_size": 1, + "dims": 512, + "max_tokens": 8192, + "name": "Jina-v2-small-en", + "description": "Local, 8,192 tokens, 512 dim", + "adapter": "transformers" + }, + "nomic-ai/nomic-embed-text-v1.5": { + "id": "nomic-ai/nomic-embed-text-v1.5", + "batch_size": 1, + "dims": 256, + "max_tokens": 8192, + "name": "Nomic-embed-text-v1.5", + "description": "Local, 8,192 tokens, 256 dim", + "adapter": "transformers" + }, + "Xenova/bge-small-en-v1.5": { + "id": "Xenova/bge-small-en-v1.5", + "batch_size": 1, + "dims": 384, + "max_tokens": 512, + "name": "BGE-small", + "description": "Local, 512 tokens, 384 dim", + "adapter": "transformers" + }, + "nomic-ai/nomic-embed-text-v1": { + "id": "nomic-ai/nomic-embed-text-v1", + "batch_size": 1, + "dims": 768, + "max_tokens": 2048, + "name": "Nomic-embed-text", + "description": "Local, 2,048 tokens, 768 dim", + "adapter": "transformers" + } }; var transformers_settings_config = { - "[EMBED_MODEL].gpu_batch_size": { + "[ADAPTER].gpu_batch_size": { name: "GPU Batch Size", type: "number", description: "Number of embeddings to process per batch on GPU. Use 0 to disable GPU.", placeholder: "Enter number ex. 10" }, - "legacy_transformers": { + "[ADAPTER].legacy_transformers": { name: "Legacy Transformers (no GPU)", type: "toggle", description: "Use legacy transformers (v2) instead of v3.", diff --git a/smart-environment/smart_env.js b/smart-environment/smart_env.js index 112167ea..577e3414 100644 --- a/smart-environment/smart_env.js +++ b/smart-environment/smart_env.js @@ -205,7 +205,6 @@ export class SmartEnv { * @returns {Promise} Container element */ async render_component(component_key, scope, opts = {}) { - console.log('render_component', component_key, scope, opts); const template = this.get_component(component_key, scope); const frag = await template(scope, opts); return frag;