Fix the issue #415

legendy4141 · Dec 12, 2024 · 25118b4 · 25118b4
1 parent aa888a2
commit 25118b4
Show file tree

Hide file tree

Showing 81 changed files with 346 additions and 346 deletions.
diff --git a/.github/workflows/nodejs.yml b/.github/workflows/nodejs.yml
@@ -100,9 +100,9 @@ jobs:
     - name: Test examples
       working-directory: ./
       env:
-        OPENAI_BASE_URL: http://0.0.0.0:8000
+        _BASE_URL: http://0.0.0.0:8000
       run: |
-        python ci/mock_openai.py &
+        python ci/mock_.py &
         cd nodejs/examples
         npm test
   macos:

diff --git a/ci/mock_openai.py b/ci/mock_openai.py
@@ -1,17 +1,17 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright The LanceDB Authors
-"""A zero-dependency mock OpenAI embeddings API endpoint for testing purposes."""
+"""A zero-dependency mock  embeddings API endpoint for testing purposes."""
 import argparse
 import json
 import http.server
 
 
-class MockOpenAIRequestHandler(http.server.BaseHTTPRequestHandler):
+class MockRequestHandler(http.server.BaseHTTPRequestHandler):
     def do_POST(self):
         content_length = int(self.headers["Content-Length"])
         post_data = self.rfile.read(content_length)
         post_data = json.loads(post_data.decode("utf-8"))
-        # See: https://platform.openai.com/docs/api-reference/embeddings/create
+        # See: https://platform..com/docs/api-reference/embeddings/create
 
         if isinstance(post_data["input"], str):
             num_inputs = 1
@@ -45,13 +45,13 @@ def do_POST(self):
 
 
 if __name__ == "__main__":
-    parser = argparse.ArgumentParser(description="Mock OpenAI embeddings API endpoint")
+    parser = argparse.ArgumentParser(description="Mock  embeddings API endpoint")
     parser.add_argument("--port", type=int, default=8000, help="Port to listen on")
     args = parser.parse_args()
     port = args.port
 
     print(f"server started on port {port}. Press Ctrl-C to stop.")
-    print(f"To use, set OPENAI_BASE_URL=http://localhost:{port} in your environment.")
+    print(f"To use, set _BASE_URL=http://localhost:{port} in your environment.")
 
-    with http.server.HTTPServer(("0.0.0.0", port), MockOpenAIRequestHandler) as server:
+    with http.server.HTTPServer(("0.0.0.0", port), MockRequestHandler) as server:
         server.serve_forever()
diff --git a/docs/mkdocs.yml b/docs/mkdocs.yml
@@ -139,7 +139,7 @@ nav:
               - Cross Encoder Reranker: reranking/cross_encoder.md
               - ColBERT Reranker: reranking/colbert.md
               - Jina Reranker: reranking/jina.md
-              - OpenAI Reranker: reranking/openai.md
+              -  Reranker: reranking/.md
               - AnswerDotAi Rerankers: reranking/answerdotai.md
               - Voyage AI Rerankers: reranking/voyageai.md
               - Building Custom Rerankers: reranking/custom_reranker.md
@@ -162,7 +162,7 @@ nav:
                   - Sentence Transformers: embeddings/available_embedding_models/text_embedding_functions/sentence_transformers.md
                   - Huggingface Embedding Models: embeddings/available_embedding_models/text_embedding_functions/huggingface_embedding.md
                   - Ollama Embeddings: embeddings/available_embedding_models/text_embedding_functions/ollama_embedding.md
-                  - OpenAI Embeddings: embeddings/available_embedding_models/text_embedding_functions/openai_embedding.md
+                  -  Embeddings: embeddings/available_embedding_models/text_embedding_functions/_embedding.md
                   - Instructor Embeddings: embeddings/available_embedding_models/text_embedding_functions/instructor_embedding.md
                   - Gemini Embeddings: embeddings/available_embedding_models/text_embedding_functions/gemini_embedding.md
                   - Cohere Embeddings: embeddings/available_embedding_models/text_embedding_functions/cohere_embedding.md
@@ -271,7 +271,7 @@ nav:
           - Cross Encoder Reranker: reranking/cross_encoder.md
           - ColBERT Reranker: reranking/colbert.md
           - Jina Reranker: reranking/jina.md
-          - OpenAI Reranker: reranking/openai.md
+          -  Reranker: reranking/.md
           - AnswerDotAi Rerankers: reranking/answerdotai.md
           - Building Custom Rerankers: reranking/custom_reranker.md
           - Example: notebooks/lancedb_reranking.ipynb
@@ -293,7 +293,7 @@ nav:
               - Sentence Transformers: embeddings/available_embedding_models/text_embedding_functions/sentence_transformers.md
               - Huggingface Embedding Models: embeddings/available_embedding_models/text_embedding_functions/huggingface_embedding.md
               - Ollama Embeddings: embeddings/available_embedding_models/text_embedding_functions/ollama_embedding.md
-              - OpenAI Embeddings: embeddings/available_embedding_models/text_embedding_functions/openai_embedding.md
+              -  Embeddings: embeddings/available_embedding_models/text_embedding_functions/_embedding.md
               - Instructor Embeddings: embeddings/available_embedding_models/text_embedding_functions/instructor_embedding.md
               - Gemini Embeddings: embeddings/available_embedding_models/text_embedding_functions/gemini_embedding.md
               - Cohere Embeddings: embeddings/available_embedding_models/text_embedding_functions/cohere_embedding.md

diff --git a/docs/package-lock.json b/docs/package-lock.json
diff --git a/docs/src/basic.md b/docs/src/basic.md
@@ -547,13 +547,13 @@ Use the `drop_table()` method on the database to remove a table.
 
 
 ## Using the Embedding API
-You can use the embedding API when working with embedding models. It automatically vectorizes the data at ingestion and query time and comes with built-in integrations with popular embedding models like Openai, Hugging Face, Sentence Transformers, CLIP and more.
+You can use the embedding API when working with embedding models. It automatically vectorizes the data at ingestion and query time and comes with built-in integrations with popular embedding models like , Hugging Face, Sentence Transformers, CLIP and more.
 
 === "Python"
 
     ```python
     --8<-- "python/python/tests/docs/test_embeddings_optional.py:imports"
-    --8<-- "python/python/tests/docs/test_embeddings_optional.py:openai_embeddings"
+    --8<-- "python/python/tests/docs/test_embeddings_optional.py:_embeddings"
     ```
 
 === "Typescript[^1]"
@@ -562,14 +562,14 @@ You can use the embedding API when working with embedding models. It automatical
 
         ```typescript
         --8<-- "nodejs/examples/embedding.test.ts:imports"
-        --8<-- "nodejs/examples/embedding.test.ts:openai_embeddings"
+        --8<-- "nodejs/examples/embedding.test.ts:_embeddings"
         ```
 
 === "Rust"
 
     ```rust
-    --8<-- "rust/lancedb/examples/openai.rs:imports"
-    --8<-- "rust/lancedb/examples/openai.rs:openai_embeddings"
+    --8<-- "rust/lancedb/examples/.rs:imports"
+    --8<-- "rust/lancedb/examples/.rs:_embeddings"
     ```
 
 Learn about using the existing integrations and creating custom embedding functions in the [embedding API guide](./embeddings/index.md).

diff --git a/...eddings/available_embedding_models/text_embedding_functions/openai_embedding.md b/...eddings/available_embedding_models/text_embedding_functions/openai_embedding.md
@@ -1,11 +1,11 @@
-# OpenAI embeddings
+#  embeddings
 
-LanceDB registers the OpenAI embeddings function in the registry by default, as `openai`. Below are the parameters that you can customize when creating the instances:
+LanceDB registers the  embeddings function in the registry by default, as ``. Below are the parameters that you can customize when creating the instances:
 
 | Parameter | Type | Default Value | Description |
 |---|---|---|---|
 | `name` | `str` | `"text-embedding-ada-002"` | The name of the model. |
-| `dim` | `int` |  Model default   | For OpenAI's newer text-embedding-3 model, we can specify a dimensionality that is smaller than the 1536 size. This feature supports it |
+| `dim` | `int` |  Model default   | For 's newer text-embedding-3 model, we can specify a dimensionality that is smaller than the 1536 size. This feature supports it |
 | `use_azure` | bool | `False` | Set true to use Azure OpenAI SDK |
 
 
@@ -15,7 +15,7 @@ from lancedb.pydantic import LanceModel, Vector
 from lancedb.embeddings import get_registry
 
 db = lancedb.connect("/tmp/db")
-func = get_registry().get("openai").create(name="text-embedding-ada-002")
+func = get_registry().get("").create(name="text-embedding-ada-002")
 
 class Words(LanceModel):
     text: str = func.SourceField()

diff --git a/docs/src/embeddings/default_embedding_functions.md b/docs/src/embeddings/default_embedding_functions.md
@@ -7,7 +7,7 @@ Before jumping on the list of available models, let's understand how to get an e
 !!! example "Example usage"
     ```python
     model = get_registry()
-              .get("openai")
+              .get("")
               .create(name="text-embedding-ada-002")
     ```
 
@@ -46,7 +46,7 @@ These functions are registered by default to handle text embeddings.
 | [**Sentence Transformers**](available_embedding_models/text_embedding_functions/sentence_transformers.md "sentence-transformers")  | 🧠 **SentenceTransformers** is a Python framework for state-of-the-art sentence, text, and image embeddings. | [<img src="https://raw.githubusercontent.com/lancedb/assets/main/docs/assets/logos/sbert_2.png" alt="Sentence Transformers Icon" width="90" height="35">](available_embedding_models/text_embedding_functions/sentence_transformers.md)|                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                               
 | [**Huggingface Models**](available_embedding_models/text_embedding_functions/huggingface_embedding.md "huggingface") |🤗 We offer support for all **Huggingface** models. The default model is `colbert-ir/colbertv2.0`. | [<img src="https://raw.githubusercontent.com/lancedb/assets/main/docs/assets/logos/hugging_face.png" alt="Huggingface Icon" width="130" height="35">](available_embedding_models/text_embedding_functions/huggingface_embedding.md) |                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                               
 | [**Ollama Embeddings**](available_embedding_models/text_embedding_functions/ollama_embedding.md "ollama") | 🔍 Generate embeddings via the **Ollama** python library. Ollama supports embedding models, making it possible to build RAG apps. | [<img src="https://raw.githubusercontent.com/lancedb/assets/main/docs/assets/logos/Ollama.png" alt="Ollama Icon" width="110" height="35">](available_embedding_models/text_embedding_functions/ollama_embedding.md)|                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                               
-| [**OpenAI Embeddings**](available_embedding_models/text_embedding_functions/openai_embedding.md "openai")| 🔑 **OpenAI’s** text embeddings measure the relatedness of text strings. **LanceDB** supports state-of-the-art embeddings from OpenAI. | [<img src="https://raw.githubusercontent.com/lancedb/assets/main/docs/assets/logos/openai.png" alt="OpenAI Icon" width="100" height="35">](available_embedding_models/text_embedding_functions/openai_embedding.md)|                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                               
+| [** Embeddings**](available_embedding_models/text_embedding_functions/_embedding.md "")| 🔑 **’s** text embeddings measure the relatedness of text strings. **LanceDB** supports state-of-the-art embeddings from . | [<img src="https://raw.githubusercontent.com/lancedb/assets/main/docs/assets/logos/.png" alt=" Icon" width="100" height="35">](available_embedding_models/text_embedding_functions/_embedding.md)|                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                               
 | [**Instructor Embeddings**](available_embedding_models/text_embedding_functions/instructor_embedding.md "instructor") | 📚 **Instructor**: An instruction-finetuned text embedding model that can generate text embeddings tailored to any task and domains by simply providing the task instruction, without any finetuning. | [<img src="https://raw.githubusercontent.com/lancedb/assets/main/docs/assets/logos/instructor_embedding.png" alt="Instructor Embedding Icon" width="140" height="35">](available_embedding_models/text_embedding_functions/instructor_embedding.md) |                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                               
 | [**Gemini Embeddings**](available_embedding_models/text_embedding_functions/gemini_embedding.md "gemini-text") | 🌌 Google’s Gemini API generates state-of-the-art embeddings for words, phrases, and sentences. | [<img src="https://raw.githubusercontent.com/lancedb/assets/main/docs/assets/logos/gemini.png" alt="Gemini Icon" width="95" height="35">](available_embedding_models/text_embedding_functions/gemini_embedding.md) |                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                               
 | [**Cohere Embeddings**](available_embedding_models/text_embedding_functions/cohere_embedding.md "cohere") | 💬 This will help you get started with **Cohere** embedding models using LanceDB. Using cohere API requires cohere package. Install it via `pip`. | [<img src="https://raw.githubusercontent.com/lancedb/assets/main/docs/assets/logos/cohere.png" alt="Cohere Icon" width="140" height="35">](available_embedding_models/text_embedding_functions/cohere_embedding.md) |                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                               
@@ -60,7 +60,7 @@ These functions are registered by default to handle text embeddings.
 [st-key]: "sentence-transformers"
 [hf-key]: "huggingface"
 [ollama-key]: "ollama"
-[openai-key]: "openai"
+[-key]: ""
 [instructor-key]: "instructor"
 [gemini-key]: "gemini-text"
 [cohere-key]: "cohere"

diff --git a/docs/src/embeddings/embedding_functions.md b/docs/src/embeddings/embedding_functions.md
@@ -30,31 +30,31 @@ For this purpose, LanceDB introduces an **embedding functions API**, that allow
     abstract base interface. It subclasses Pydantic Model which can be utilized to write complex schemas simply as we'll see next!
 
 === "TypeScript"
-    In the TypeScript SDK, the choices are more limited. For now, only the OpenAI
+    In the TypeScript SDK, the choices are more limited. For now, only the 
     embedding function is available.
 
     ```javascript
     import * as lancedb from '@lancedb/lancedb'
     import { getRegistry } from '@lancedb/lancedb/embeddings'
 
-    // You need to provide an OpenAI API key
+    // You need to provide an  API key
     const apiKey = ""
     // The embedding function will create embeddings for the 'text' column
-    const func = getRegistry().get("openai").create({apiKey})
+    const func = getRegistry().get("").create({apiKey})
     ```
 === "Rust"
-    In the Rust SDK, the choices are more limited. For now, only the OpenAI
-    embedding function is available. But unlike the Python and TypeScript SDKs, you need to manually register the OpenAI embedding function.
+    In the Rust SDK, the choices are more limited. For now, only the 
+    embedding function is available. But unlike the Python and TypeScript SDKs, you need to manually register the  embedding function.
 
     ```toml
-    // Make sure to include the `openai` feature
+    // Make sure to include the `` feature
     [dependencies]
-    lancedb = {version = "*", features = ["openai"]}
+    lancedb = {version = "*", features = [""]}
     ```
 
     ```rust
-    --8<-- "rust/lancedb/examples/openai.rs:imports"
-    --8<-- "rust/lancedb/examples/openai.rs:openai_embeddings"
+    --8<-- "rust/lancedb/examples/.rs:imports"
+    --8<-- "rust/lancedb/examples/.rs:_embeddings"
     ```
 
 ## 2. Define the data model or schema