Skip to content

Commit

Permalink
Harrison/modelscope (#5156)
Browse files Browse the repository at this point in the history
Co-authored-by: thomas-yanxin <[email protected]>
Co-authored-by: Dev 2049 <[email protected]>
  • Loading branch information
3 people authored May 24, 2023
1 parent 2d5588c commit 11c26eb
Show file tree
Hide file tree
Showing 5 changed files with 195 additions and 0 deletions.
20 changes: 20 additions & 0 deletions docs/ecosystem/modelscope.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# ModelScope

This page covers how to use the ModelScope ecosystem within LangChain.
It is broken into two parts: installation and setup, followed by references to specific ModelScope wrappers.

## Installation and Setup

* Install the Python SDK with `pip install modelscope`

## Wrappers

### Embeddings

LangChain provides a ModelScope Embeddings wrapper, which you can import with:

```python
from langchain.embeddings import ModelScopeEmbeddings
```

For a more detailed walkthrough of the embeddings wrapper, see [this notebook](../modules/models/text_embedding/examples/modelscope_hub.ipynb).
82 changes: 82 additions & 0 deletions docs/modules/models/text_embedding/examples/modelscope_hub.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
{
"cells": [
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"# ModelScope\n",
"\n",
"Let's load the ModelScope Embedding class."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from langchain.embeddings import ModelScopeEmbeddings"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"model_id = \"damo/nlp_corom_sentence-embedding_english-base\""
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"embeddings = ModelScopeEmbeddings(model_id=model_id)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"text = \"This is a test document.\""
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"query_result = embeddings.embed_query(text)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"doc_results = embeddings.embed_documents([\"foo\"])"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "chatgpt",
"language": "python",
"name": "python3"
},
"language_info": {
"name": "python",
"version": "3.9.15"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}
2 changes: 2 additions & 0 deletions langchain/embeddings/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from langchain.embeddings.huggingface_hub import HuggingFaceHubEmbeddings
from langchain.embeddings.jina import JinaEmbeddings
from langchain.embeddings.llamacpp import LlamaCppEmbeddings
from langchain.embeddings.modelscope_hub import ModelScopeEmbeddings
from langchain.embeddings.mosaicml import MosaicMLInstructorEmbeddings
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.embeddings.sagemaker_endpoint import SagemakerEndpointEmbeddings
Expand All @@ -38,6 +39,7 @@
"JinaEmbeddings",
"LlamaCppEmbeddings",
"HuggingFaceHubEmbeddings",
"ModelScopeEmbeddings",
"TensorflowHubEmbeddings",
"SagemakerEndpointEmbeddings",
"HuggingFaceInstructEmbeddings",
Expand Down
72 changes: 72 additions & 0 deletions langchain/embeddings/modelscope_hub.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
"""Wrapper around ModelScopeHub embedding models."""
from typing import Any, List

from pydantic import BaseModel, Extra

from langchain.embeddings.base import Embeddings


class ModelScopeEmbeddings(BaseModel, Embeddings):
    """Wrapper around ModelScope Hub embedding models.

    To use, you should have the ``modelscope`` python package installed.

    Example:
        .. code-block:: python

            from langchain.embeddings import ModelScopeEmbeddings
            model_id = "damo/nlp_corom_sentence-embedding_english-base"
            embed = ModelScopeEmbeddings(model_id=model_id)
    """

    # Sentence-embedding pipeline; assigned in ``__init__`` after validation,
    # so any value supplied by the caller is overwritten.
    embed: Any
    model_id: str = "damo/nlp_corom_sentence-embedding_english-base"
    """Model name to use."""

    def __init__(self, **kwargs: Any):
        """Validate the fields, then build the sentence-embedding pipeline.

        Args:
            **kwargs: Field values for this model (e.g. ``model_id``).

        Raises:
            ImportError: If the ``modelscope`` package cannot be imported.
        """
        super().__init__(**kwargs)
        # Only the imports are guarded, so a failure while building the
        # pipeline is reported with its own message instead of being
        # relabelled as a missing ``modelscope`` installation.
        try:
            from modelscope.pipelines import pipeline
            from modelscope.utils.constant import Tasks
        except ImportError as e:
            raise ImportError(
                "Could not import the modelscope python package. "
                "Please install it with `pip install modelscope`."
            ) from e

        self.embed = pipeline(Tasks.sentence_embedding, model=self.model_id)

    class Config:
        """Configuration for this pydantic object."""

        extra = Extra.forbid

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Compute doc embeddings using a modelscope embedding model.

        Args:
            texts: The list of texts to embed.

        Returns:
            List of embeddings, one for each text.
        """
        if not texts:
            # Nothing to embed, so skip the model call entirely.
            return []
        # Newlines are replaced with spaces before the texts reach the model.
        cleaned = [text.replace("\n", " ") for text in texts]
        inputs = {"source_sentence": cleaned}
        embeddings = self.embed(input=inputs)["text_embedding"]
        # NOTE(review): presumably a numpy array with one row per text -
        # confirm against the modelscope pipeline output.
        return embeddings.tolist()

    def embed_query(self, text: str) -> List[float]:
        """Compute query embeddings using a modelscope embedding model.

        Args:
            text: The text to embed.

        Returns:
            Embeddings for the text.
        """
        text = text.replace("\n", " ")
        # The pipeline is called with a one-element batch; its first (and
        # only) result is the embedding for the query.
        inputs = {"source_sentence": [text]}
        embedding = self.embed(input=inputs)["text_embedding"][0]
        return embedding.tolist()
19 changes: 19 additions & 0 deletions tests/integration_tests/embeddings/test_modelscope_hub.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
"""Test modelscope embeddings."""
from langchain.embeddings.modelscope_hub import ModelScopeEmbeddings


def test_modelscope_embedding_documents() -> None:
    """Test modelscope embeddings for documents."""
    documents = ["foo bar"]
    embedding = ModelScopeEmbeddings()
    output = embedding.embed_documents(documents)
    # One embedding is expected per input document.
    assert len(output) == len(documents)
    # 512 is the vector length this test expects from the default model.
    assert len(output[0]) == 512


def test_modelscope_embedding_query() -> None:
    """Check that a single query is embedded into a 512-element vector."""
    query_text = "foo bar"
    vector = ModelScopeEmbeddings().embed_query(query_text)
    assert len(vector) == 512

0 comments on commit 11c26eb

Please sign in to comment.