diff --git a/README.md b/README.md
index 7f031241..09dce6c6 100644
--- a/README.md
+++ b/README.md
@@ -453,16 +453,6 @@ print(output_text)
 Space Robots are a great way to get your kids interested in science. After all, they are the future!
 ```
 
-## Embeddings API
-
-Embeddings are vector representations of sequences. You can use these vectors for measuring the overall similarity between texts. Embeddings are useful for tasks such as search and retrieval.
-
-```python
-resp = together.Embeddings.create("embed this sentence into a single vector", model="togethercomputer/bert-base-uncased")
-
-print(resp['data'][0]['embedding']) # [0.06659205, 0.07896972, 0.007910785 ........]
-```
-
 ## Colab Tutorial
 
 Follow along in our Colab (Google Colaboratory) Notebook Tutorial [Example Finetuning Project](https://colab.research.google.com/drive/11DwtftycpDSgp3Z1vnV-Cy68zvkGZL4K?usp=sharing).
diff --git a/pyproject.toml b/pyproject.toml
index eaaec773..e0def03e 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "poetry.masonry.api"
 
 [tool.poetry]
 name = "together"
-version = "0.2.8"
+version = "0.2.9"
 authors = [
     "Together AI "
 ]
diff --git a/src/together/__init__.py b/src/together/__init__.py
index fc8b8fbd..f75a3b42 100644
--- a/src/together/__init__.py
+++ b/src/together/__init__.py
@@ -1,6 +1,7 @@
 import os
 import sys
 import urllib.parse
+from typing import Type
 
 from .version import VERSION
 
@@ -41,6 +42,27 @@
 from .models import Models
 
 
+class Together:
+    complete: Type[Complete]
+    completion: Type[Completion]
+    embeddings: Type[Embeddings]
+    files: Type[Files]
+    finetune: Type[Finetune]
+    image: Type[Image]
+    models: Type[Models]
+
+    def __init__(
+        self,
+    ) -> None:
+        self.complete = Complete
+        self.completion = Completion
+        self.embeddings = Embeddings
+        self.files = Files
+        self.finetune = Finetune
+        self.image = Image
+        self.models = Models
+
+
 __all__ = [
     "api_key",
     "api_base",
@@ -63,4 +85,5 @@
     "MISSING_API_KEY_MESSAGE",
     "BACKOFF_FACTOR",
     "min_samples",
+    "Together",
 ]
diff --git a/src/together/commands/complete.py b/src/together/commands/complete.py
index 909476ef..4d9b1309 100644
--- a/src/together/commands/complete.py
+++ b/src/together/commands/complete.py
@@ -90,6 +90,13 @@ def add_parser(subparsers: argparse._SubParsersAction[argparse.ArgumentParser])
         action="store_true",
         help="temperature for the LM",
     )
+    subparser.add_argument(
+        "--safety-model",
+        "-sm",
+        default=None,
+        type=str,
+        help="The name of the safety model to use for moderation.",
+    )
 
     subparser.set_defaults(func=_run_complete)
 
@@ -142,6 +149,7 @@ def _run_complete(args: argparse.Namespace) -> None:
                 top_k=args.top_k,
                 repetition_penalty=args.repetition_penalty,
                 logprobs=args.logprobs,
+                safety_model=args.safety_model,
             )
         except together.AuthenticationError:
             logger.critical(together.MISSING_API_KEY_MESSAGE)
@@ -159,6 +167,7 @@ def _run_complete(args: argparse.Namespace) -> None:
                 top_p=args.top_p,
                 top_k=args.top_k,
                 repetition_penalty=args.repetition_penalty,
+                safety_model=args.safety_model,
                 raw=args.raw,
             ):
                 if not args.raw:
diff --git a/src/together/commands/embeddings.py b/src/together/commands/embeddings.py
index a3798460..01be9072 100644
--- a/src/together/commands/embeddings.py
+++ b/src/together/commands/embeddings.py
@@ -1,7 +1,6 @@
 from __future__ import annotations
 
 import argparse
-import json
 
 import together
 from together import Embeddings
@@ -42,7 +41,7 @@ def _run_complete(args: argparse.Namespace) -> None:
             model=args.model,
         )
 
-        print(json.dumps(response, indent=4))
+        print([e.embedding for e in response.data])
     except together.AuthenticationError:
         logger.critical(together.MISSING_API_KEY_MESSAGE)
         exit(0)
diff --git a/src/together/complete.py b/src/together/complete.py
index 1d765f01..625da0b4 100644
--- a/src/together/complete.py
+++ b/src/together/complete.py
@@ -24,6 +24,7 @@ def create(
         logprobs: Optional[int] = None,
         api_key: Optional[str] = None,
         cast: bool = False,
+        safety_model: Optional[str] = None,
     ) -> Union[Dict[str, Any], TogetherResponse]:
         if model == "":
             model = together.default_text_model
@@ -38,6 +39,7 @@ def create(
             "stop": stop,
             "repetition_penalty": repetition_penalty,
             "logprobs": logprobs,
+            "safety_model": safety_model,
         }
 
         # send request
@@ -70,6 +72,7 @@ def create_streaming(
         raw: Optional[bool] = False,
         api_key: Optional[str] = None,
         cast: Optional[bool] = False,
+        safety_model: Optional[str] = None,
     ) -> Union[Iterator[str], Iterator[TogetherResponse]]:
         """
         Prints streaming responses and returns the completed text.
@@ -88,6 +91,7 @@ def create_streaming(
             "stop": stop,
             "repetition_penalty": repetition_penalty,
             "stream_tokens": True,
+            "safety_model": safety_model,
         }
 
         # send request
diff --git a/src/together/embeddings.py b/src/together/embeddings.py
index a306862c..3ba5e3f0 100644
--- a/src/together/embeddings.py
+++ b/src/together/embeddings.py
@@ -1,4 +1,5 @@
-from typing import Any, Dict, Optional
+import concurrent.futures
+from typing import Any, Dict, List, Optional, Union
 
 import together
 from together.utils import create_post_request, get_logger
@@ -7,29 +8,57 @@
 logger = get_logger(str(__name__))
 
 
+class DataItem:
+    def __init__(self, embedding: List[float]):
+        self.embedding = embedding
+
+
+class EmbeddingsOutput:
+    def __init__(self, data: List[DataItem]):
+        self.data = data
+
+
 class Embeddings:
     @classmethod
     def create(
-        self,
-        input: str,
+        cls,
+        input: Union[str, List[str]],
         model: Optional[str] = "",
-    ) -> Dict[str, Any]:
+    ) -> EmbeddingsOutput:
         if model == "":
             model = together.default_embedding_model
 
-        parameter_payload = {
-            "input": input,
-            "model": model,
-        }
+        if isinstance(input, str):
+            parameter_payload = {
+                "input": input,
+                "model": model,
+            }
+
+            response = cls._process_input(parameter_payload)
+
+            return EmbeddingsOutput([DataItem(response["data"][0]["embedding"])])
+        elif isinstance(input, list):
+            # If input is a list, process each string concurrently
+            with concurrent.futures.ThreadPoolExecutor() as executor:
+                parameter_payloads = [{"input": item, "model": model} for item in input]
+                results = list(executor.map(cls._process_input, parameter_payloads))
+
+            return EmbeddingsOutput(
+                [DataItem(item["data"][0]["embedding"]) for item in results]
+            )
+
+    @classmethod
+    def _process_input(cls, parameter_payload: Dict[str, Any]) -> Dict[str, Any]:
         # send request
         response = create_post_request(
             url=together.api_base_embeddings, json=parameter_payload
         )
 
+        # return the json as a DotDict
         try:
             response_json = dict(response.json())
-
         except Exception as e:
             raise together.JSONError(e, http_status=response.status_code)
+
         return response_json
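
Below is a short usage sketch of what this patch enables: the new `Together` wrapper class, list input to `Embeddings.create`, and the `safety_model` argument on completions. The model names, prompt, and API key setup are illustrative assumptions, not taken from the diff.

```python
# Usage sketch only: assumes together==0.2.9 is installed and TOGETHER_API_KEY
# is set in the environment; model names below are illustrative placeholders.
import together

client = together.Together()  # aggregate entry point added in src/together/__init__.py

# Embeddings.create now also accepts a list of strings; each item is embedded in
# its own request via a thread pool and comes back as a DataItem with an
# .embedding attribute on EmbeddingsOutput.data.
resp = client.embeddings.create(
    input=["first sentence to embed", "second sentence to embed"],
    model="togethercomputer/bert-base-uncased",
)
print([item.embedding[:4] for item in resp.data])  # first few floats per input

# Completions (and `together complete --safety-model ...` on the CLI) now accept
# an optional safety model used for moderation; None keeps the previous behavior.
output = client.complete.create(
    "Space robots are",
    model="togethercomputer/RedPajama-INCITE-7B-Base",
    safety_model=None,
)
print(output)
```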