Skip to content

Commit

Permalink
IMPROVEMENT: Minor redis improvements (langchain-ai#13381)
Browse files Browse the repository at this point in the history
- **Description:**
- Fixes a `key_prefix` bug where passing it to
`Redis.from_existing_index(...)` did not work properly. Updates docstrings
accordingly.
- Updates the Redis filter classes' logic to follow best practices for typing,
string formatting, and handling of "empty" filters.
- Fixes a bug that would prevent multiple tag filters from being applied
together in some scenarios.
- Adds a new unit-test module for the filters. Also updates code
formatting for a number of modules that were failing the `make`
commands.
  - **Issue:** N/A
  - **Dependencies:** N/A
  - **Tag maintainer:** @baskaryan 
  - **Twitter handle:** @tchutch94
  • Loading branch information
tylerhutcherson authored and amiaxys committed Nov 23, 2023
1 parent ca9955f commit 4577b46
Show file tree
Hide file tree
Showing 4 changed files with 342 additions and 97 deletions.
8 changes: 7 additions & 1 deletion libs/langchain/langchain/utilities/redis.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ class TokenEscaper:

# Characters that RediSearch requires us to escape during queries.
# Source: https://redis.io/docs/stack/search/reference/escaping/#the-rules-of-text-field-tokenization
DEFAULT_ESCAPED_CHARS = r"[,.<>{}\[\]\\\"\':;!@#$%^&*()\-+=~\/]"
DEFAULT_ESCAPED_CHARS = r"[,.<>{}\[\]\\\"\':;!@#$%^&*()\-+=~\/ ]"

def __init__(self, escape_chars_re: Optional[Pattern] = None):
if escape_chars_re:
Expand All @@ -37,6 +37,12 @@ def __init__(self, escape_chars_re: Optional[Pattern] = None):
self.escaped_chars_re = re.compile(self.DEFAULT_ESCAPED_CHARS)

def escape(self, value: str) -> str:
if not isinstance(value, str):
raise TypeError(
"Value must be a string object for token escaping."
f"Got type {type(value)}"
)

def escape_symbol(match: re.Match) -> str:
value = match.group(0)
return f"\\{value}"
Expand Down
60 changes: 37 additions & 23 deletions libs/langchain/langchain/vectorstores/redis/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,9 +60,9 @@ def check_index_exists(client: RedisType, index_name: str) -> bool:
try:
client.ft(index_name).info()
except: # noqa: E722
logger.info("Index does not exist")
logger.debug("Index does not exist")
return False
logger.info("Index already exists")
logger.debug("Index already exists")
return True


Expand Down Expand Up @@ -155,9 +155,12 @@ class Redis(VectorStore):
.. code-block:: python
rds = Redis.from_existing_index(
# must pass in schema and key_prefix from another index
existing_rds = Redis.from_existing_index(
embeddings, # an Embeddings object
index_name="my-index",
schema=rds.schema, # schema dumped from another index
key_prefix=rds.key_prefix, # key prefix from another index
redis_url="redis://localhost:6379",
)
Expand Down Expand Up @@ -249,7 +252,7 @@ def __init__(
key_prefix: Optional[str] = None,
**kwargs: Any,
):
"""Initialize with necessary components."""
"""Initialize Redis vector store with necessary components."""
self._check_deprecated_kwargs(kwargs)
try:
# TODO use importlib to check if redis is installed
Expand Down Expand Up @@ -401,6 +404,7 @@ def from_texts_return_keys(
index_schema = generated_schema

# Create instance
# init the class -- if Redis is unavailable, will throw exception
instance = cls(
redis_url,
index_name,
Expand Down Expand Up @@ -495,6 +499,7 @@ def from_existing_index(
embedding: Embeddings,
index_name: str,
schema: Union[Dict[str, str], str, os.PathLike],
key_prefix: Optional[str] = None,
**kwargs: Any,
) -> Redis:
"""Connect to an existing Redis index.
Expand All @@ -504,20 +509,26 @@ def from_existing_index(
from langchain.vectorstores import Redis
from langchain.embeddings import OpenAIEmbeddings
embeddings = OpenAIEmbeddings()
redisearch = Redis.from_existing_index(
# must pass in schema and key_prefix from another index
existing_rds = Redis.from_existing_index(
embeddings,
index_name="my-index",
redis_url="redis://username:password@localhost:6379"
schema=rds.schema, # schema dumped from another index
key_prefix=rds.key_prefix, # key prefix from another index
redis_url="redis://username:password@localhost:6379",
)
Args:
embedding (Embeddings): Embedding model class (i.e. OpenAIEmbeddings)
for embedding queries.
index_name (str): Name of the index to connect to.
schema (Union[Dict[str, str], str, os.PathLike]): Schema of the index
and the vector schema. Can be a dict, or path to yaml file
and the vector schema. Can be a dict, or path to yaml file.
key_prefix (Optional[str]): Prefix to use for all keys in Redis associated
with this index.
**kwargs (Any): Additional keyword arguments to pass to the Redis client.
Returns:
Expand All @@ -528,29 +539,32 @@ def from_existing_index(
ImportError: If the redis python package is not installed.
"""
redis_url = get_from_dict_or_env(kwargs, "redis_url", "REDIS_URL")
try:
# We need to first remove redis_url from kwargs,
# otherwise passing it to Redis will result in an error.
if "redis_url" in kwargs:
kwargs.pop("redis_url")
client = get_client(redis_url=redis_url, **kwargs)
# check if redis has redisearch module installed
check_redis_module_exist(client, REDIS_REQUIRED_MODULES)
# ensure that the index already exists
assert check_index_exists(
client, index_name
), f"Index {index_name} does not exist"
except Exception as e:
raise ValueError(f"Redis failed to connect: {e}")
# We need to first remove redis_url from kwargs,
# otherwise passing it to Redis will result in an error.
if "redis_url" in kwargs:
kwargs.pop("redis_url")

return cls(
# Create instance
# init the class -- if Redis is unavailable, will throw exception
instance = cls(
redis_url,
index_name,
embedding,
index_schema=schema,
key_prefix=key_prefix,
**kwargs,
)

# Check for existence of the declared index
if not check_index_exists(instance.client, index_name):
# Will only raise if the running Redis server does not
# have a record of this particular index
raise ValueError(
f"Redis failed to connect: Index {index_name} does not exist."
)

return instance

@property
def schema(self) -> Dict[str, List[Any]]:
"""Return the schema of the index."""
Expand Down
Loading

0 comments on commit 4577b46

Please sign in to comment.