From 9dfcb8d3318834c39158f8427f8ad6d18f69b905 Mon Sep 17 00:00:00 2001 From: Amna Mubashar Date: Wed, 29 Jan 2025 18:44:57 +0100 Subject: [PATCH 1/3] Pin nltk version for sentence tokenizer --- haystack/components/preprocessors/sentence_tokenizer.py | 2 +- pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/haystack/components/preprocessors/sentence_tokenizer.py b/haystack/components/preprocessors/sentence_tokenizer.py index 9619b851fc..60abac9599 100644 --- a/haystack/components/preprocessors/sentence_tokenizer.py +++ b/haystack/components/preprocessors/sentence_tokenizer.py @@ -9,7 +9,7 @@ from haystack import logging from haystack.lazy_imports import LazyImport -with LazyImport("Run 'pip install nltk'") as nltk_imports: +with LazyImport("Run 'pip install nltk==3.9.1'") as nltk_imports: import nltk logger = logging.getLogger(__name__) diff --git a/pyproject.toml b/pyproject.toml index 72b8687c34..3169d2f893 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -112,7 +112,7 @@ extra-dependencies = [ "openpyxl", # XLSXToDocument "tabulate", # XLSXToDocument - "nltk", # NLTKDocumentSplitter + "nltk==3.9.1", # NLTKDocumentSplitter # OpenAPI "jsonref", # OpenAPIServiceConnector, OpenAPIServiceToFunctions From 4fcc120bef4cfb498653cb3321fd7a353655bc09 Mon Sep 17 00:00:00 2001 From: "David S. Batista" Date: Fri, 31 Jan 2025 16:15:06 +0100 Subject: [PATCH 2/3] Update pyproject.toml --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index fcd1f9624f..9a5f15070b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -112,7 +112,7 @@ extra-dependencies = [ "openpyxl", # XLSXToDocument "tabulate", # XLSXToDocument - "nltk==3.9.1", # NLTKDocumentSplitter + "nltk>=3.9.1", # NLTKDocumentSplitter # OpenAPI "jsonref", # OpenAPIServiceConnector, OpenAPIServiceToFunctions From 1a380585f7b981d24ff22e11ce7c2354da941029 Mon Sep 17 00:00:00 2001 From: "David S. Batista" Date: Fri, 31 Jan 2025 16:15:12 +0100 Subject: [PATCH 3/3] Update haystack/components/preprocessors/sentence_tokenizer.py --- haystack/components/preprocessors/sentence_tokenizer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/haystack/components/preprocessors/sentence_tokenizer.py b/haystack/components/preprocessors/sentence_tokenizer.py index 60abac9599..2cb77347d3 100644 --- a/haystack/components/preprocessors/sentence_tokenizer.py +++ b/haystack/components/preprocessors/sentence_tokenizer.py @@ -9,7 +9,7 @@ from haystack import logging from haystack.lazy_imports import LazyImport -with LazyImport("Run 'pip install nltk==3.9.1'") as nltk_imports: +with LazyImport("Run 'pip install nltk>=3.9.1'") as nltk_imports: import nltk logger = logging.getLogger(__name__)