Skip to content

Commit

Permalink
Support Python 3.12 via newer scipy and nmslib-metabrainz (#523)
Browse files Browse the repository at this point in the history
---------

Co-authored-by: Daniel King <[email protected]>
  • Loading branch information
jason-nance and dakinggg authored Sep 15, 2024
1 parent 0719a93 commit e93baa4
Show file tree
Hide file tree
Showing 9 changed files with 55 additions and 5 deletions.
1 change: 1 addition & 0 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,3 +20,4 @@ jobs:
docker run --rm scispacy black scispacy --check --line-length 88
docker run --rm scispacy bash scripts/mypy.sh
docker run --rm scispacy pytest tests/ --cov scispacy --cov-fail-under=20
17 changes: 17 additions & 0 deletions .github/workflows/old_scipy.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
name: CI (old scipy)

on:
pull_request:
branches:
- main

jobs:
build:
runs-on: ubuntu-latest

steps:
- uses: actions/checkout@v1
- name: Build and test with Docker using an older version of scipy
run: |
docker build --tag scispacy .
docker run --rm scispacy bash -c "pip install 'scipy<1.11' && pytest tests/"
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ WORKDIR /work
COPY requirements.in .

RUN pip install -r requirements.in
RUN pip install https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.5.3/en_core_sci_sm-0.5.3.tar.gz
RUN pip install https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.5.4/en_core_sci_sm-0.5.4.tar.gz
RUN python -m spacy download en_core_web_sm
RUN python -m spacy download en_core_web_md

Expand Down
10 changes: 8 additions & 2 deletions requirements.in
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
numpy
scipy<1.11
# NOTE: scipy<1.11 is required when creating the linkers, so creating linkers
# is currently only supported on Python<3.11
# https://github.com/allenai/scispacy/issues/519#issuecomment-2229915999
scipy
spacy>=3.7.0,<3.8.0
spacy-lookups-data
pandas
Expand All @@ -8,7 +11,10 @@ conllu

# Candidate generation and entity linking
joblib
nmslib>=1.7.3.6
nmslib>=1.7.3.6; python_version < '3.11'
# Use the metabrainz fork until nmslib supports installing on Python 3.11+
# https://github.com/nmslib/nmslib/issues/555
nmslib-metabrainz==2.1.3; python_version >= '3.11'
scikit-learn>=0.20.3

# Required for testing.
Expand Down
6 changes: 6 additions & 0 deletions scispacy/candidate_generation.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import nmslib
from nmslib.dist import FloatIndex

from scispacy.util import scipy_supports_sparse_float16
from scispacy.file_cache import cached_path
from scispacy.linking_utils import (
KnowledgeBase,
Expand Down Expand Up @@ -375,6 +376,11 @@ def create_tfidf_ann_index(
The kb items to generate the index and vectors for.
"""
if not scipy_supports_sparse_float16():
raise RuntimeError(
"This function requires scipy<1.11, which only runs on Python<3.11."
)

tfidf_vectorizer_path = f"{out_path}/tfidf_vectorizer.joblib"
ann_index_path = f"{out_path}/nmslib_index.bin"
tfidf_vectors_path = f"{out_path}/tfidf_vectors_sparse.npz"
Expand Down
7 changes: 7 additions & 0 deletions scispacy/util.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
from packaging.version import Version
import spacy
import scipy
from spacy.language import Language
from spacy.tokens import Doc

Expand All @@ -17,6 +19,11 @@ def create_combined_rule_model() -> Language:
return nlp


def scipy_supports_sparse_float16() -> bool:
    """Return whether the installed scipy predates the 1.11 release series.

    This project uses scipy 1.11 as the cutoff for float16 sparse-matrix
    support (see the linked scipy issue).

    Only the numeric ``major.minor`` part of the version is compared, so
    pre-release and development builds of 1.11 (e.g. ``1.11.0rc1`` or
    ``1.11.0.dev0``) are treated as 1.11 rather than as "older than 1.11".
    A plain ``Version(...) < Version("1.11")`` comparison would sort those
    builds *before* 1.11 under PEP 440 ordering.

    Returns:
        True if the installed scipy's release number is lower than 1.11,
        False otherwise.
    """
    # https://github.com/scipy/scipy/issues/7408
    # `release` is the tuple of numeric components without any
    # pre/post/dev/local suffix, e.g. (1, 11, 0) for "1.11.0rc1".
    major_minor = Version(scipy.__version__).release[:2]
    return major_minor < (1, 11)


class WhitespaceTokenizer:
"""
Spacy doesn't assume that text is tokenised. Sometimes this
Expand Down
5 changes: 3 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,12 +42,13 @@
license="Apache",
install_requires=[
"spacy>=3.7.0,<3.8.0",
"scipy<1.11",
"scipy",
"requests>=2.0.0,<3.0.0",
"conllu",
"numpy",
"joblib",
"nmslib>=1.7.3.6",
"nmslib>=1.7.3.6; python_version < '3.11'",
"nmslib-metabrainz==2.1.3; python_version >= '3.11'",
"scikit-learn>=0.20.3",
"pysbd",
],
Expand Down
7 changes: 7 additions & 0 deletions tests/test_candidate_generation.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,17 @@

from scispacy.candidate_generation import CandidateGenerator, create_tfidf_ann_index, MentionCandidate
from scispacy.umls_utils import UmlsKnowledgeBase
from scispacy.util import scipy_supports_sparse_float16


class TestCandidateGeneration(unittest.TestCase):

def setUp(self):
super().setUp()
if not scipy_supports_sparse_float16():
# https://github.com/allenai/scispacy/issues/519#issuecomment-2229915999
self.skipTest("Candidate generation isn't supported for scipy>=1.11")

def test_create_index(self):

umls_fixture = UmlsKnowledgeBase("tests/fixtures/umls_test_fixture.json")
Expand Down
5 changes: 5 additions & 0 deletions tests/test_linking.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,16 @@
from scispacy.linking import EntityLinker
from scispacy.umls_utils import UmlsKnowledgeBase
from scispacy.abbreviation import AbbreviationDetector
from scispacy.util import scipy_supports_sparse_float16


class TestLinker(unittest.TestCase):
def setUp(self):
super().setUp()
if not scipy_supports_sparse_float16():
# https://github.com/allenai/scispacy/issues/519#issuecomment-2229915999
self.skipTest("Candidate generation isn't supported for scipy>=1.11")

self.nlp = spacy.load("en_core_web_sm")

umls_fixture = UmlsKnowledgeBase("tests/fixtures/umls_test_fixture.json", "tests/fixtures/test_umls_tree.tsv")
Expand Down

0 comments on commit e93baa4

Please sign in to comment.