Skip to content

Commit

Permalink
Chore: Allow passing kwargs to request data field (#716)
Browse files Browse the repository at this point in the history
* bump again :(

* update to kwarg

* add test case

* rename to request_kwargs

* remove install detectron2

* pip compile

* add changelog for remove detectron2 install

* resolve weaviate import issue on python 3.9
  • Loading branch information
yuming-long authored Jun 12, 2023
1 parent fc53277 commit b354e8e
Show file tree
Hide file tree
Showing 10 changed files with 86 additions and 118 deletions.
6 changes: 5 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,14 +1,18 @@
## 0.7.4-dev0
## 0.7.4

### Enhancements

* Allows passing kwargs to request data field for `partition_via_api` and `partition_multiple_via_api`
* Enable MIME type detection if libmagic is not available
* Adds handling for empty files in `detect_filetype` and `partition`.

### Features

### Fixes

* Resolve `grpcio` import issue on `weaviate.schema.validate_schema` for python 3.9 and 3.10
* Remove building `detectron2` from source in Dockerfile

## 0.7.3

### Enhancements
Expand Down
1 change: 0 additions & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@ RUN python3.8 -m pip install pip==${PIP_VERSION} && \
pip install --no-cache -r requirements/ingest-slack.txt && \
pip install --no-cache -r requirements/ingest-wikipedia.txt && \
pip install --no-cache -r requirements/local-inference.txt && \
pip install --no-cache "detectron2@git+https://github.com/facebookresearch/detectron2.git@e2ce8dc#egg=detectron2" && \
dnf -y groupremove "Development Tools" && \
dnf clean all

Expand Down
8 changes: 6 additions & 2 deletions requirements/dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,10 @@ anyio==3.7.0
# via
# -c requirements/base.txt
# jupyter-server
appnope==0.1.3
# via
# ipykernel
# ipython
argon2-cffi==21.3.0
# via
# jupyter-server
Expand Down Expand Up @@ -59,7 +63,7 @@ executing==1.2.0
# via stack-data
fastjsonschema==2.17.1
# via nbformat
filelock==3.12.0
filelock==3.12.1
# via virtualenv
fqdn==1.5.1
# via jsonschema
Expand Down Expand Up @@ -215,7 +219,7 @@ pip-tools==6.13.0
# via -r requirements/dev.in
pkgutil-resolve-name==1.3.10
# via jsonschema
platformdirs==3.5.1
platformdirs==3.5.3
# via
# -c requirements/test.txt
# jupyter-core
Expand Down
50 changes: 2 additions & 48 deletions requirements/huggingface.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,11 @@ click==8.1.3
# via
# -c requirements/base.txt
# sacremoses
cmake==3.26.4
# via triton
filelock==3.12.0
filelock==3.12.1
# via
# huggingface-hub
# torch
# transformers
# triton
fsspec==2023.6.0
# via huggingface-hub
huggingface-hub==0.15.1
Expand All @@ -41,8 +38,6 @@ joblib==1.2.0
# sacremoses
langdetect==1.0.9
# via -r requirements/huggingface.in
lit==16.0.5.post0
# via triton
markupsafe==2.1.3
# via jinja2
mpmath==1.3.0
Expand All @@ -53,31 +48,6 @@ numpy==1.23.5
# via
# -c requirements/base.txt
# transformers
nvidia-cublas-cu11==11.10.3.66
# via
# nvidia-cudnn-cu11
# nvidia-cusolver-cu11
# torch
nvidia-cuda-cupti-cu11==11.7.101
# via torch
nvidia-cuda-nvrtc-cu11==11.7.99
# via torch
nvidia-cuda-runtime-cu11==11.7.99
# via torch
nvidia-cudnn-cu11==8.5.0.96
# via torch
nvidia-cufft-cu11==10.9.0.58
# via torch
nvidia-curand-cu11==10.2.10.91
# via torch
nvidia-cusolver-cu11==11.4.0.1
# via torch
nvidia-cusparse-cu11==11.7.4.91
# via torch
nvidia-nccl-cu11==2.14.3
# via torch
nvidia-nvtx-cu11==11.7.91
# via torch
packaging==23.1
# via
# -c requirements/base.txt
Expand Down Expand Up @@ -113,9 +83,7 @@ sympy==1.12
tokenizers==0.13.3
# via transformers
torch==2.0.1
# via
# -r requirements/huggingface.in
# triton
# via -r requirements/huggingface.in
tqdm==4.65.0
# via
# -c requirements/base.txt
Expand All @@ -124,8 +92,6 @@ tqdm==4.65.0
# transformers
transformers==4.30.1
# via -r requirements/huggingface.in
triton==2.0.0
# via torch
typing-extensions==4.6.3
# via
# -c requirements/base.txt
Expand All @@ -136,15 +102,3 @@ urllib3==1.26.16
# -c requirements/base.txt
# -c requirements/constraints.in
# requests
wheel==0.40.0
# via
# -c requirements/constraints.in
# nvidia-cublas-cu11
# nvidia-cuda-cupti-cu11
# nvidia-cuda-runtime-cu11
# nvidia-curand-cu11
# nvidia-cusparse-cu11
# nvidia-nvtx-cu11

# The following packages are considered to be unsafe in a requirements file:
# setuptools
49 changes: 2 additions & 47 deletions requirements/local-inference.txt
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,6 @@ charset-normalizer==3.1.0
# -c requirements/base.txt
# pdfminer-six
# requests
cmake==3.26.4
# via triton
coloredlogs==15.0.1
# via onnxruntime
contourpy==1.0.7
Expand All @@ -34,15 +32,14 @@ cycler==0.11.0
# via matplotlib
effdet==0.4.1
# via layoutparser
filelock==3.12.0
filelock==3.12.1
# via
# huggingface-hub
# torch
# transformers
# triton
flatbuffers==23.5.26
# via onnxruntime
fonttools==4.39.4
fonttools==4.40.0
# via matplotlib
fsspec==2023.6.0
# via huggingface-hub
Expand All @@ -67,8 +64,6 @@ kiwisolver==1.4.4
# via matplotlib
layoutparser[layoutmodels,tesseract]==0.3.4
# via unstructured-inference
lit==16.0.5.post0
# via triton
markupsafe==2.1.3
# via jinja2
matplotlib==3.7.1
Expand All @@ -90,31 +85,6 @@ numpy==1.23.5
# scipy
# torchvision
# transformers
nvidia-cublas-cu11==11.10.3.66
# via
# nvidia-cudnn-cu11
# nvidia-cusolver-cu11
# torch
nvidia-cuda-cupti-cu11==11.7.101
# via torch
nvidia-cuda-nvrtc-cu11==11.7.99
# via torch
nvidia-cuda-runtime-cu11==11.7.99
# via torch
nvidia-cudnn-cu11==8.5.0.96
# via torch
nvidia-cufft-cu11==10.9.0.58
# via torch
nvidia-curand-cu11==10.2.10.91
# via torch
nvidia-cusolver-cu11==11.4.0.1
# via torch
nvidia-cusparse-cu11==11.7.4.91
# via torch
nvidia-nccl-cu11==2.14.3
# via torch
nvidia-nvtx-cu11==11.7.91
# via torch
omegaconf==2.3.0
# via effdet
onnxruntime==1.15.0
Expand Down Expand Up @@ -220,7 +190,6 @@ torch==2.0.1
# layoutparser
# timm
# torchvision
# triton
torchvision==0.15.2
# via
# effdet
Expand All @@ -234,8 +203,6 @@ tqdm==4.65.0
# transformers
transformers==4.30.1
# via unstructured-inference
triton==2.0.0
# via torch
typing-extensions==4.6.3
# via
# -c requirements/base.txt
Expand All @@ -251,19 +218,7 @@ urllib3==1.26.16
# requests
wand==0.6.11
# via pdfplumber
wheel==0.40.0
# via
# -c requirements/constraints.in
# nvidia-cublas-cu11
# nvidia-cuda-cupti-cu11
# nvidia-cuda-runtime-cu11
# nvidia-curand-cu11
# nvidia-cusparse-cu11
# nvidia-nvtx-cu11
zipp==3.15.0
# via
# -c requirements/base.txt
# importlib-resources

# The following packages are considered to be unsafe in a requirements file:
# setuptools
1 change: 1 addition & 0 deletions requirements/test.in
Original file line number Diff line number Diff line change
Expand Up @@ -17,3 +17,4 @@ types-Markdown
types-requests
types-tabulate
vcrpy
grpcio
6 changes: 4 additions & 2 deletions requirements/test.txt
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@ flake8==6.0.0
# via -r requirements/test.in
freezegun==1.2.2
# via -r requirements/test.in
grpcio==1.54.2
# via -r requirements/test.in
idna==3.4
# via
# -c requirements/base.txt
Expand Down Expand Up @@ -67,7 +69,7 @@ packaging==23.1
# pytest
pathspec==0.11.1
# via black
platformdirs==3.5.1
platformdirs==3.5.3
# via black
pluggy==1.0.0
# via pytest
Expand All @@ -79,7 +81,7 @@ pydantic==1.10.9
# label-studio-sdk
pyflakes==3.0.1
# via flake8
pytest==7.3.1
pytest==7.3.2
# via
# pytest-cov
# pytest-mock
Expand Down
37 changes: 37 additions & 0 deletions test_unstructured/partition/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,19 @@ def test_partition_via_api_raises_with_bad_response(monkeypatch):
partition_via_api(filename=filename, api_key="FAKEROO")


def test_partition_via_api_valid_request_data_kwargs():
    """Pass ``strategy="fast"`` as an extra kwarg and expect a list back."""
    example_docs = os.path.join(DIRECTORY, "..", "..", "example-docs")
    pdf_path = os.path.join(example_docs, "layout-parser-paper-fast.pdf")

    result = partition_via_api(filename=pdf_path, api_key="FAKEROO", strategy="fast")

    assert isinstance(result, list)


def test_partition_via_api_invalid_request_data_kwargs():
    """Pass an unrecognized ``strategy`` value and expect a ``ValueError``."""
    example_docs = os.path.join(DIRECTORY, "..", "..", "example-docs")
    pdf_path = os.path.join(example_docs, "layout-parser-paper-fast.pdf")
    bad_kwargs = {"strategy": "not_a_strategy"}

    with pytest.raises(ValueError):
        partition_via_api(filename=pdf_path, api_key="FAKEROO", **bad_kwargs)


class MockMultipleResponse:
def __init__(self, status_code):
self.status_code = status_code
Expand Down Expand Up @@ -276,3 +289,27 @@ def test_partition_multiple_via_api_from_files_raises_without_filenames(monkeypa
files=files,
api_key="FAKEROO",
)


def test_partition_multiple_via_api_valid_request_data_kwargs():
    """Send a PDF and a JPG with ``strategy="fast"`` and expect a list back."""
    example_docs = os.path.join(DIRECTORY, "..", "..", "example-docs")
    document_paths = [
        os.path.join(example_docs, doc_name)
        for doc_name in ("layout-parser-paper-fast.pdf", "layout-parser-paper-fast.jpg")
    ]

    result = partition_multiple_via_api(
        filenames=document_paths,
        api_key="FAKEROO",
        strategy="fast",
    )

    assert isinstance(result, list)


def test_partition_multiple_via_api_invalid_request_data_kwargs():
    """Send a PDF and a JPG with an unrecognized ``strategy`` and expect a ``ValueError``."""
    example_docs = os.path.join(DIRECTORY, "..", "..", "example-docs")
    document_paths = [
        os.path.join(example_docs, doc_name)
        for doc_name in ("layout-parser-paper-fast.pdf", "layout-parser-paper-fast.jpg")
    ]
    bad_kwargs = {"strategy": "not_a_strategy"}

    with pytest.raises(ValueError):
        partition_multiple_via_api(filenames=document_paths, api_key="FAKEROO", **bad_kwargs)
2 changes: 1 addition & 1 deletion unstructured/__version__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.7.4-dev0" # pragma: no cover
__version__ = "0.7.4" # pragma: no cover
Loading

0 comments on commit b354e8e

Please sign in to comment.