Skip to content

Commit

Permalink
Merge branch 'main' into fix/invalid-evaluation-doctype-deduction
Browse files Browse the repository at this point in the history
  • Loading branch information
micmarty-deepsense authored Jul 2, 2024
2 parents 2405a2a + 72f28d7 commit e197f69
Show file tree
Hide file tree
Showing 331 changed files with 39,625 additions and 12,193 deletions.
4 changes: 2 additions & 2 deletions .github/actions/base-cache/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ runs:
using: "composite"
steps:
- name: Check for/restore base cache
uses: actions/cache/restore@v3
uses: actions/cache/restore@v4
id: virtualenv-cache-restore
with:
path: |
Expand Down Expand Up @@ -41,7 +41,7 @@ runs:
- name: Save Cache
if: steps.virtualenv-cache-restore.outputs.cache-hit != 'true'
id: virtualenv-cache-save
uses: actions/cache/save@v3
uses: actions/cache/save@v4
with:
path: |
.venv
Expand Down
4 changes: 2 additions & 2 deletions .github/actions/base-ingest-cache/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ runs:
using: "composite"
steps:
- name: Check for/restore ingest cache
uses: actions/cache/restore@v3
uses: actions/cache/restore@v4
id: ingest-virtualenv-cache-restore
with:
path: |
Expand Down Expand Up @@ -40,7 +40,7 @@ runs:
- name: Save Ingest Cache
if: steps.ingest-virtualenv-cache-restore.outputs.cache-hit != 'true'
id: ingest-virtualenv-cache-save
uses: actions/cache/save@v3
uses: actions/cache/save@v4
with:
path: |
.venv
Expand Down
85 changes: 54 additions & 31 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ jobs:
env:
NLTK_DATA: ${{ github.workspace }}/nltk_data
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- uses: ./.github/actions/base-cache
with:
python-version: ${{ matrix.python-version }}
Expand All @@ -33,9 +33,9 @@ jobs:
python-version: ["3.9","3.10","3.11", "3.12"]
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v4
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
- name: Check for dependency conflicts
Expand All @@ -47,9 +47,9 @@ jobs:
python-version: [ "3.9","3.10","3.11","3.12" ]
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v4
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
- name: Install all extras
Expand All @@ -64,9 +64,9 @@ jobs:
runs-on: ubuntu-latest
needs: [setup, changelog]
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v4
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
- name: Setup virtual environment
Expand All @@ -81,14 +81,14 @@ jobs:
shellcheck:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- name: ShellCheck
uses: ludeeus/action-shellcheck@master

shfmt:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- name: setup shfmt
uses: mfinelli/setup-shfmt@v3
- name: Run shfmt
Expand All @@ -104,9 +104,9 @@ jobs:
NLTK_DATA: ${{ github.workspace }}/nltk_data
needs: [setup, lint]
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v4
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
- name: Setup virtual environment
Expand All @@ -116,7 +116,7 @@ jobs:
- name: Test
env:
UNS_API_KEY: ${{ secrets.UNS_API_KEY }}
TESSERACT_VERSION : "5.3.4"
TESSERACT_VERSION : "5.4.1"
run: |
source .venv/bin/activate
sudo apt-get update
Expand Down Expand Up @@ -145,9 +145,9 @@ jobs:
UNSTRUCTURED_HF_TOKEN: ${{ secrets.HF_TOKEN }}
needs: [setup, lint]
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v4
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
- name: Setup virtual environment
Expand Down Expand Up @@ -177,9 +177,9 @@ jobs:
NLTK_DATA: ${{ github.workspace }}/nltk_data
needs: [setup, lint]
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v4
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
- name: Setup virtual environment
Expand All @@ -204,12 +204,12 @@ jobs:
NLTK_DATA: ${{ github.workspace }}/nltk_data
needs: [setup, lint, test_unit_no_extras]
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v4
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
- uses: actions/cache/restore@v3
- uses: actions/cache/restore@v4
id: virtualenv-cache
with:
path: |
Expand Down Expand Up @@ -246,7 +246,7 @@ jobs:
NLTK_DATA: ${{ github.workspace }}/nltk_data
needs: [setup]
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- uses: ./.github/actions/base-ingest-cache
with:
python-version: ${{ matrix.python-version }}
Expand All @@ -261,7 +261,7 @@ jobs:
# actions/checkout MUST come before auth
- uses: 'actions/checkout@v4'
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v4
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
- name: Get full Python version
Expand Down Expand Up @@ -289,7 +289,7 @@ jobs:
# actions/checkout MUST come before auth
- uses: 'actions/checkout@v4'
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v4
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
- name: Get full Python version
Expand Down Expand Up @@ -342,6 +342,7 @@ jobs:
AZURE_SEARCH_ENDPOINT: ${{ secrets.AZURE_SEARCH_ENDPOINT }}
AZURE_SEARCH_API_KEY: ${{ secrets.AZURE_SEARCH_API_KEY }}
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
OCTOAI_API_KEY: ${{ secrets.OCTOAI_API_KEY }}
PINECONE_API_KEY: ${{secrets.PINECONE_API_KEY}}
TABLE_OCR: "tesseract"
OCR_AGENT: "unstructured.partition.utils.ocr_models.tesseract_ocr.OCRAgentTesseract"
Expand Down Expand Up @@ -373,7 +374,7 @@ jobs:
# actions/checkout MUST come before auth
- uses: 'actions/checkout@v4'
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v4
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
- name: Get full Python version
Expand Down Expand Up @@ -427,6 +428,29 @@ jobs:
tesseract --version
./test_unstructured_ingest/test-ingest-dest.sh
test_ingest_help:
environment: ci
strategy:
matrix:
python-version: ["3.9","3.10","3.11", "3.12"]
runs-on: ubuntu-latest
needs: [setup_ingest, lint]
steps:
- uses: 'actions/checkout@v4'
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
- name: Setup virtual environment
uses: ./.github/actions/base-ingest-cache
with:
python-version: ${{ matrix.python-version }}
- name: Validate --help
run: |
source .venv/bin/activate
./test_unstructured_ingest/test-help.sh
test_unstructured_api_unit:
strategy:
matrix:
Expand All @@ -437,7 +461,7 @@ jobs:
NLTK_DATA: ${{ github.workspace }}/nltk_data
needs: [setup, lint]
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- name: Setup virtual environment
uses: ./.github/actions/base-cache
with:
Expand All @@ -449,7 +473,7 @@ jobs:
echo "SKIP_API_UNIT_FOR_BREAKING_CHANGE=true" >> $GITHUB_ENV
- name: Set up Python ${{ matrix.python-version }}
if: env.SKIP_API_UNIT_FOR_BREAKING_CHANGE == 'false'
uses: actions/setup-python@v4
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
- name: Setup virtual environment (no cache hit)
Expand Down Expand Up @@ -477,9 +501,9 @@ jobs:
runs-on: ubuntu-latest
steps:
# need to checkout otherwise paths-filter will fail on merge-queue trigger
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- if: github.ref != 'refs/heads/main'
uses: dorny/paths-filter@v2
uses: dorny/paths-filter@v3
id: changes
with:
filters: |
Expand All @@ -495,16 +519,15 @@ jobs:
runs-on: ubuntu-latest-m
needs: [ setup, lint ]
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- name: Test Dockerfile
run: |
echo "UNS_API_KEY=${{ secrets.UNS_API_KEY }}" > uns_test_env_file
make docker-dl-packages
make docker-build
make docker-test CI=true UNSTRUCTURED_INCLUDE_DEBUG_METADATA=true
- name: Scan image
uses: anchore/scan-action@v3
with:
image: "unstructured:dev"
# NOTE(robinson) - revert this to medium when we bump libreoffice
severity-cutoff: high
severity-cutoff: critical
only-fixed: true
5 changes: 2 additions & 3 deletions .github/workflows/docker-publish.yml
Original file line number Diff line number Diff line change
Expand Up @@ -47,10 +47,9 @@ jobs:
password: ${{ secrets.QUAY_IO_ROBOT_TOKEN }}
- name: Build images
run: |
make docker-dl-packages
ARCH=$(cut -d "/" -f2 <<< ${{ matrix.docker-platform }})
DOCKER_BUILDKIT=1 docker buildx build --platform=$ARCH --load \
-f Dockerfile-$ARCH \
DOCKER_BUILDKIT=1 docker buildx build --platform=${{ matrix.docker-platform }} --load \
-f Dockerfile \
--build-arg PIP_VERSION=$PIP_VERSION \
--build-arg BUILDKIT_INLINE_CACHE=1 \
--progress plain \
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/ingest-test-fixtures-update-pr.yml
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ jobs:
AZURE_SEARCH_ENDPOINT: ${{ secrets.AZURE_SEARCH_ENDPOINT }}
AZURE_SEARCH_API_KEY: ${{ secrets.AZURE_SEARCH_API_KEY }}
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
OCTOAI_API_KEY: ${{ secrets.OCTOAI_API_KEY }}
TABLE_OCR: "tesseract"
OCR_AGENT: "unstructured.partition.utils.ocr_models.tesseract_ocr.OCRAgentTesseract"
OVERWRITE_FIXTURES: "true"
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/release-version-alert.yml
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ jobs:
- name: Restore Message from Cache
if: env.SKIP_STEPS != 'true'
id: restore-cache
uses: actions/cache/restore@v3
uses: actions/cache/restore@v4
with:
path: message_cache.txt
key: message-cache-${{ env.MESSAGE_HASH }}
Expand All @@ -69,7 +69,7 @@ jobs:
cat message_cache.txt
- name: Store Message in Cache
if: env.SKIP_STEPS != 'true'
uses: actions/cache/save@v3
uses: actions/cache/save@v4
with:
path: message_cache.txt
key: message-cache-${{ env.MESSAGE_HASH }}
Expand Down
4 changes: 2 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -205,5 +205,5 @@ examples/**/output/
outputdiff.txt
metricsdiff.txt

# APK packages for the docker build
docker-packages/*
# analysis
annotated/
Loading

0 comments on commit e197f69

Please sign in to comment.