diff --git a/.github/actions/base-cache/action.yml b/.github/actions/base-cache/action.yml
index 2aaf5497b5..05b1ddc227 100644
--- a/.github/actions/base-cache/action.yml
+++ b/.github/actions/base-cache/action.yml
@@ -30,7 +30,9 @@ runs:
       shell: bash
       run: |
         python${{ inputs.python-version }} -m pip install --upgrade virtualenv
-        python${{ inputs.python-version }} -m venv .venv
+        if [ ! -d ".venv" ]; then
+          python${{ inputs.python-version }} -m venv .venv
+        fi
         source .venv/bin/activate
         [ ! -d "$NLTK_DATA" ] && mkdir "$NLTK_DATA"
         if [ "${{ inputs.python-version == '3.12' }}" == "true" ]; then
@@ -38,6 +40,7 @@ runs:
           python -m pip install --upgrade setuptools
         fi
         make install-ci
+        make install-nltk-models
     - name: Save Cache
       if: steps.virtualenv-cache-restore.outputs.cache-hit != 'true'
       id: virtualenv-cache-save
diff --git a/.github/actions/base-ingest-cache/action.yml b/.github/actions/base-ingest-cache/action.yml
index f29d867646..dc9d5105a2 100644
--- a/.github/actions/base-ingest-cache/action.yml
+++ b/.github/actions/base-ingest-cache/action.yml
@@ -18,7 +18,7 @@ runs:
         path: |
           .venv
           nltk_data
-        key: unstructured-ingest-${{ runner.os }}-${{ inputs.python-version }}-${{ hashFiles('requirements/ingest/*.txt') }}-${{ hashFiles('requirements/*.txt') }}
+        key: unstructured-ingest-${{ runner.os }}-${{ inputs.python-version }}-${{ hashFiles('requirements/ingest/*.txt', 'requirements/*.txt') }}
         lookup-only: ${{ inputs.check-only }}
     - name: Set up Python ${{ inputs.python-version }}
       if: steps.ingest-virtualenv-cache-restore.outputs.cache-hit != 'true'
@@ -39,6 +39,8 @@ runs:
           python -m pip install --upgrade setuptools
         fi
         make install-ci
+        make install-nltk-models
+        make install-all-docs
         make install-ingest
     - name: Save Ingest Cache
       if: steps.ingest-virtualenv-cache-restore.outputs.cache-hit != 'true'
@@ -48,5 +50,5 @@ runs:
         path: |
           .venv
           nltk_data
-        key: unstructured-ingest-${{ runner.os }}-${{ inputs.python-version }}-${{ hashFiles('requirements/ingest/*.txt') }}-${{ hashFiles('requirements/*.txt') }}
+        key: unstructured-ingest-${{ runner.os }}-${{ inputs.python-version }}-${{ hashFiles('requirements/ingest/*.txt', 'requirements/*.txt') }}
 
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 81afe54c52..88fe84680b 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -12,14 +12,15 @@ permissions:
   id-token: write
   contents: read
 
+env:
+  NLTK_DATA: ${{ github.workspace }}/nltk_data
+
 jobs:
   setup:
     strategy:
       matrix:
         python-version: ["3.9","3.10","3.11", "3.12"]
     runs-on: ubuntu-latest
-    env:
-      NLTK_DATA: ${{ github.workspace }}/nltk_data
     steps:
     - uses: actions/checkout@v4
     - uses: ./.github/actions/base-cache
@@ -78,8 +79,6 @@ jobs:
     strategy:
       matrix:
         python-version: ["3.9","3.10","3.11"]
-    env:
-      NLTK_DATA: ${{ github.workspace }}/nltk_data
     runs-on: ubuntu-latest
     needs: [setup, changelog]
     steps:
@@ -185,8 +184,6 @@ jobs:
         python-version: ["3.10"]
         extra: ["csv", "docx", "odt", "markdown", "pypandoc", "pdf-image", "pptx", "xlsx"]
     runs-on: ubuntu-latest
-    env:
-      NLTK_DATA: ${{ github.workspace }}/nltk_data
     needs: [setup, lint, test_unit_no_extras]
     steps:
     - uses: actions/checkout@v4
@@ -220,6 +217,7 @@ jobs:
         sudo apt-get update
         sudo apt-get install -y tesseract-ocr tesseract-ocr-kor
         tesseract --version
+        make install-${{ matrix.extra }}
         make test-extra-${{ matrix.extra }} CI=true
 
   setup_ingest:
@@ -227,8 +225,6 @@ jobs:
       matrix:
         python-version: [ "3.9","3.10" ]
     runs-on: ubuntu-latest
-    env:
-      NLTK_DATA: ${{ github.workspace }}/nltk_data
     needs: [setup]
     steps:
       - uses: actions/checkout@v4
@@ -307,7 +303,6 @@ jobs:
         MXBAI_API_KEY: ${{secrets.MXBAI_API_KEY}}
         OCR_AGENT: "unstructured.partition.utils.ocr_models.tesseract_ocr.OCRAgentTesseract"
         CI: "true"
-        NLTK_DATA: ${{ github.workspace }}/nltk_data
         PYTHON: python${{ matrix.python-version }}
       run: |
         source .venv/bin/activate
@@ -320,6 +315,8 @@ jobs:
         sudo apt-get install -y tesseract-ocr-kor
         sudo apt-get install diffstat
         tesseract --version
+        make install-all-docs
+        make install-ingest
         ./test_unstructured_ingest/test-ingest-src.sh
 
 
@@ -329,8 +326,6 @@ jobs:
         # NOTE(yuming): Unstructured API only use Python 3.10
         python-version: ["3.10"]
     runs-on: ubuntu-latest
-    env:
-      NLTK_DATA: ${{ github.workspace }}/nltk_data
     needs: [setup, lint]
     steps:
     - uses: actions/checkout@v4
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 1be43eafdf..8b85d3a75e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,7 +1,8 @@
-## 0.16.1-dev5
+## 0.16.1-dev6
 
 ### Enhancements
 
+* **Bump `unstructured-inference` to 0.8.0** and upgrade other dependencies
 * **Round coordinates** Round coordinates when computing bounding box overlaps in `pdfminer_processing.py` to nearest machine precision. This can help reduce underterministic behavior from machine precision that affects which bounding boxes to combine.
 
 ### Features
diff --git a/requirements/base.txt b/requirements/base.txt
index 5ff129c06a..b4da37cc68 100644
--- a/requirements/base.txt
+++ b/requirements/base.txt
@@ -4,7 +4,7 @@
 #
 #    pip-compile ./base.in
 #
-anyio==4.6.0
+anyio==4.6.2.post1
     # via httpx
 backoff==2.2.1
     # via -r ./base.in
@@ -20,7 +20,7 @@ cffi==1.17.1
     # via cryptography
 chardet==5.2.0
     # via -r ./base.in
-charset-normalizer==3.3.2
+charset-normalizer==3.4.0
     # via
     #   requests
     #   unstructured-client
@@ -28,7 +28,7 @@ click==8.1.7
     # via
     #   nltk
     #   python-oxmsg
-cryptography==43.0.1
+cryptography==43.0.3
     # via unstructured-client
 dataclasses-json==0.6.7
     # via
@@ -62,7 +62,7 @@ langdetect==1.0.9
     # via -r ./base.in
 lxml==5.3.0
     # via -r ./base.in
-marshmallow==3.22.0
+marshmallow==3.23.0
     # via
     #   dataclasses-json
     #   unstructured-client
@@ -84,7 +84,7 @@ packaging==24.1
     # via
     #   marshmallow
     #   unstructured-client
-psutil==6.0.0
+psutil==6.1.0
     # via -r ./base.in
 pycparser==2.22
     # via cffi
diff --git a/requirements/dev.txt b/requirements/dev.txt
index 3ce9e87d64..bd90364012 100644
--- a/requirements/dev.txt
+++ b/requirements/dev.txt
@@ -4,7 +4,7 @@
 #
 #    pip-compile ./dev.in
 #
-build==1.2.2
+build==1.2.2.post1
     # via pip-tools
 cfgv==3.4.0
     # via pre-commit
@@ -13,7 +13,7 @@ click==8.1.7
     #   -c ./base.txt
     #   -c ./test.txt
     #   pip-tools
-distlib==0.3.8
+distlib==0.3.9
     # via virtualenv
 filelock==3.16.1
     # via virtualenv
@@ -36,7 +36,7 @@ platformdirs==4.3.6
     # via
     #   -c ./test.txt
     #   virtualenv
-pre-commit==3.8.0
+pre-commit==4.0.1
     # via -r ./dev.in
 pyproject-hooks==1.2.0
     # via
@@ -51,7 +51,7 @@ tomli==2.0.2
     #   -c ./test.txt
     #   build
     #   pip-tools
-virtualenv==20.26.6
+virtualenv==20.27.0
     # via pre-commit
 wheel==0.44.0
     # via pip-tools
diff --git a/requirements/extra-epub.txt b/requirements/extra-epub.txt
index 6946095500..a9533059da 100644
--- a/requirements/extra-epub.txt
+++ b/requirements/extra-epub.txt
@@ -4,5 +4,5 @@
 #
 #    pip-compile ./extra-epub.in
 #
-pypandoc==1.13
+pypandoc==1.14
     # via -r ./extra-epub.in
diff --git a/requirements/extra-odt.txt b/requirements/extra-odt.txt
index 9451b480ca..28ebf301a6 100644
--- a/requirements/extra-odt.txt
+++ b/requirements/extra-odt.txt
@@ -8,7 +8,7 @@ lxml==5.3.0
     # via
     #   -c ./base.txt
     #   python-docx
-pypandoc==1.13
+pypandoc==1.14
     # via -r ./extra-odt.in
 python-docx==1.1.2
     # via -r ./extra-odt.in
diff --git a/requirements/extra-paddleocr.txt b/requirements/extra-paddleocr.txt
index db0079f9f6..c758ad209b 100644
--- a/requirements/extra-paddleocr.txt
+++ b/requirements/extra-paddleocr.txt
@@ -4,7 +4,7 @@
 #
 #    pip-compile ./extra-paddleocr.in
 #
-anyio==4.6.0
+anyio==4.6.2.post1
     # via
     #   -c ./base.txt
     #   httpx
@@ -16,7 +16,7 @@ certifi==2024.8.30
     #   httpcore
     #   httpx
     #   requests
-charset-normalizer==3.3.2
+charset-normalizer==3.4.0
     # via
     #   -c ./base.txt
     #   requests
@@ -52,7 +52,7 @@ idna==3.10
     #   anyio
     #   httpx
     #   requests
-imageio==2.35.1
+imageio==2.36.0
     # via
     #   imgaug
     #   scikit-image
@@ -104,7 +104,7 @@ paddlepaddle==3.0.0b1
     # via -r ./extra-paddleocr.in
 pdf2image==1.17.0
     # via unstructured-paddleocr
-pillow==10.4.0
+pillow==11.0.0
     # via
     #   imageio
     #   imgaug
@@ -117,9 +117,9 @@ protobuf==4.25.5
     # via
     #   -c ././deps/constraints.txt
     #   paddlepaddle
-pyclipper==1.3.0.post5
+pyclipper==1.3.0.post6
     # via unstructured-paddleocr
-pyparsing==3.1.4
+pyparsing==3.2.0
     # via matplotlib
 python-dateutil==2.9.0.post0
     # via
diff --git a/requirements/extra-pandoc.txt b/requirements/extra-pandoc.txt
index bde50c2ba5..4125059733 100644
--- a/requirements/extra-pandoc.txt
+++ b/requirements/extra-pandoc.txt
@@ -4,5 +4,5 @@
 #
 #    pip-compile ./extra-pandoc.in
 #
-pypandoc==1.13
+pypandoc==1.14
     # via -r ./extra-pandoc.in
diff --git a/requirements/extra-pdf-image.in b/requirements/extra-pdf-image.in
index f8a746d687..494f6dc4ff 100644
--- a/requirements/extra-pdf-image.in
+++ b/requirements/extra-pdf-image.in
@@ -11,5 +11,5 @@ google-cloud-vision
 effdet
 # Do not move to constraints.in, otherwise unstructured-inference will not be upgraded
 # when unstructured library is.
-unstructured-inference==0.7.36
+unstructured-inference==0.8.0
 unstructured.pytesseract>=0.3.12
diff --git a/requirements/extra-pdf-image.txt b/requirements/extra-pdf-image.txt
index a7d3ce8cf1..0c88ff55d3 100644
--- a/requirements/extra-pdf-image.txt
+++ b/requirements/extra-pdf-image.txt
@@ -16,7 +16,7 @@ cffi==1.17.1
     # via
     #   -c ./base.txt
     #   cryptography
-charset-normalizer==3.3.2
+charset-normalizer==3.4.0
     # via
     #   -c ./base.txt
     #   pdfminer-six
@@ -25,7 +25,7 @@ coloredlogs==15.0.1
     # via onnxruntime
 contourpy==1.3.0
     # via matplotlib
-cryptography==43.0.1
+cryptography==43.0.3
     # via
     #   -c ./base.txt
     #   pdfminer-six
@@ -48,7 +48,7 @@ fsspec==2024.9.0
     # via
     #   huggingface-hub
     #   torch
-google-api-core[grpc]==2.20.0
+google-api-core[grpc]==2.21.0
     # via google-cloud-vision
 google-auth==2.35.0
     # via
@@ -60,14 +60,14 @@ googleapis-common-protos==1.65.0
     # via
     #   google-api-core
     #   grpcio-status
-grpcio==1.66.2
+grpcio==1.67.0
     # via
     #   -c ././deps/constraints.txt
     #   google-api-core
     #   grpcio-status
 grpcio-status==1.62.3
     # via google-api-core
-huggingface-hub==0.25.1
+huggingface-hub==0.26.0
     # via
     #   timm
     #   tokenizers
@@ -93,7 +93,7 @@ lxml==5.3.0
     # via
     #   -c ./base.txt
     #   pikepdf
-markupsafe==2.1.5
+markupsafe==3.0.2
     # via jinja2
 matplotlib==3.9.2
     # via
@@ -117,6 +117,7 @@ numpy==1.26.4
     #   scipy
     #   torchvision
     #   transformers
+    #   unstructured-inference
 omegaconf==2.3.0
     # via effdet
 onnx==1.17.0
@@ -150,11 +151,11 @@ pdfminer-six==20231228
     #   pdfplumber
 pdfplumber==0.11.4
     # via layoutparser
-pi-heif==0.18.0
+pi-heif==0.20.0
     # via -r ./extra-pdf-image.in
 pikepdf==9.3.0
     # via -r ./extra-pdf-image.in
-pillow==10.4.0
+pillow==11.0.0
     # via
     #   layoutparser
     #   matplotlib
@@ -192,7 +193,7 @@ pycparser==2.22
     # via
     #   -c ./base.txt
     #   cffi
-pyparsing==3.1.4
+pyparsing==3.2.0
     # via matplotlib
 pypdf==5.0.1
     # via
@@ -242,11 +243,11 @@ six==1.16.0
     # via
     #   -c ./base.txt
     #   python-dateutil
-sympy==1.13.3
+sympy==1.13.1
     # via
     #   onnxruntime
     #   torch
-timm==1.0.9
+timm==1.0.11
     # via
     #   effdet
     #   unstructured-inference
@@ -254,13 +255,13 @@ tokenizers==0.19.1
     # via
     #   -c ././deps/constraints.txt
     #   transformers
-torch==2.4.1
+torch==2.5.0
     # via
     #   effdet
     #   timm
     #   torchvision
     #   unstructured-inference
-torchvision==0.19.1
+torchvision==0.20.0
     # via
     #   effdet
     #   timm
@@ -281,7 +282,7 @@ typing-extensions==4.12.2
     #   torch
 tzdata==2024.2
     # via pandas
-unstructured-inference==0.7.36
+unstructured-inference==0.8.0
     # via -r ./extra-pdf-image.in
 unstructured-pytesseract==0.3.13
     # via -r ./extra-pdf-image.in
diff --git a/requirements/extra-pptx.txt b/requirements/extra-pptx.txt
index 18bbad32ea..87119d3047 100644
--- a/requirements/extra-pptx.txt
+++ b/requirements/extra-pptx.txt
@@ -6,7 +6,7 @@
 #
 lxml==5.3.0
     # via python-pptx
-pillow==10.4.0
+pillow==11.0.0
     # via python-pptx
 python-pptx==1.0.2
     # via -r ./extra-pptx.in
diff --git a/requirements/huggingface.txt b/requirements/huggingface.txt
index 7b2e04bde3..5741ccdcd5 100644
--- a/requirements/huggingface.txt
+++ b/requirements/huggingface.txt
@@ -8,7 +8,7 @@ certifi==2024.8.30
     # via
     #   -c ./base.txt
     #   requests
-charset-normalizer==3.3.2
+charset-normalizer==3.4.0
     # via
     #   -c ./base.txt
     #   requests
@@ -25,7 +25,7 @@ fsspec==2024.9.0
     # via
     #   huggingface-hub
     #   torch
-huggingface-hub==0.25.1
+huggingface-hub==0.26.0
     # via
     #   tokenizers
     #   transformers
@@ -43,7 +43,7 @@ langdetect==1.0.9
     # via
     #   -c ./base.txt
     #   -r ./huggingface.in
-markupsafe==2.1.5
+markupsafe==3.0.2
     # via jinja2
 mpmath==1.3.0
     # via sympy
@@ -82,13 +82,13 @@ six==1.16.0
     # via
     #   -c ./base.txt
     #   langdetect
-sympy==1.13.3
+sympy==1.13.1
     # via torch
 tokenizers==0.19.1
     # via
     #   -c ././deps/constraints.txt
     #   transformers
-torch==2.4.1
+torch==2.5.0
     # via -r ./huggingface.in
 tqdm==4.66.5
     # via
diff --git a/requirements/test.txt b/requirements/test.txt
index 6c9660091a..f368f4f5d3 100644
--- a/requirements/test.txt
+++ b/requirements/test.txt
@@ -6,7 +6,7 @@
 #
 annotated-types==0.7.0
     # via pydantic
-anyio==4.6.0
+anyio==4.6.2.post1
     # via
     #   -c ./base.txt
     #   httpx
@@ -16,7 +16,7 @@ attrs==24.2.0
     # via jsonschema
 autoflake==2.3.1
     # via -r ./test.in
-black==24.8.0
+black==24.10.0
     # via -r ./test.in
 certifi==2024.8.30
     # via
@@ -24,7 +24,7 @@ certifi==2024.8.30
     #   httpcore
     #   httpx
     #   requests
-charset-normalizer==3.3.2
+charset-normalizer==3.4.0
     # via
     #   -c ./base.txt
     #   requests
@@ -33,7 +33,7 @@ click==8.1.7
     #   -c ./base.txt
     #   black
     #   nltk
-coverage[toml]==7.6.1
+coverage[toml]==7.6.4
     # via
     #   -r ./test.in
     #   pytest-cov
@@ -50,7 +50,7 @@ flake8-print==5.0.0
     # via -r ./test.in
 freezegun==1.5.1
     # via -r ./test.in
-grpcio==1.66.2
+grpcio==1.67.0
     # via
     #   -c ././deps/constraints.txt
     #   -r ./test.in
@@ -95,7 +95,7 @@ mccabe==0.7.0
     # via flake8
 multidict==6.1.0
     # via yarl
-mypy==1.11.2
+mypy==1.12.1
     # via -r ./test.in
 mypy-extensions==1.0.0
     # via
@@ -119,12 +119,14 @@ pandas==2.2.3
     # via label-studio-sdk
 pathspec==0.12.1
     # via black
-pillow==10.4.0
+pillow==11.0.0
     # via label-studio-sdk
 platformdirs==4.3.6
     # via black
 pluggy==1.5.0
     # via pytest
+propcache==0.2.0
+    # via yarl
 pycodestyle==2.12.1
     # via
     #   flake8
@@ -226,7 +228,7 @@ urllib3==1.26.20
     #   -c ./base.txt
     #   requests
     #   vcrpy
-vcrpy==6.0.1
+vcrpy==6.0.2
     # via -r ./test.in
 wrapt==1.16.0
     # via
@@ -234,7 +236,7 @@ wrapt==1.16.0
     #   vcrpy
 xmljson==0.2.1
     # via label-studio-sdk
-yarl==1.13.1
+yarl==1.15.5
     # via vcrpy
 
 # The following packages are considered to be unsafe in a requirements file:
diff --git a/test_unstructured/partition/pdf_image/test_inference_utils.py b/test_unstructured/partition/pdf_image/test_inference_utils.py
index 085377f189..1000b4bad1 100644
--- a/test_unstructured/partition/pdf_image/test_inference_utils.py
+++ b/test_unstructured/partition/pdf_image/test_inference_utils.py
@@ -1,4 +1,4 @@
-from unstructured_inference.inference.elements import TextRegion
+from unstructured_inference.inference.elements import TextRegion, TextRegions
 from unstructured_inference.inference.layoutelement import LayoutElement
 
 from unstructured.documents.elements import ElementType
@@ -17,7 +17,7 @@ def test_merge_text_regions(mock_embedded_text_regions):
         text="LayoutParser: A Unified Toolkit for Deep Learning Based Document Image",
     )
 
-    merged_text_region = merge_text_regions(mock_embedded_text_regions)
+    merged_text_region = merge_text_regions(TextRegions.from_list(mock_embedded_text_regions))
     assert merged_text_region == expected
 
 
diff --git a/test_unstructured/partition/pdf_image/test_pdf.py b/test_unstructured/partition/pdf_image/test_pdf.py
index 4362f06bbd..ac780caf7f 100644
--- a/test_unstructured/partition/pdf_image/test_pdf.py
+++ b/test_unstructured/partition/pdf_image/test_pdf.py
@@ -179,6 +179,12 @@ def _test(result):
         # check that the pdf has multiple different page numbers
         assert {element.metadata.page_number for element in result} == expected_page_numbers
         if UNSTRUCTURED_INCLUDE_DEBUG_METADATA:
+            print(
+                [
+                    (element.metadata.detection_origin, element.category, element.text)
+                    for element in result
+                ]
+            )
             assert {element.metadata.detection_origin for element in result} == origin
 
     if file_mode == "filename":
diff --git a/test_unstructured/partition/test_api.py b/test_unstructured/partition/test_api.py
index f95dd78595..93f36e2e15 100644
--- a/test_unstructured/partition/test_api.py
+++ b/test_unstructured/partition/test_api.py
@@ -19,8 +19,8 @@
 
 DIRECTORY = pathlib.Path(__file__).parent.resolve()
 
-# NOTE(crag): point to freemium API for now
-API_URL = "https://api.unstructured.io/general/v0/general"
+# NOTE(yao): point to paid API for now
+API_URL = "https://api.unstructuredapp.io/general/v0/general"
 
 is_in_ci = os.getenv("CI", "").lower() not in {"", "false", "f", "0"}
 skip_not_on_main = os.getenv("GITHUB_REF_NAME", "").lower() != "main"
diff --git a/test_unstructured_ingest/expected-structured-output/biomed-api/65/11/main.PMC6312790.pdf.json b/test_unstructured_ingest/expected-structured-output/biomed-api/65/11/main.PMC6312790.pdf.json
index 164e9cfa2f..484b099f94 100644
--- a/test_unstructured_ingest/expected-structured-output/biomed-api/65/11/main.PMC6312790.pdf.json
+++ b/test_unstructured_ingest/expected-structured-output/biomed-api/65/11/main.PMC6312790.pdf.json
@@ -338,7 +338,20 @@
     "type": "ListItem"
   },
   {
-    "element_id": "6277cd91869e10d6256f362b08d3e789",
+    "element_id": "f0f0586caeb3af4284c1b367a5269d27",
+    "metadata": {
+      "data_source": {},
+      "filetype": "application/pdf",
+      "languages": [
+        "eng"
+      ],
+      "page_number": 2
+    },
+    "text": "452",
+    "type": "Header"
+  },
+  {
+    "element_id": "ac79570be092923eb29899f64281c3b3",
     "metadata": {
       "data_source": {},
       "filetype": "application/pdf",
@@ -351,7 +364,7 @@
     "type": "Table"
   },
   {
-    "element_id": "22b8448fe36b3ccd06d1d8e4ea2dc1ea",
+    "element_id": "13fd694e1ff862d163b840a246964e58",
     "metadata": {
       "data_source": {},
       "filetype": "application/pdf",
@@ -364,7 +377,7 @@
     "type": "Title"
   },
   {
-    "element_id": "f2b57562924402b85f6eb07925ea1654",
+    "element_id": "5f1c4074c1b5d641b724b99be6f5ddfd",
     "metadata": {
       "data_source": {},
       "filetype": "application/pdf",
@@ -377,7 +390,7 @@
     "type": "NarrativeText"
   },
   {
-    "element_id": "d9f6efffd49ef59e671206bfb5f094de",
+    "element_id": "afed004de4c50d761640b6c18729a988",
     "metadata": {
       "data_source": {},
       "filetype": "application/pdf",
@@ -390,7 +403,7 @@
     "type": "ListItem"
   },
   {
-    "element_id": "2a1e46bc589c5eca777b657e141e824b",
+    "element_id": "f93d89ccb971e2b60f44afbf710673c6",
     "metadata": {
       "data_source": {},
       "filetype": "application/pdf",
@@ -403,7 +416,7 @@
     "type": "NarrativeText"
   },
   {
-    "element_id": "2c42182c07ecdb96362b534a8fad4d59",
+    "element_id": "cb6e8acb9c24820b59f8973cc236ef35",
     "metadata": {
       "data_source": {},
       "filetype": "application/pdf",
@@ -416,7 +429,7 @@
     "type": "ListItem"
   },
   {
-    "element_id": "c6fd85f9219a2c75bb1f8c1889bb2b5f",
+    "element_id": "5964ede27be8850de7a13e0dd32c1b21",
     "metadata": {
       "data_source": {},
       "filetype": "application/pdf",
@@ -429,7 +442,7 @@
     "type": "NarrativeText"
   },
   {
-    "element_id": "07cdb1623f501ea23a343039300178cc",
+    "element_id": "e1f7e635d8739a97d8d0000ba8004f61",
     "metadata": {
       "data_source": {},
       "filetype": "application/pdf",
@@ -442,7 +455,7 @@
     "type": "ListItem"
   },
   {
-    "element_id": "4bf8165bcb21c5296b741ba0f9e38f93",
+    "element_id": "deb8964830ba1f9dd1eec7b08bd3ea19",
     "metadata": {
       "data_source": {},
       "filetype": "application/pdf",
@@ -455,7 +468,7 @@
     "type": "Title"
   },
   {
-    "element_id": "85918ce2a03e9f236137a0fe72985af0",
+    "element_id": "be270e13c935334fa3b17b13066d639b",
     "metadata": {
       "data_source": {},
       "filetype": "application/pdf",
@@ -468,7 +481,7 @@
     "type": "NarrativeText"
   },
   {
-    "element_id": "93537983496efa695cfc65ad895d9412",
+    "element_id": "5c97405ec921495b23d2b400516cbd06",
     "metadata": {
       "data_source": {},
       "filetype": "application/pdf",
@@ -481,7 +494,7 @@
     "type": "Image"
   },
   {
-    "element_id": "76b94e78b638b79374e266284c1a0d83",
+    "element_id": "7956ee39ac5e080a362967e2f6a5753e",
     "metadata": {
       "data_source": {},
       "filetype": "application/pdf",
diff --git a/test_unstructured_ingest/expected-structured-output/biomed-api/75/29/main.PMC6312793.pdf.json b/test_unstructured_ingest/expected-structured-output/biomed-api/75/29/main.PMC6312793.pdf.json
index 64c57d6dfc..5a25c95e60 100644
--- a/test_unstructured_ingest/expected-structured-output/biomed-api/75/29/main.PMC6312793.pdf.json
+++ b/test_unstructured_ingest/expected-structured-output/biomed-api/75/29/main.PMC6312793.pdf.json
@@ -598,20 +598,7 @@
     "type": "NarrativeText"
   },
   {
-    "element_id": "448de3300a8c7e2cfdd2028dd0bb4171",
-    "metadata": {
-      "data_source": {},
-      "filetype": "application/pdf",
-      "languages": [
-        "eng"
-      ],
-      "page_number": 2
-    },
-    "text": "and",
-    "type": "NarrativeText"
-  },
-  {
-    "element_id": "b13807f59ac7c6647ee0aee74f9b0dd3",
+    "element_id": "db6ff60cbdb77adc14a6b9491af8d161",
     "metadata": {
       "data_source": {},
       "filetype": "application/pdf",
@@ -624,7 +611,7 @@
     "type": "ListItem"
   },
   {
-    "element_id": "db480e847a5703b19be6b79223e1ee03",
+    "element_id": "9f6ef223a141a5381951eff39b3af039",
     "metadata": {
       "data_source": {},
       "filetype": "application/pdf",
@@ -637,7 +624,7 @@
     "type": "NarrativeText"
   },
   {
-    "element_id": "326c44638a881f86474b82cc244896f9",
+    "element_id": "5c67842128e14fc16344beaa2aa0111e",
     "metadata": {
       "data_source": {},
       "filetype": "application/pdf",
diff --git a/test_unstructured_ingest/expected-structured-output/local-single-file-with-pdf-infer-table-structure/layout-parser-paper.pdf.json b/test_unstructured_ingest/expected-structured-output/local-single-file-with-pdf-infer-table-structure/layout-parser-paper.pdf.json
index 8c3c0f6ae6..66e1dbea73 100644
--- a/test_unstructured_ingest/expected-structured-output/local-single-file-with-pdf-infer-table-structure/layout-parser-paper.pdf.json
+++ b/test_unstructured_ingest/expected-structured-output/local-single-file-with-pdf-infer-table-structure/layout-parser-paper.pdf.json
@@ -1276,9 +1276,75 @@
       }
     }
   },
+  {
+    "type": "ListItem",
+    "element_id": "53b448c75f1556b1f60b4e3324bd0724",
+    "text": "1 import layoutparser as lp",
+    "metadata": {
+      "filetype": "application/pdf",
+      "languages": [
+        "eng"
+      ],
+      "page_number": 5,
+      "data_source": {
+        "record_locator": {
+          "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf"
+        },
+        "permissions_data": [
+          {
+            "mode": 33188
+          }
+        ]
+      }
+    }
+  },
+  {
+    "type": "ListItem",
+    "element_id": "a002e13c7ea2613b2eabb9ea3501856d",
+    "text": "3 model = lp . De t e c tro n2 Lay outM odel (",
+    "metadata": {
+      "filetype": "application/pdf",
+      "languages": [
+        "eng"
+      ],
+      "page_number": 5,
+      "data_source": {
+        "record_locator": {
+          "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf"
+        },
+        "permissions_data": [
+          {
+            "mode": 33188
+          }
+        ]
+      }
+    }
+  },
+  {
+    "type": "ListItem",
+    "element_id": "366c05fd7babc86bf01d690b9df755da",
+    "text": "5 layout = model . detect ( image )",
+    "metadata": {
+      "filetype": "application/pdf",
+      "languages": [
+        "eng"
+      ],
+      "page_number": 5,
+      "data_source": {
+        "record_locator": {
+          "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf"
+        },
+        "permissions_data": [
+          {
+            "mode": 33188
+          }
+        ]
+      }
+    }
+  },
   {
     "type": "NarrativeText",
-    "element_id": "59171bb0b4a32c9ec1b0e1d327ddb88f",
+    "element_id": "f888c5e8f5b1339f2af75612ea13c719",
     "text": "LayoutParser provides a wealth of pre-trained model weights using various datasets covering di\ufb00erent languages, time periods, and document types. Due to domain shift [7], the prediction performance can notably drop when models are ap- plied to target samples that are signi\ufb01cantly di\ufb00erent from the training dataset. As document structures and layouts vary greatly in di\ufb00erent domains, it is important to select models trained on a dataset similar to the test samples. A semantic syntax is used for initializing the model weights in LayoutParser, using both the dataset name and model name lp://<dataset-name>/<model-architecture-name>.",
     "metadata": {
       "filetype": "application/pdf",
diff --git a/unstructured/__version__.py b/unstructured/__version__.py
index 038be7ea70..6ce43d5c79 100644
--- a/unstructured/__version__.py
+++ b/unstructured/__version__.py
@@ -1 +1 @@
-__version__ = "0.16.1-dev5"  # pragma: no cover
+__version__ = "0.16.1-dev6"  # pragma: no cover
diff --git a/unstructured/partition/pdf_image/inference_utils.py b/unstructured/partition/pdf_image/inference_utils.py
index 6fdd4c05cf..7218eb93b9 100644
--- a/unstructured/partition/pdf_image/inference_utils.py
+++ b/unstructured/partition/pdf_image/inference_utils.py
@@ -3,7 +3,7 @@
 from typing import TYPE_CHECKING, Optional
 
 from unstructured_inference.constants import Source
-from unstructured_inference.inference.elements import TextRegion
+from unstructured_inference.inference.elements import TextRegion, TextRegions
 from unstructured_inference.inference.layoutelement import (
     LayoutElement,
     partition_groups_from_regions,
@@ -66,9 +66,9 @@ def build_layout_elements_from_ocr_regions(
             for r in regions:
                 ocr_regions.remove(r)
 
-            grouped_regions.append(regions)
+            grouped_regions.append(TextRegions.from_list(regions))
     else:
-        grouped_regions = partition_groups_from_regions(ocr_regions)
+        grouped_regions = partition_groups_from_regions(TextRegions.from_list(ocr_regions))
 
     merged_regions = [merge_text_regions(group) for group in grouped_regions]
     return [
@@ -79,12 +79,12 @@ def build_layout_elements_from_ocr_regions(
     ]
 
 
-def merge_text_regions(regions: list[TextRegion]) -> TextRegion:
+def merge_text_regions(regions: TextRegions) -> TextRegion:
     """
     Merge a list of TextRegion objects into a single TextRegion.
 
     Parameters:
-    - group (list[TextRegion]): A list of TextRegion objects to be merged.
+    - regions (TextRegions): A group of TextRegion objects to be merged.
 
     Returns:
     - TextRegion: A single merged TextRegion object.
@@ -93,13 +93,12 @@ def merge_text_regions(regions: list[TextRegion]) -> TextRegion:
     if not regions:
         raise ValueError("The text regions to be merged must be provided.")
 
-    min_x1 = min([tr.bbox.x1 for tr in regions])
-    min_y1 = min([tr.bbox.y1 for tr in regions])
-    max_x2 = max([tr.bbox.x2 for tr in regions])
-    max_y2 = max([tr.bbox.y2 for tr in regions])
+    min_x1 = regions.x1.min().astype(float)
+    min_y1 = regions.y1.min().astype(float)
+    max_x2 = regions.x2.max().astype(float)
+    max_y2 = regions.y2.max().astype(float)
 
-    merged_text = " ".join([tr.text for tr in regions if tr.text])
-    sources = [tr.source for tr in regions]
-    source = sources[0] if all(s == sources[0] for s in sources) else None
+    merged_text = " ".join([text for text in regions.texts if text])
+    source = regions.source
 
     return TextRegion.from_coords(min_x1, min_y1, max_x2, max_y2, merged_text, source)