diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 82e0fd7f6..8e3382851 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -10,7 +10,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: [3.8, 3.9, '3.10', '3.11']
+        python-version: [3.9, '3.10', '3.11', '3.12']
 
     steps:
       - uses: actions/checkout@v4
@@ -42,10 +42,10 @@ jobs:
       - uses: actions/checkout@v4
         with:
           fetch-depth: 0
-      - name: Set up Python 3.9
+      - name: Set up Python 3.11
         uses: actions/setup-python@v5
         with:
-          python-version: 3.9
+          python-version: 3.11
       - name: Build a binary wheel and a source tarball
         run: |
           python -m pip install build --user
@@ -61,53 +61,9 @@ jobs:
           name: pypi_packages
           path: dist/*
 
-  build-n-publish-anaconda:
-    name: Build and publish anaconda packages
-    needs: lint_and_test
-    runs-on: ubuntu-latest
-    if: startsWith(github.ref, 'refs/tags/')
-
-    steps:
-      - uses: actions/checkout@v4
-        with:
-          fetch-depth: 0
-      - uses: conda-incubator/setup-miniconda@v3
-        with:
-          python-version: 3.9
-          miniforge-variant: Mambaforge
-      - name: install dependencies build
-        shell: bash -l {0}
-        run: mamba install colorama pip ruamel ruamel.yaml rich jsonschema conda-verify anaconda-client
-      - name: Build linux-64 conda package
-        uses: prefix-dev/rattler-build-action@v0.2.6
-        with:
-          recipe-path: "conda/recipe.yaml"
-          build-args: "--experimental --target-platform linux-64"
-      - name: Build osx-64 conda package
-        uses: prefix-dev/rattler-build-action@v0.2.6
-        with:
-          recipe-path: "conda/recipe.yaml"
-          build-args: "--experimental --target-platform osx-64"
-#      - name: Build osx-arm64 conda package
-#        uses: prefix-dev/rattler-build-action@v0.2.6
-#        with:
-#          recipe-path: "conda/recipe.yaml"
-#          build-args: "--experimental --target-platform osx-arm64"
-      - name: Upload conda package
-        run: |
-          for pkg in $(find output -type f \( -name "*.conda" -o -name "*.tar.bz2" \) ); do
-            echo "Uploading ${pkg}"
-            rattler-build upload anaconda -o mittagessen -a ${{ secrets.ANACONDA_TOKEN }} "${pkg}"
-          done
-      - name: Upload conda artifacts to GH storage
-        uses: actions/upload-artifact@v4
-        with:
-          name: conda_packages
-          path: output/*/*.conda
-
   autodraft-gh-release:
     name: Create github release
-    needs: [build-n-publish-anaconda, build-n-publish-pypi]
+    needs: build-n-publish-pypi
     runs-on: ubuntu-latest
 
     steps:
@@ -140,10 +96,10 @@ jobs:
       - uses: actions/checkout@v4
         with:
           fetch-depth: 0
-      - name: Set up Python 3.9
+      - name: Set up Python 3.11
        uses: actions/setup-python@v5
        with:
-         python-version: 3.9
+         python-version: 3.11
      - name: Install sphinx-multiversion
        run: python -m pip install sphinx-multiversion sphinx-autoapi
      - name: Create docs
diff --git a/README.rst b/README.rst
index 747b1036c..72632cee2 100644
--- a/README.rst
+++ b/README.rst
@@ -25,18 +25,12 @@ Installation
 
 kraken only runs on **Linux or Mac OS X**. Windows is not supported.
 
-The latest stable releases can be installed either from `PyPi `_:
+The latest stable releases can be installed from `PyPi `_:
 
 ::
 
   $ pip install kraken
 
-or through `conda `_:
-
-::
-
-  $ conda install -c conda-forge -c mittagessen kraken
-
 If you want direct PDF and multi-image TIFF/JPEG2000 support it is necessary to
 install the `pdf` extras package for PyPi:
 
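A note on the installation hunks above: together with the ``python_requires`` bump further down in this diff, the supported interpreter range is now Python 3.9-3.12. The snippet below is only an illustrative sketch of a pre-install check; the bounds are taken from the new CI matrix and ``setup.cfg``, everything else is assumption.

.. code-block:: python

    import sys

    # Supported range mirrors the new CI matrix (3.9-3.12) and
    # python_requires = >=3.9,<3.13 in setup.cfg.
    if not ((3, 9) <= sys.version_info[:2] < (3, 13)):
        raise SystemExit(f'kraken now targets Python 3.9-3.12, '
                         f'found {sys.version.split()[0]}')
    print("interpreter OK for 'pip install kraken'")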
diff --git a/conda/recipe.yaml b/conda/recipe.yaml
deleted file mode 100644
index 9d8ed34ae..000000000
--- a/conda/recipe.yaml
+++ /dev/null
@@ -1,55 +0,0 @@
-context:
-  git_url: .
-  git_tag: ${{ git.latest_tag(git_url) }}
-
-package:
-  name: kraken
-  version: ${{ git_tag }}
-
-source:
-  git: ${{ git_url }}
-  tag: ${{ git_tag }}
-
-build:
-  script: pip install --no-deps .
-
-requirements:
-  build:
-    - python>=3.8,<3.12
-    - setuptools>=36.6.0,<70.0.0
-    - pbr
-  host:
-    - python>=3.8,<3.12
-  run:
-    - python>=3.8,<3.12
-    - python-bidi~=0.4.0
-    - lxml
-    - regex
-    - requests
-    - click>=8.1
-    - numpy~=1.23.0
-    - pillow>=9.2.0
-    - scipy~=1.11.0
-    - jinja2~=3.0
-    - torchvision
-    - pytorch~=2.1.0
-    - cudatoolkit
-    - jsonschema
-    - scikit-image~=0.21.0
-    - scikit-learn~=1.2.1
-    - shapely~=1.8.5
-    - pyvips
-    - coremltools
-    - pyarrow
-    - lightning~=2.2
-    - torchmetrics>=1.1.0
-    - conda-forge::threadpoolctl~=3.4.0
-    - albumentations
-    - rich
-
-about:
-  homepage: https://kraken.re
-  license: Apache-2.0
-  summary: 'OCR/HTR engine for all the languages'
-  repository: https://github.com/mittagessen/kraken
-  documentation: https://kraken.re
diff --git a/docs/index.rst b/docs/index.rst
index dda41e35e..4c1076a09 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -67,22 +67,12 @@ or
 
 respectively.
 
-Installation using Conda
-------------------------
+Development Branch Installation using Conda
+-------------------------------------------
 
-To install the stable version through `conda `_:
-
-.. code-block:: console
-
-  $ conda install -c conda-forge -c mittagessen kraken
-
-Again PDF/multi-page TIFF/JPEG2000 support requires some additional dependencies:
-
-.. code-block:: console
-
-  $ conda install -c conda-forge pyvips
-
-The git repository contains some environment files that aid in setting up the latest development version:
+To install the latest development branch through `conda
+`_ clone the kraken git repository and install with the
+provided environment files:
 
 .. code-block:: console
 
diff --git a/environment.yml b/environment.yml
index 47e991eeb..6d192901b 100644
--- a/environment.yml
+++ b/environment.yml
@@ -4,32 +4,32 @@ channels:
   - conda-forge
 dependencies:
   - python>=3.9
-  - python-bidi~=0.4.0
+  - python-bidi~=0.6.0
   - lxml
   - regex
   - requests
   - click>=8.1
-  - numpy~=1.23.0
+  - numpy~=2.0.0
   - pillow~=9.2.0
-  - scipy~=1.10.0
+  - scipy~=1.13.0
   - jinja2~=3.0
   - conda-forge::torchvision-cpu>=0.5.0
-  - conda-forge::pytorch-cpu~=2.1.0
+  - conda-forge::pytorch-cpu~=2.4.0
   - jsonschema
   - scikit-learn~=1.2.1
-  - scikit-image~=0.21.0
-  - shapely~=1.8.5
+  - scikit-image~=0.24.0
+  - shapely>=2.0.6,~=2.0.6
   - pyvips
   - imagemagick>=7.1.0
   - pyarrow
   - importlib-resources>=1.3.0
-  - conda-forge::lightning~=2.2.0
+  - conda-forge::lightning~=2.4.0
   - conda-forge::torchmetrics>=1.1.0
-  - conda-forge::threadpoolctl~=3.4
+  - conda-forge::threadpoolctl~=3.5.0
   - pip
   - albumentations
   - rich
   - setuptools>=36.6.0,<70.0.0
   - pip:
-    - coremltools~=6.0
+    - coremltools~=8.1
     - file:.
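A note on the pin bumps in ``environment.yml`` above: the sketch below is one way to sanity-check an already activated environment against the new pins. It assumes the third-party ``packaging`` module is importable and only covers a hand-picked subset of the pinned distributions; it is not part of this diff.

.. code-block:: python

    from importlib.metadata import PackageNotFoundError, version

    # Third-party helper for PEP 440 specifier matching.
    from packaging.specifiers import SpecifierSet

    # Illustrative subset of the new pins from environment.yml/setup.cfg.
    PINS = {
        'numpy': SpecifierSet('~=2.0.0'),
        'scipy': SpecifierSet('~=1.13.0'),
        'shapely': SpecifierSet('>=2.0.6,~=2.0.6'),
        'lightning': SpecifierSet('~=2.4.0'),
        'coremltools': SpecifierSet('~=8.1'),
    }

    for name, spec in PINS.items():
        try:
            installed = version(name)
        except PackageNotFoundError:
            print(f'{name}: not installed')
            continue
        status = 'ok' if installed in spec else 'MISMATCH'
        print(f'{name} {installed} ({spec}): {status}')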
diff --git a/environment_cuda.yml b/environment_cuda.yml
index fa0e3edc9..d9525927b 100644
--- a/environment_cuda.yml
+++ b/environment_cuda.yml
@@ -4,33 +4,33 @@ channels:
   - conda-forge
 dependencies:
   - python>=3.9
-  - python-bidi~=0.4.0
+  - python-bidi~=0.6.0
   - lxml
   - regex
   - requests
   - click>=8.1
   - numpy~=1.23
   - pillow>=9.2.0
-  - scipy~=1.10.0
+  - scipy~=1.13.0
   - jinja2~=3.0
   - conda-forge::torchvision>=0.5.0
-  - conda-forge::pytorch~=2.1.0
+  - conda-forge::pytorch~=2.4.0
   - cudatoolkit>=9.2
   - jsonschema
   - scikit-learn~=1.2.1
-  - scikit-image~=0.21.0
-  - shapely~=1.8.5
+  - scikit-image~=0.24.0
+  - shapely>=2.0.6,~=2.0.6
   - pyvips
   - imagemagick>=7.1.0
   - pyarrow
   - importlib-resources>=1.3.0
-  - conda-forge::lightning~=2.2.0
+  - conda-forge::lightning~=2.4.0
   - conda-forge::torchmetrics>=1.1.0
-  - conda-forge::threadpoolctl~=3.4
+  - conda-forge::threadpoolctl~=3.5.0
   - pip
   - albumentations
   - rich
   - setuptools>=36.6.0,<70.0.0
   - pip:
-    - coremltools~=6.0
+    - coremltools~=8.1
     - file:.
diff --git a/kraken/lib/segmentation.py b/kraken/lib/segmentation.py
index 0eb7f0bdc..2c14d7bc9 100644
--- a/kraken/lib/segmentation.py
+++ b/kraken/lib/segmentation.py
@@ -757,14 +757,10 @@ def calculate_polygonal_environment(im: Image.Image = None,
         line = np.array(line.coords, dtype=float)
         offset_line = np.array(offset_line.coords, dtype=float)
-        # parallel_offset on the right reverses the coordinate order
-        if not topline:
-            offset_line = offset_line[::-1]
 
         # calculate magnitude-weighted average direction vector
         lengths = np.linalg.norm(np.diff(line.T), axis=0)
         p_dir = np.mean(np.diff(line.T) * lengths/lengths.sum(), axis=1)
         p_dir = (p_dir.T / np.sqrt(np.sum(p_dir**2, axis=-1)))
-
         env_up, env_bottom = _calc_roi(line, bounds, baselines[:idx] + baselines[idx+1:], suppl_obj, p_dir)
 
         polygons.append(_extract_patch(env_up,
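A note on the ``calculate_polygonal_environment`` hunk above: dropping the coordinate re-reversal presumably leans on the newer Shapely/GEOS releases pinned elsewhere in this diff, which try to preserve the orientation of offset curves (the old ``parallel_offset`` right-side offsets came back reversed). A small standalone sketch, assuming Shapely >= 2.0, to check that behaviour against whatever GEOS is actually installed; the baseline coordinates are made up.

.. code-block:: python

    import numpy as np
    from shapely.geometry import LineString

    # Made-up left-to-right baseline and a right-side offset of it
    # (a negative distance selects the right side in Shapely 2.x).
    baseline = LineString([(0, 0), (50, 2), (100, 0)])
    offset = baseline.offset_curve(-5)

    src = np.array(baseline.coords)
    off = np.array(offset.coords)

    # If the offset runs in the same x-direction as the baseline,
    # no re-reversal of the coordinate order is needed downstream.
    same_direction = np.sign(off[-1, 0] - off[0, 0]) == np.sign(src[-1, 0] - src[0, 0])
    print(f'right-side offset keeps baseline direction: {same_direction}')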
diff --git a/setup.cfg b/setup.cfg
index 1400d5d12..73cb67fe4 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -14,10 +14,10 @@ classifier =
     Intended Audience :: Science/Research
     License :: OSI Approved :: Apache Software License
     Operating System :: POSIX
-    Programming Language :: Python :: 3.8
     Programming Language :: Python :: 3.9
     Programming Language :: Python :: 3.10
     Programming Language :: Python :: 3.11
+    Programming Language :: Python :: 3.12
     Topic :: Scientific/Engineering :: Image Recognition
     Topic :: Scientific/Engineering :: Artificial Intelligence
 
@@ -38,29 +38,29 @@ max_line_length = 160
 exclude = tests/*
 
 [options]
-python_requires = >=3.8,<=3.11.99
+python_requires = >=3.9,<3.13
 install_requires =
     jsonschema
     lxml
     requests
     click>=8.1
-    numpy~=1.23.0
+    numpy~=2.0.0
     Pillow>=9.2.0
     regex
-    scipy~=1.10.0 # bump up to latest release with py3.8 EOL
+    scipy~=1.13.0
     protobuf>=3.0.0
-    coremltools~=6.0
+    coremltools~=8.1
     jinja2~=3.0
-    python-bidi~=0.4.0
+    python-bidi~=0.6.0
     torchvision>=0.5.0
-    torch~=2.1.0
-    scikit-learn~=1.2.1
-    scikit-image~=0.21.0
-    shapely~=1.8.5
+    torch~=2.4.0
+    scikit-learn~=1.5.0
+    scikit-image~=0.24.0
+    shapely>=2.0.6,~=2.0.6
     pyarrow
-    lightning~=2.2.0
+    lightning~=2.4.0
     torchmetrics>=1.1.0
-    threadpoolctl~=3.4.0
+    threadpoolctl~=3.5.0
     importlib-resources>=1.3.0
     rich
 
diff --git a/tests/test_newpolygons.py b/tests/test_newpolygons.py
index 7c24ffb7b..119d99e7a 100644
--- a/tests/test_newpolygons.py
+++ b/tests/test_newpolygons.py
@@ -249,7 +249,7 @@ def test_ketoscli_train_old_model(self):
             fp = str(Path(tempdir) / "test.xml")
 
             self._test_ketoscli(
-                args=['train', '-f', 'xml', '-N', '1', '-q', 'fixed', '-i', self.old_model_path, '--resize', 'add', '-o', mfp, '--workers', '0', self.segmented_img],
+                args=['train', '-f', 'xml', '-N', '1', '-q', 'fixed', '-i', self.old_model_path, '--resize', 'union', '-o', mfp, '--workers', '0', self.segmented_img],
                 expect_legacy=False,
                 check_exit_code=[0, 1],  # Model may not improve during training
             )
@@ -268,7 +268,7 @@ def test_ketoscli_train_old_model_force_legacy(self):
             fp = str(Path(tempdir) / "test.xml")
 
             self._test_ketoscli(
-                args=['train', '--legacy-polygons', '-f', 'xml', '-N', '1', '-q', 'fixed', '-i', self.old_model_path, '--resize', 'add', '-o', mfp, '--workers', '0', self.segmented_img],
+                args=['train', '--legacy-polygons', '-f', 'xml', '-N', '1', '-q', 'fixed', '-i', self.old_model_path, '--resize', 'union', '-o', mfp, '--workers', '0', self.segmented_img],
                 expect_legacy=True,
                 check_exit_code=[0, 1],  # Model may not improve during training
             )
@@ -326,7 +326,7 @@ def test_ketoscli_pretrain_old_model(self):
             fp = str(Path(tempdir) / "test.xml")
 
             self._test_ketoscli(
-                args=['pretrain', '-f', 'xml', '-N', '1', '-q', 'fixed', '-i', self.old_model_path, '--resize', 'add', '-o', mfp, '--workers', '0', self.segmented_img],
+                args=['pretrain', '-f', 'xml', '-N', '1', '-q', 'fixed', '-i', self.old_model_path, '--resize', 'union', '-o', mfp, '--workers', '0', self.segmented_img],
                 expect_legacy=False,
                 check_exit_code=[0, 1],  # Model may not improve during training
             )
@@ -349,7 +349,7 @@ def _assertWarnsWhenTrainingArrow(self,
         if force_legacy:
             args = ['--legacy-polygons'] + args
         if from_model:
-            args = ['-i', from_model, '--resize', 'add'] + args
+            args = ['-i', from_model, '--resize', 'union'] + args
 
         print("ketos", 'train', *args)
         run = self.runner.invoke(ketos_cli, ['train'] + args)
diff --git a/tests/test_transcribe.py b/tests/test_transcribe.py
index 044779813..11d02ceab 100644
--- a/tests/test_transcribe.py
+++ b/tests/test_transcribe.py
@@ -8,6 +8,7 @@
 from lxml import etree
 from PIL import Image
 
+from kraken import containers
 from kraken.transcribe import TranscriptionInterface
 
 thisfile = Path(__file__).resolve().parent
@@ -18,16 +19,25 @@ class TestTranscriptionInterface(unittest.TestCase):
     """
     Test of the transcription interface generation
     """
+    def setUp(self):
+        with open(resources / 'records.json', 'r') as fp:
+            self.box_records = [containers.BBoxOCRRecord(**x) for x in json.load(fp)]
+
+        self.box_segmentation = containers.Segmentation(type='bbox',
+                                                        imagename='foo.png',
+                                                        text_direction='horizontal-lr',
+                                                        lines=self.box_records,
+                                                        script_detection=True,
+                                                        regions={})
+
+        self.im = Image.open(resources / 'input.jpg')
 
     def test_transcription_generation(self):
         """
         Tests creation of transcription interfaces with segmentation.
         """
         tr = TranscriptionInterface()
-        with open(resources / 'segmentation.json') as fp:
-            seg = json.load(fp)
-        with Image.open(resources / 'input.jpg') as im:
-            tr.add_page(im, seg)
+        tr.add_page(im=self.im, segmentation=self.box_segmentation)
         fp = BytesIO()
         tr.write(fp)
         # this will not throw an exception ever so we need a better validator
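A note on the updated transcription test above: stripped of the ``unittest`` scaffolding, the container-based call pattern it exercises looks roughly like the sketch below. The fixture locations under ``tests/resources`` and the record fields hidden inside ``records.json`` are taken on faith from the test, not from a documented example.

.. code-block:: python

    import json
    from pathlib import Path

    from PIL import Image

    from kraken import containers
    from kraken.transcribe import TranscriptionInterface

    resources = Path('tests/resources')  # assumed fixture location

    # The record fields are whatever the serialised fixture provides.
    with open(resources / 'records.json') as fp:
        records = [containers.BBoxOCRRecord(**rec) for rec in json.load(fp)]

    segmentation = containers.Segmentation(type='bbox',
                                           imagename='foo.png',
                                           text_direction='horizontal-lr',
                                           lines=records,
                                           script_detection=True,
                                           regions={})

    tr = TranscriptionInterface()
    with Image.open(resources / 'input.jpg') as im:
        tr.add_page(im=im, segmentation=segmentation)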