Skip to content

Commit

Permalink
Implement #140
Browse files Browse the repository at this point in the history
Get installed tesseract languages from backend

Signed-off-by: Robin Windey <[email protected]>
  • Loading branch information
R0Wi committed Sep 24, 2022
1 parent 169b0dd commit de01176
Show file tree
Hide file tree
Showing 24 changed files with 923 additions and 99 deletions.
5 changes: 3 additions & 2 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
name: Build artifact

on:
pull_request:
workflow_dispatch:

env:
Expand Down Expand Up @@ -36,5 +37,5 @@ jobs:
- name: Upload artifacts
uses: actions/upload-artifact@v1
with:
name: ${{ env.APP_NAME }}.tar.gz
path: ${{ env.APP_NAME }}/build/artifacts/appstore/${{ env.APP_NAME }}.tar.gz
name: ${{ env.APP_NAME }}.tar.gz
path: ${{ env.APP_NAME }}/build/artifacts/appstore/${{ env.APP_NAME }}.tar.gz
1 change: 1 addition & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,7 @@ appstore:
--exclude="../$(app_name)/*.json" \
--exclude="../$(app_name)/*.lock" \
--exclude="../$(app_name)/*.cov" \
--exclude="../$(app_name)/psalm.xml" \
../$(app_name) \

.PHONY: test
Expand Down
5 changes: 3 additions & 2 deletions appinfo/routes.php
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@

return [
'routes' => [
['name' => 'GlobalSettings#getGlobalSettings', 'url' => '/globalsettings', 'verb' => 'GET'],
['name' => 'GlobalSettings#setGlobalSettings', 'url' => '/globalsettings', 'verb' => 'PUT']
['name' => 'GlobalSettings#getGlobalSettings', 'url' => '/globalSettings', 'verb' => 'GET'],
['name' => 'GlobalSettings#setGlobalSettings', 'url' => '/globalSettings', 'verb' => 'PUT'],
['name' => 'OcrBackendInfo#getInstalledLanguages', 'url' => '/ocrBackendInfo/installedLangs', 'verb' => 'GET']
]
];
3 changes: 3 additions & 0 deletions lib/AppInfo/Application.php
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,9 @@
use OCA\WorkflowOcr\Service\EventService;
use OCA\WorkflowOcr\Service\GlobalSettingsService;
use OCA\WorkflowOcr\Service\IGlobalSettingsService;
use OCA\WorkflowOcr\Service\IOcrBackendInfoService;
use OCA\WorkflowOcr\Service\IOcrService;
use OCA\WorkflowOcr\Service\OcrBackendInfoService;
use OCA\WorkflowOcr\Service\OcrService;
use OCA\WorkflowOcr\Wrapper\CommandWrapper;
use OCA\WorkflowOcr\Wrapper\Filesystem;
Expand Down Expand Up @@ -76,6 +78,7 @@ public function register(IRegistrationContext $context): void {
$context->registerServiceAlias(IFilesystem::class, Filesystem::class);
$context->registerServiceAlias(IGlobalSettingsService::class, GlobalSettingsService::class);
$context->registerServiceAlias(IEventService::class, EventService::class);
$context->registerServiceAlias(IOcrBackendInfoService::class, OcrBackendInfoService::class);

// BUG #43
$context->registerService(ICommand::class, function () {
Expand Down
41 changes: 41 additions & 0 deletions lib/Controller/ControllerBase.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
<?php

declare(strict_types=1);

/**
* @copyright Copyright (c) 2022 Robin Windey <[email protected]>
*
* @author Robin Windey <[email protected]>
*
* @license GNU AGPL version 3 or any later version
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
*/

namespace OCA\WorkflowOcr\Controller;

use OCP\AppFramework\Controller;
use OCP\AppFramework\Http\JSONResponse;

abstract class ControllerBase extends Controller {
	/**
	 * Runs the given callback and wraps its outcome in a JSON response.
	 *
	 * @param callable $function Callback that produces the response payload
	 * @return JSONResponse Response holding the callback result, or an
	 *                      'error' entry with the exception message and
	 *                      HTTP status 500 if anything was thrown
	 */
	protected function tryExecute(callable $function) : JSONResponse {
		try {
			return new JSONResponse($function());
		} catch (\Throwable $exception) {
			$payload = ['error' => $exception->getMessage()];
			return new JSONResponse($payload, 500);
		}
	}
}
12 changes: 1 addition & 11 deletions lib/Controller/GlobalSettingsController.php
Original file line number Diff line number Diff line change
Expand Up @@ -28,14 +28,13 @@

use OCA\WorkflowOcr\Model\GlobalSettings;
use OCA\WorkflowOcr\Service\IGlobalSettingsService;
use OCP\AppFramework\Controller;
use OCP\AppFramework\Http\JSONResponse;
use OCP\IRequest;

/**
* This is the backend API controller for the Admin.vue component.
*/
class GlobalSettingsController extends Controller {
class GlobalSettingsController extends ControllerBase {
/** @var IGlobalSettingsService */
private $globalSettingsService;

Expand Down Expand Up @@ -66,13 +65,4 @@ public function setGlobalSettings(array $globalSettings) : JSONResponse {
return $this->globalSettingsService->getGlobalSettings();
});
}

private function tryExecute(callable $function) : JSONResponse {
try {
$result = $function();
return new JSONResponse($result);
} catch (\Throwable $e) {
return new JSONResponse(['error' => $e->getMessage()], 500);
}
}
}
53 changes: 53 additions & 0 deletions lib/Controller/OcrBackendInfoController.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
<?php

declare(strict_types=1);

/**
* @copyright Copyright (c) 2022 Robin Windey <[email protected]>
*
* @author Robin Windey <[email protected]>
*
* @license GNU AGPL version 3 or any later version
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
*/

namespace OCA\WorkflowOcr\Controller;

use OCA\WorkflowOcr\Service\IOcrBackendInfoService;
use OCP\AppFramework\Http\JSONResponse;
use OCP\IRequest;

/**
* This is the backend API controller which provides information about the OCR backend system.
*/
class OcrBackendInfoController extends ControllerBase {
	/** @var IOcrBackendInfoService */
	private $ocrBackendInfoService;

	/**
	 * Stores the injected backend info service and forwards the app name
	 * and request to the parent controller.
	 */
	public function __construct($AppName, IRequest $request, IOcrBackendInfoService $ocrBackendInfoService) {
		parent::__construct($AppName, $request);
		$this->ocrBackendInfoService = $ocrBackendInfoService;
	}

	/**
	 * Returns the languages reported by the backend info service,
	 * wrapped into a JSON response by tryExecute().
	 *
	 * @return JSONResponse
	 */
	public function getInstalledLanguages() : JSONResponse {
		$service = $this->ocrBackendInfoService;
		$fetchLanguages = function () use ($service) {
			return $service->getInstalledLanguages();
		};

		return $this->tryExecute($fetchLanguages);
	}
}
142 changes: 142 additions & 0 deletions lib/Migration/Version2404Date20220903071748.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
<?php

declare(strict_types=1);

/**
* @copyright Copyright (c) 2022 Robin Windey <[email protected]>
*
* @license GNU AGPL version 3 or any later version
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

namespace OCA\WorkflowOcr\Migration;

use Closure;
use Exception;
use OCP\DB\ISchemaWrapper;
use OCP\IDBConnection;
use OCP\Migration\IOutput;
use OCP\Migration\SimpleMigrationStep;

class Version2404Date20220903071748 extends SimpleMigrationStep {
	/**
	 * Previously stored language codes mapped to the tesseract
	 * language codes that replace them.
	 */
	private const LANG_MAPPING = [
		'de' => 'deu',
		'en' => 'eng',
		'fr' => 'fra',
		'it' => 'ita',
		'es' => 'spa',
		'pt' => 'por',
		'ru' => 'rus',
		'chi' => 'chi_sim'
	];

	/** @var IDBConnection */
	private $db;

	public function __construct(IDBConnection $db) {
		$this->db = $db;
	}

	/**
	 * {@inheritDoc}
	 */
	public function name(): string {
		return 'migrate lang codes';
	}

	/**
	 * {@inheritDoc}
	 */
	public function description(): string {
		return 'Execute migration of language codes towards tesseract langugage codes (e.g. deu instead of de)';
	}

	/**
	 * Rewrites the stored workflow operations so that they use tesseract
	 * language codes. The schema itself is not modified, therefore null
	 * is returned.
	 *
	 * {@inheritDoc}
	 */
	public function changeSchema(IOutput $output, Closure $schemaClosure, array $options): ?ISchemaWrapper {
		// 'id' and new 'operation' value will be stored here
		$datasetsToMigrate = $this->getDatasetsToMigrate();
		$this->updateDatabase($datasetsToMigrate);

		return null;
	}

	/**
	 * Collects all OCR workflow operations whose language list contains at
	 * least one code that has to be migrated.
	 *
	 * Rows whose 'operation' value is not a JSON object/array with a
	 * 'languages' array are left untouched.
	 *
	 * @return array[] List of ['id' => ..., 'operation' => new JSON string]
	 */
	private function getDatasetsToMigrate() : array {
		$builder = $this->db->getQueryBuilder();

		$ocrFlowOperations = $builder->select('id', 'operation')
			->from('flow_operations')
			->where($builder->expr()->eq('class', $builder->createNamedParameter('OCA\WorkflowOcr\Operation')))
			->executeQuery();

		$datasetsToMigrate = [];

		try {
			while ($row = $ocrFlowOperations->fetch()) {
				// Cast guards against a NULL column value (TypeError under strict_types)
				$workflowSettings = json_decode((string)$row['operation'], true);

				// Nothing to migrate if the settings could not be decoded
				// or do not carry a language list at all.
				if (!is_array($workflowSettings)
					|| !isset($workflowSettings['languages'])
					|| !is_array($workflowSettings['languages'])) {
					continue;
				}

				$newLangArr = $this->migrateLanguages($workflowSettings['languages']);
				if ($newLangArr === null) {
					continue;
				}

				// At least one code changed, so the whole 'operation'
				// string has to be regenerated.
				$workflowSettings['languages'] = $newLangArr;
				$newOperation = json_encode($workflowSettings);
				if ($newOperation === false) {
					// Never overwrite existing settings with a failed encoding result
					continue;
				}

				$datasetsToMigrate[] = [
					'id' => $row['id'],
					'operation' => $newOperation
				];
			}
		} finally {
			$ocrFlowOperations->closeCursor();
		}

		return $datasetsToMigrate;
	}

	/**
	 * Replaces every language code found in LANG_MAPPING by its tesseract
	 * counterpart. Unknown or non-string entries are kept as they are.
	 *
	 * @param array $languages Language list as stored in the workflow settings
	 * @return array|null The re-indexed migrated list, or null if no entry
	 *                    had to be replaced
	 */
	private function migrateLanguages(array $languages) : ?array {
		$foundMapping = false;
		$newLangArr = [];

		foreach ($languages as $existingLang) {
			if (is_string($existingLang) && array_key_exists($existingLang, self::LANG_MAPPING)) {
				$newLangArr[] = self::LANG_MAPPING[$existingLang];
				$foundMapping = true;
				continue;
			}
			$newLangArr[] = $existingLang;
		}

		return $foundMapping ? $newLangArr : null;
	}

	/**
	 * Writes the regenerated 'operation' values back to the database
	 * inside a single transaction.
	 *
	 * @param array[] $datasetsToMigrate Entries with 'id' and 'operation' keys
	 * @throws \Throwable Anything thrown while updating is re-thrown after
	 *                    the transaction has been rolled back
	 */
	private function updateDatabase(array $datasetsToMigrate) : void {
		// Avoid opening an empty transaction
		if ($datasetsToMigrate === []) {
			return;
		}

		$this->db->beginTransaction();

		try {
			$builder = $this->db->getQueryBuilder();
			$builder->update('flow_operations')
				->set('operation', $builder->createParameter('operation'))
				->where($builder->expr()->eq('id', $builder->createParameter('id')));

			foreach ($datasetsToMigrate as $dataset) {
				$builder->setParameter('id', $dataset['id']);
				$builder->setParameter('operation', $dataset['operation']);
				$builder->executeStatement();
			}
		} catch (\Throwable $e) {
			// Also roll back on \Error (e.g. TypeError), not only on \Exception,
			// so that the transaction is never left open.
			$this->db->rollBack();
			throw $e;
		}

		$this->db->commit();
	}
}
25 changes: 1 addition & 24 deletions lib/OcrProcessors/OcrMyPdfBasedProcessor.php
Original file line number Diff line number Diff line change
Expand Up @@ -33,22 +33,6 @@
use Psr\Log\LoggerInterface;

abstract class OcrMyPdfBasedProcessor implements IOcrProcessor {
/** @var array
* Mapping for VUE frontend lang settings.
* See also https://github.com/tesseract-ocr/tesseract/blob/main/doc/tesseract.1.asc#languages
*/
private static $langMapping = [
'de' => 'deu',
'en' => 'eng',
'fr' => 'fra',
'it' => 'ita',
'es' => 'spa',
'pt' => 'por',
'ru' => 'rus',
'chi' => 'chi_sim',
'est' => 'est',
'slk' => 'slk'
];

/** @var ICommand */
private $command;
Expand Down Expand Up @@ -127,14 +111,7 @@ private function getCommandlineArgs(WorkflowSettings $settings, GlobalSettings $

// Language settings
if ($settings->getLanguages()) {
$langStr = Chain::create($settings->getLanguages())
->map(function ($langCode) {
return self::$langMapping[(string)$langCode] ?? null;
})
->filter(function ($l) {
return $l !== null;
})
->join('+');
$langStr = Chain::create($settings->getLanguages())->join('+');
$args[] = "-l $langStr";
}

Expand Down
38 changes: 38 additions & 0 deletions lib/Service/IOcrBackendInfoService.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
<?php

declare(strict_types=1);

/**
* @copyright Copyright (c) 2022 Robin Windey <[email protected]>
*
* @author Robin Windey <[email protected]>
*
* @license GNU AGPL version 3 or any later version
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
*/

namespace OCA\WorkflowOcr\Service;

interface IOcrBackendInfoService {

/**
* Returns the languages that are installed on (and therefore supported by) the OCR backend.
* Each language is returned as a language-code string (e.g. "deu" or "eng"),
* as currently defined at https://github.com/tesseract-ocr/tesseract/blob/main/doc/tesseract.1.asc#languages.
*
* @return string[] List of language codes
*/
public function getInstalledLanguages() : array;
}
Loading

0 comments on commit de01176

Please sign in to comment.