diff --git a/lib/BackgroundJobs/ProcessFileJob.php b/lib/BackgroundJobs/ProcessFileJob.php
index fb3dab3..6fe4c36 100644
--- a/lib/BackgroundJobs/ProcessFileJob.php
+++ b/lib/BackgroundJobs/ProcessFileJob.php
@@ -27,22 +27,10 @@
namespace OCA\WorkflowOcr\BackgroundJobs;
use \OCP\Files\File;
-use OC\User\NoUserException;
-use OCA\WorkflowOcr\Helper\IProcessingFileAccessor;
use OCA\WorkflowOcr\Model\WorkflowSettings;
-use OCA\WorkflowOcr\Service\IEventService;
use OCA\WorkflowOcr\Service\INotificationService;
use OCA\WorkflowOcr\Service\IOcrService;
-use OCA\WorkflowOcr\Wrapper\IFilesystem;
-use OCA\WorkflowOcr\Wrapper\IViewFactory;
use OCP\AppFramework\Utility\ITimeFactory;
-use OCP\Files\FileInfo;
-use OCP\Files\IRootFolder;
-use OCP\Files\Node;
-use OCP\Files\NotFoundException;
-use OCP\IUser;
-use OCP\IUserManager;
-use OCP\IUserSession;
use Psr\Log\LoggerInterface;
/**
@@ -52,47 +40,19 @@
class ProcessFileJob extends \OCP\BackgroundJob\QueuedJob {
/** @var LoggerInterface */
protected $logger;
- /** @var IRootFolder */
- private $rootFolder;
/** @var IOcrService */
private $ocrService;
- /** @var IEventService */
- private $eventService;
- /** @var IViewFactory */
- private $viewFactory;
- /** @var IFilesystem */
- private $filesystem;
- /** @var IUserManager */
- private $userManager;
- /** @var IUserSession */
- private $userSession;
- /** @var IProcessingFileAccessor */
- private $processingFileAccessor;
/** @var INotificationService */
private $notificationService;
public function __construct(
LoggerInterface $logger,
- IRootFolder $rootFolder,
IOcrService $ocrService,
- IEventService $eventService,
- IViewFactory $viewFactory,
- IFilesystem $filesystem,
- IUserManager $userManager,
- IUserSession $userSession,
- IProcessingFileAccessor $processingFileAccessor,
INotificationService $notificationService,
ITimeFactory $timeFactory) {
parent::__construct($timeFactory);
$this->logger = $logger;
- $this->rootFolder = $rootFolder;
$this->ocrService = $ocrService;
- $this->eventService = $eventService;
- $this->viewFactory = $viewFactory;
- $this->filesystem = $filesystem;
- $this->userManager = $userManager;
- $this->userSession = $userSession;
- $this->processingFileAccessor = $processingFileAccessor;
$this->notificationService = $notificationService;
}
@@ -104,13 +64,10 @@ protected function run($argument) : void {
try {
[$fileId, $uid, $settings] = $this->parseArguments($argument);
- $this->initUserEnvironment($uid);
- $this->processFile($fileId, $settings);
+ $this->ocrService->runOcrProcess($fileId, $uid, $settings);
} catch (\Throwable $ex) {
$this->logger->error($ex->getMessage(), ['exception' => $ex]);
$this->notificationService->createErrorNotification($uid, 'An error occured while executing the OCR process ('.$ex->getMessage().'). Please have a look at your servers logfile for more details.');
- } finally {
- $this->shutdownUserEnvironment();
}
$this->logger->debug('ENDED -- Run ' . self::class . ' job. Argument: {argument}.', ['argument' => $argument]);
@@ -135,87 +92,4 @@ private function parseArguments($argument) : array {
$settings
];
}
-
- /**
- * @param int $fileId The id of the file to be processed
- * @param WorkflowSettings $settings The settings to be used for processing
- */
- private function processFile(int $fileId, WorkflowSettings $settings) : void {
- $node = $this->getNode($fileId);
-
- $ocrFile = $this->ocrService->ocrFile($node, $settings);
-
- $filePath = $node->getPath();
- $fileContent = $ocrFile->getFileContent();
- $originalFileExtension = $node->getExtension();
- $newFileExtension = $ocrFile->getFileExtension();
-
- // Only create a new file version if the file OCR result was not empty #130
- if ($ocrFile->getRecognizedText() !== '') {
- $newFilePath = $originalFileExtension === $newFileExtension ?
- $filePath :
- $filePath . ".pdf";
-
- $this->createNewFileVersion($newFilePath, $fileContent, $fileId);
- }
-
- $this->eventService->textRecognized($ocrFile, $node);
- }
-
- private function getNode(int $fileId) : ?Node {
- /** @var File[] */
- $nodeArr = $this->rootFolder->getById($fileId);
- if (count($nodeArr) === 0) {
- throw new NotFoundException('Could not process file with id \'' . $fileId . '\'. File was not found');
- }
-
- $node = array_shift($nodeArr);
-
- if (!$node instanceof Node || $node->getType() !== FileInfo::TYPE_FILE) {
- throw new \InvalidArgumentException('Skipping process for file with id \'' . $fileId . '\'. It is not a file');
- }
-
- return $node;
- }
-
- /**
- * * @param string $userId The owners userId of the file to be processed
- */
- private function initUserEnvironment(string $userId) : void {
- /** @var IUser */
- $user = $this->userManager->get($userId);
- if (!$user) {
- throw new NoUserException("User with uid '$userId' was not found");
- }
-
- $this->userSession->setUser($user);
- $this->filesystem->init($userId, '/' . $userId . '/files');
- }
-
- private function shutdownUserEnvironment() : void {
- $this->userSession->setUser(null);
- }
-
- /**
- * @param string $filePath The filepath of the file to write
- * @param string $ocrContent The new filecontent (which was OCR processed)
- * @param int $fileId The id of the file to write. Used for locking.
- */
- private function createNewFileVersion(string $filePath, string $ocrContent, int $fileId) : void {
- $dirPath = dirname($filePath);
- $filename = basename($filePath);
-
- $this->processingFileAccessor->setCurrentlyProcessedFileId($fileId);
-
- try {
- $view = $this->viewFactory->create($dirPath);
- // Create new file or file-version with OCR-file
- // This will trigger 'postWrite' event which would normally
- // add the file to the queue again but this is tackled
- // by the processingFileAccessor.
- $view->file_put_contents($filename, $ocrContent);
- } finally {
- $this->processingFileAccessor->setCurrentlyProcessedFileId(null);
- }
- }
}
diff --git a/lib/Exception/OcrResultEmptyException.php b/lib/Exception/OcrResultEmptyException.php
new file mode 100644
index 0000000..73e2c0c
--- /dev/null
+++ b/lib/Exception/OcrResultEmptyException.php
@@ -0,0 +1,32 @@
+
+ *
+ * @license GNU AGPL version 3 or any later version
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as
+ * published by the Free Software Foundation, either version 3 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see .
+ */
+
+namespace OCA\WorkflowOcr\Exception;
+
+use Exception;
+
+class OcrResultEmptyException extends Exception {
+ public function __construct(string $message) {
+ $this->message = $message;
+ }
+}
diff --git a/lib/Migration/Version2404Date20220903071748.php b/lib/Migration/Version2404Date20220903071748.php
index a6c3859..a29f9b1 100644
--- a/lib/Migration/Version2404Date20220903071748.php
+++ b/lib/Migration/Version2404Date20220903071748.php
@@ -89,7 +89,7 @@ private function getDatasetsToMigrate() : array {
$workflowSettings = json_decode($row['operation'], true);
$foundMapping = false;
$newLangArr = [];
- if (!array_key_exists('languages', $workflowSettings)) {
+ if (!$workflowSettings || !array_key_exists('languages', $workflowSettings)) {
continue;
}
$languagesArr = $workflowSettings['languages'];
diff --git a/lib/OcrProcessors/OcrMyPdfBasedProcessor.php b/lib/OcrProcessors/OcrMyPdfBasedProcessor.php
index 740f42a..1aa7cff 100644
--- a/lib/OcrProcessors/OcrMyPdfBasedProcessor.php
+++ b/lib/OcrProcessors/OcrMyPdfBasedProcessor.php
@@ -25,6 +25,7 @@
use Cocur\Chain\Chain;
use OCA\WorkflowOcr\Exception\OcrNotPossibleException;
+use OCA\WorkflowOcr\Exception\OcrResultEmptyException;
use OCA\WorkflowOcr\Helper\ISidecarFileAccessor;
use OCA\WorkflowOcr\Model\GlobalSettings;
use OCA\WorkflowOcr\Model\WorkflowSettings;
@@ -86,7 +87,7 @@ public function ocrFile(File $file, WorkflowSettings $settings, GlobalSettings $
$ocrFileContent = $this->command->getOutput();
if (!$ocrFileContent) {
- throw new OcrNotPossibleException('OCRmyPDF did not produce any output for file ' . $file->getPath());
+ throw new OcrResultEmptyException('OCRmyPDF did not produce any output for file ' . $file->getPath());
}
$recognizedText = $this->sidecarFileAccessor->getSidecarFileContent();
diff --git a/lib/Service/IOcrService.php b/lib/Service/IOcrService.php
index 96d9f3f..10ee68c 100644
--- a/lib/Service/IOcrService.php
+++ b/lib/Service/IOcrService.php
@@ -27,19 +27,19 @@
namespace OCA\WorkflowOcr\Service;
use OCA\WorkflowOcr\Model\WorkflowSettings;
-use OCA\WorkflowOcr\OcrProcessors\OcrProcessorResult;
-use OCP\Files\File;
interface IOcrService {
/**
- * Processes OCR on the given file
+ * Processes OCR on the given file. Creates a new file version and emits appropriate events.
*
- * @param string $mimeType The mimetype of the file to be processed
- * @param string $fileContent The file to be processed
+ * @param int $fileId The id if the file to be processed
+ * @param string $uid The id of the user who has access to this file
* @param WorkflowSettings $settings The settings to be used for processing
*
* @throws \OCA\WorkflowOcr\Exception\OcrNotPossibleException
* @throws \OCA\WorkflowOcr\Exception\OcrProcessorNotFoundException
+ * @throws \OCA\WorkflowOcr\Exception\OcrResultEmptyException
+ * @throws \InvalidArgumentException
*/
- public function ocrFile(File $file, WorkflowSettings $settings) : OcrProcessorResult;
+ public function runOcrProcess(int $fileId, string $uid, WorkflowSettings $settings) : void;
}
diff --git a/lib/Service/OcrService.php b/lib/Service/OcrService.php
index 64790ff..e2ce5db 100644
--- a/lib/Service/OcrService.php
+++ b/lib/Service/OcrService.php
@@ -26,10 +26,21 @@
namespace OCA\WorkflowOcr\Service;
+use OC\User\NoUserException;
+use OCA\WorkflowOcr\Exception\OcrResultEmptyException;
+use OCA\WorkflowOcr\Helper\IProcessingFileAccessor;
use OCA\WorkflowOcr\Model\WorkflowSettings;
use OCA\WorkflowOcr\OcrProcessors\IOcrProcessorFactory;
-use OCA\WorkflowOcr\OcrProcessors\OcrProcessorResult;
+use OCA\WorkflowOcr\Wrapper\IFilesystem;
+use OCA\WorkflowOcr\Wrapper\IViewFactory;
use OCP\Files\File;
+use OCP\Files\FileInfo;
+use OCP\Files\IRootFolder;
+use OCP\Files\Node;
+use OCP\Files\NotFoundException;
+use OCP\IUser;
+use OCP\IUserManager;
+use OCP\IUserSession;
use OCP\SystemTag\ISystemTagObjectMapper;
use OCP\SystemTag\TagNotFoundException;
use Psr\Log\LoggerInterface;
@@ -44,22 +55,129 @@ class OcrService implements IOcrService {
/** @var ISystemTagObjectMapper */
private $systemTagObjectMapper;
+ /** @var IUserManager */
+ private $userManager;
+
+ /** @var IFilesystem */
+ private $filesystem;
+
+ /** @var IUserSession */
+ private $userSession;
+
+ /** @var IRootFolder */
+ private $rootFolder;
+
+ /** @var IEventService */
+ private $eventService;
+
+ /** @var IViewFactory */
+ private $viewFactory;
+
+ /** @var IProcessingFileAccessor */
+ private $processingFileAccessor;
+
/** @var LoggerInterface */
private $logger;
- public function __construct(IOcrProcessorFactory $ocrProcessorFactory, IGlobalSettingsService $globalSettingsService, ISystemTagObjectMapper $systemTagObjectMapper, LoggerInterface $logger) {
+ public function __construct(
+ IOcrProcessorFactory $ocrProcessorFactory,
+ IGlobalSettingsService $globalSettingsService,
+ ISystemTagObjectMapper $systemTagObjectMapper,
+ IUserManager $userManager,
+ IFilesystem $filesystem,
+ IUserSession $userSession,
+ IRootFolder $rootFolder,
+ IEventService $eventService,
+ IViewFactory $viewFactory,
+ IProcessingFileAccessor $processingFileAccessor,
+ LoggerInterface $logger) {
$this->ocrProcessorFactory = $ocrProcessorFactory;
$this->globalSettingsService = $globalSettingsService;
$this->systemTagObjectMapper = $systemTagObjectMapper;
+ $this->userManager = $userManager;
+ $this->filesystem = $filesystem;
+ $this->userSession = $userSession;
+ $this->rootFolder = $rootFolder;
+ $this->eventService = $eventService;
+ $this->viewFactory = $viewFactory;
+ $this->processingFileAccessor = $processingFileAccessor;
$this->logger = $logger;
}
/** @inheritdoc */
- public function ocrFile(File $file, WorkflowSettings $settings) : OcrProcessorResult {
- $ocrProcessor = $this->ocrProcessorFactory->create($file->getMimeType());
- $result = $ocrProcessor->ocrFile($file, $settings, $this->globalSettingsService->getGlobalSettings());
- $this->processTagsAfterSuccessfulOcr($file, $settings);
- return $result;
+ public function runOcrProcess(int $fileId, string $uid, WorkflowSettings $settings) : void {
+ try {
+ $this->initUserEnvironment($uid);
+
+ $file = $this->getNode($fileId);
+ $ocrProcessor = $this->ocrProcessorFactory->create($file->getMimeType());
+ $globalSettings = $this->globalSettingsService->getGlobalSettings();
+
+ try {
+ $result = $ocrProcessor->ocrFile($file, $settings, $globalSettings);
+ } catch(OcrResultEmptyException $ex) {
+ // #232: it's okay to have an empty result if the file was skipped due to OCR mode
+ if ($settings->getOcrMode() === WorkflowSettings::OCR_MODE_SKIP_FILE) {
+ $this->logger->debug('Skipping empty OCR result for file with id {fileId} because OCR mode is set to \'skip file\'', ['fileId' => $fileId]);
+ return;
+ }
+ throw $ex;
+ }
+
+ $this->processTagsAfterSuccessfulOcr($file, $settings);
+
+ $filePath = $file->getPath();
+ $fileContent = $result->getFileContent();
+ $originalFileExtension = $file->getExtension();
+ $newFileExtension = $result->getFileExtension();
+
+ // Only create a new file version if the file OCR result was not empty #130
+ if ($result->getRecognizedText() !== '') {
+ $newFilePath = $originalFileExtension === $newFileExtension ?
+ $filePath :
+ $filePath . ".pdf";
+
+ $this->createNewFileVersion($newFilePath, $fileContent, $fileId);
+ }
+
+ $this->eventService->textRecognized($result, $file);
+ } finally {
+ $this->shutdownUserEnvironment();
+ }
+ }
+
+ /**
+ * * @param string $uid The owners userId of the file to be processed
+ */
+ private function initUserEnvironment(string $uid) : void {
+ /** @var IUser */
+ $user = $this->userManager->get($uid);
+ if (!$user) {
+ throw new NoUserException("User with uid '$uid' was not found");
+ }
+
+ $this->userSession->setUser($user);
+ $this->filesystem->init($uid, '/' . $uid . '/files');
+ }
+
+ private function shutdownUserEnvironment() : void {
+ $this->userSession->setUser(null);
+ }
+
+ private function getNode(int $fileId) : ?Node {
+ /** @var File[] */
+ $nodeArr = $this->rootFolder->getById($fileId);
+ if (count($nodeArr) === 0) {
+ throw new NotFoundException('Could not process file with id \'' . $fileId . '\'. File was not found');
+ }
+
+ $node = array_shift($nodeArr);
+
+ if (!$node instanceof Node || $node->getType() !== FileInfo::TYPE_FILE) {
+ throw new \InvalidArgumentException('Skipping process for file with id \'' . $fileId . '\'. It is not a file');
+ }
+
+ return $node;
}
private function processTagsAfterSuccessfulOcr(File $file, WorkflowSettings $settings) : void {
@@ -84,4 +202,27 @@ private function processTagsAfterSuccessfulOcr(File $file, WorkflowSettings $set
}
}
}
+
+ /**
+ * @param string $filePath The filepath of the file to write
+ * @param string $ocrContent The new filecontent (which was OCR processed)
+ * @param int $fileId The id of the file to write. Used for locking.
+ */
+ private function createNewFileVersion(string $filePath, string $ocrContent, int $fileId) : void {
+ $dirPath = dirname($filePath);
+ $filename = basename($filePath);
+
+ $this->processingFileAccessor->setCurrentlyProcessedFileId($fileId);
+
+ try {
+ $view = $this->viewFactory->create($dirPath);
+ // Create new file or file-version with OCR-file
+ // This will trigger 'postWrite' event which would normally
+ // add the file to the queue again but this is tackled
+ // by the processingFileAccessor.
+ $view->file_put_contents($filename, $ocrContent);
+ } finally {
+ $this->processingFileAccessor->setCurrentlyProcessedFileId(null);
+ }
+ }
}
diff --git a/tests/Integration/Notification/NotificationTest.php b/tests/Integration/Notification/NotificationTest.php
index 70e277a..b991ec1 100644
--- a/tests/Integration/Notification/NotificationTest.php
+++ b/tests/Integration/Notification/NotificationTest.php
@@ -23,51 +23,23 @@
use OC\BackgroundJob\JobList;
use OCA\WorkflowOcr\BackgroundJobs\ProcessFileJob;
use OCA\WorkflowOcr\Exception\OcrNotPossibleException;
-use OCA\WorkflowOcr\Helper\IProcessingFileAccessor;
-use OCA\WorkflowOcr\Service\IEventService;
use OCA\WorkflowOcr\Service\INotificationService;
use OCA\WorkflowOcr\Service\IOcrService;
use OCA\WorkflowOcr\Service\NotificationService;
-use OCA\WorkflowOcr\Wrapper\IFilesystem;
-use OCA\WorkflowOcr\Wrapper\IViewFactory;
use OCP\AppFramework\Utility\ITimeFactory;
use OCP\DB\QueryBuilder\IExpressionBuilder;
use OCP\DB\QueryBuilder\IQueryBuilder;
-use OCP\Files\File;
-use OCP\Files\FileInfo;
-use OCP\Files\IRootFolder;
use OCP\IConfig;
use OCP\IDBConnection;
-use OCP\IUser;
-use OCP\IUserManager;
-use OCP\IUserSession;
use PHPUnit\Framework\MockObject\MockObject;
use PHPUnit\Framework\TestCase;
use Psr\Log\LoggerInterface;
class NotificationTest extends TestCase {
- /** @var AppFake */
- private $appFake;
/** @var LoggerInterface|MockObject */
private $logger;
- /** @var IRootFolder|MockObject */
- private $rootFolder;
/** @var IOcrService|MockObject */
private $ocrService;
- /** @var IEventService|MockObject */
- private $eventService;
- /** @var IViewFactory|MockObject */
- private $viewFactory;
- /** @var IFilesystem|MockObject */
- private $filesystem;
- /** @var IUserSession|MockObject */
- private $userSession;
- /** @var IUserManager|MockObject */
- private $userManager;
- /** @var IUser|MockObject */
- private $user;
- /** @var IProcessingFileAccessor|MockObject */
- private $processingFileAccessor;
/** @var INotificationService|MockObject */
private $notificationService;
/** @var JobList */
@@ -84,34 +56,11 @@ protected function setUp() : void {
$this->notificationService = new NotificationService(\OC::$server->get(\OCP\Notification\IManager::class));
$this->logger = $this->createMock(LoggerInterface::class);
- $this->rootFolder = $this->createMock(IRootFolder::class);
$this->ocrService = $this->createMock(IOcrService::class);
- $this->eventService = $this->createMock(IEventService::class);
- $this->viewFactory = $this->createMock(IViewFactory::class);
- $this->filesystem = $this->createMock(IFilesystem::class);
- $this->userSession = $this->createMock(IUserSession::class);
- $this->processingFileAccessor = $this->createMock(IProcessingFileAccessor::class);
- /** @var MockObject|IUserManager */
- $userManager = $this->createMock(IUserManager::class);
- $user = $this->createMock(IUser::class);
- $userManager->method('get')
- ->withAnyParameters()
- ->willReturn($user);
-
- $this->userManager = $userManager;
- $this->user = $user;
-
$this->processFileJob = new ProcessFileJob(
$this->logger,
- $this->rootFolder,
$this->ocrService,
- $this->eventService,
- $this->viewFactory,
- $this->filesystem,
- $this->userManager,
- $this->userSession,
- $this->processingFileAccessor,
$this->notificationService,
$this->createMock(ITimeFactory::class)
);
@@ -158,13 +107,8 @@ protected function setUp() : void {
}
public function testBackgroundJobCreatesErrorNotificationIfOcrFailed() {
- $fileMock = $this->createValidFileMock();
- $this->rootFolder->method('getById')
- ->with(42)
- ->willReturn([$fileMock]);
-
$this->ocrService->expects($this->once())
- ->method('ocrFile')
+ ->method('runOcrProcess')
->withAnyParameters()
->willThrowException(new OcrNotPossibleException('Some error'));
$appFake = \OC::$server->get(AppFake::class);
@@ -179,25 +123,4 @@ public function testBackgroundJobCreatesErrorNotificationIfOcrFailed() {
$this->assertEquals('ocr_error', $notification->getSubject());
$this->assertEquals('An error occured while executing the OCR process (Some error). Please have a look at your servers logfile for more details.', $notification->getSubjectParameters()['message']);
}
-
- /**
- * @return File|MockObject
- */
- private function createValidFileMock(string $mimeType = 'application/pdf', string $content = 'someFileContent', string $fileExtension = "pdf", string $path = "/admin/files/somefile.pdf") {
- /** @var MockObject|File */
- $fileMock = $this->createMock(File::class);
- $fileMock->method('getType')
- ->willReturn(FileInfo::TYPE_FILE);
- $fileMock->method('getMimeType')
- ->willReturn($mimeType);
- $fileMock->method('getContent')
- ->willReturn($content);
- $fileMock->method('getId')
- ->willReturn(42);
- $fileMock->method('getExtension')
- ->willReturn($fileExtension);
- $fileMock->method('getPath')
- ->willReturn($path);
- return $fileMock;
- }
}
diff --git a/tests/Unit/BackgroundJobs/ProcessFileJobTest.php b/tests/Unit/BackgroundJobs/ProcessFileJobTest.php
index acf20c0..d021291 100644
--- a/tests/Unit/BackgroundJobs/ProcessFileJobTest.php
+++ b/tests/Unit/BackgroundJobs/ProcessFileJobTest.php
@@ -25,30 +25,18 @@
use Exception;
use OC\BackgroundJob\JobList;
-use OC\User\NoUserException;
use OCA\WorkflowOcr\BackgroundJobs\ProcessFileJob;
use OCA\WorkflowOcr\Exception\OcrNotPossibleException;
use OCA\WorkflowOcr\Exception\OcrProcessorNotFoundException;
-use OCA\WorkflowOcr\Helper\IProcessingFileAccessor;
-use OCA\WorkflowOcr\OcrProcessors\OcrProcessorResult;
-use OCA\WorkflowOcr\Service\IEventService;
+use OCA\WorkflowOcr\Exception\OcrResultEmptyException;
use OCA\WorkflowOcr\Service\INotificationService;
use OCA\WorkflowOcr\Service\IOcrService;
-use OCA\WorkflowOcr\Wrapper\IFilesystem;
-use OCA\WorkflowOcr\Wrapper\IView;
-use OCA\WorkflowOcr\Wrapper\IViewFactory;
use OCP\AppFramework\Utility\ITimeFactory;
use OCP\DB\QueryBuilder\IExpressionBuilder;
use OCP\DB\QueryBuilder\IQueryBuilder;
-use OCP\Files\File;
-use OCP\Files\FileInfo;
-use OCP\Files\IRootFolder;
-use OCP\Files\Node;
+use OCP\Files\NotFoundException;
use OCP\IConfig;
use OCP\IDBConnection;
-use OCP\IUser;
-use OCP\IUserManager;
-use OCP\IUserSession;
use PHPUnit\Framework\MockObject\MockObject;
use PHPUnit\Framework\TestCase;
use Psr\Log\LoggerInterface;
@@ -56,75 +44,28 @@
class ProcessFileJobTest extends TestCase {
/** @var LoggerInterface|MockObject */
private $logger;
- /** @var IRootFolder|MockObject */
- private $rootFolder;
+
/** @var IOcrService|MockObject */
private $ocrService;
- /** @var IEventService|MockObject */
- private $eventService;
- /** @var IViewFactory|MockObject */
- private $viewFactory;
- /** @var IFilesystem|MockObject */
- private $filesystem;
- /** @var IUserSession|MockObject */
- private $userSession;
- /** @var IUserManager|MockObject */
- private $userManager;
- /** @var IUser|MockObject */
- private $user;
- /** @var IProcessingFileAccessor|MockObject */
- private $processingFileAccessor;
+
/** @var INotificationService|MockObject */
private $notificationService;
/** @var JobList */
private $jobList;
/** @var ProcessFileJob */
private $processFileJob;
- /** @var File[] */
- private $rootFolderGetById42ReturnValue;
+
public function setUp() : void {
parent::setUp();
$this->logger = $this->createMock(LoggerInterface::class);
$this->ocrService = $this->createMock(IOcrService::class);
- $this->eventService = $this->createMock(IEventService::class);
- $this->viewFactory = $this->createMock(IViewFactory::class);
- $this->filesystem = $this->createMock(IFilesystem::class);
- $this->userSession = $this->createMock(IUserSession::class);
- $this->processingFileAccessor = $this->createMock(IProcessingFileAccessor::class);
$this->notificationService = $this->createMock(INotificationService::class);
-
- /** @var MockObject|IRootFolder */
- $this->rootFolder = $this->createMock(IRootFolder::class);
- $this->rootFolderGetById42ReturnValue = [$this->createValidFileMock()];
- $this->rootFolder->expects($this->any())
- ->method('getById')
- ->with(42)
- ->willReturnCallback(function () {
- return $this->rootFolderGetById42ReturnValue;
- });
-
- /** @var MockObject|IUserManager */
- $userManager = $this->createMock(IUserManager::class);
- $user = $this->createMock(IUser::class);
- $userManager->method('get')
- ->withAnyParameters()
- ->willReturn($user);
-
- $this->userManager = $userManager;
- $this->user = $user;
$this->processFileJob = new ProcessFileJob(
$this->logger,
- $this->rootFolder,
$this->ocrService,
- $this->eventService,
- $this->viewFactory,
- $this->filesystem,
- $this->userManager,
- $this->userSession,
- $this->processingFileAccessor,
$this->notificationService,
$this->createMock(ITimeFactory::class)
);
@@ -170,22 +111,15 @@ public function setUp() : void {
]);
}
- public function testCatchesExceptionAndResetsUserEnvironment() {
+ public function testCatchesException() {
$exception = new Exception('someEx');
- $this->filesystem->method('init')
+ $this->ocrService->method('runOcrProcess')
->willThrowException($exception);
$this->logger->expects($this->once())
->method('error')
->with($exception->getMessage(), ['exception' => $exception]);
- // Make sure user-environment is reset after any exception
- // so the user should be set on beginning but should also
- // be reset to null after any run.
- $this->userSession->expects($this->exactly(2))
- ->method('setUser')
- ->withConsecutive([$this->user], [null]);
-
$this->processFileJob->start($this->jobList);
}
@@ -194,14 +128,8 @@ public function testCatchesExceptionAndResetsUserEnvironment() {
*/
public function testLogsErrorAndDoesNothingOnInvalidArguments($argument, $errorMessagePart) {
$this->processFileJob->setArgument($argument);
- $this->filesystem->expects($this->never())
- ->method('init')
- ->withAnyParameters();
$this->ocrService->expects($this->never())
- ->method('ocrFile')
- ->withAnyParameters();
- $this->viewFactory->expects($this->never())
- ->method('create')
+ ->method('runOcrProcess')
->withAnyParameters();
$this->logger->expects($this->once())
->method('error')
@@ -212,104 +140,17 @@ public function testLogsErrorAndDoesNothingOnInvalidArguments($argument, $errorM
$this->processFileJob->start($this->jobList);
}
- /**
- * @dataProvider dataProvider_ValidArguments
- */
- public function testCallsInitFilesystem(array $arguments, string $user, string $rootFolderPath, string $originalFileExtension, string $expectedOcrFilename) {
- $this->processFileJob->setArgument($arguments);
- $this->filesystem->expects($this->once())
- ->method('init')
- ->with($user, $rootFolderPath);
-
- $this->processFileJob->start($this->jobList);
- }
-
- /**
- * @dataProvider dataProvider_ValidArguments
- */
- public function testCallsGetOnRootFolder(array $arguments, string $user, string $rootFolderPath, string $originalFileExtension, string $expectedOcrFilename) {
- $this->processFileJob->setArgument($arguments);
- $this->rootFolder->expects($this->once())
- ->method('getById')
- ->with(42);
-
- $this->processFileJob->start($this->jobList);
- }
-
- /**
- * @dataProvider dataProvider_ValidArguments
- */
- public function testCallsOcr_IfIsFile(array $arguments, string $user, string $rootFolderPath, string $originalFileExtension, string $expectedOcrFilename) {
- $this->processFileJob->setArgument($arguments);
-
- $mimeType = 'application/pdf';
- $content = 'someFileContent';
- $fileMock = $this->createValidFileMock($mimeType, $content);
- $this->rootFolderGetById42ReturnValue = [$fileMock];
-
- $this->ocrService->expects($this->once())
- ->method('ocrFile')
- ->with($fileMock);
-
- $this->processFileJob->start($this->jobList);
- }
-
- /**
- * @dataProvider dataProvider_ValidArguments
- */
- public function testCreatesNewFileVersionAndEmitsTextRecognizedEvent(array $arguments, string $user, string $rootFolderPath, string $originalFileName, string $expectedOcrFilename) {
- $this->processFileJob->setArgument($arguments);
- $mimeType = 'application/pdf';
- $content = 'someFileContent';
- $ocrContent = 'someOcrProcessedFile';
- $ocrResult = new OcrProcessorResult($ocrContent, "pdf", $ocrContent); // Extend this cases if we add new OCR processors
- $originalFileMock = $this->createValidFileMock($mimeType, $content, $rootFolderPath, $originalFileName);
-
- $this->rootFolderGetById42ReturnValue = [$originalFileMock];
-
- $this->ocrService->expects($this->once())
- ->method('ocrFile')
- ->willReturn($ocrResult);
-
- /** @var MockObject|IView */
- $viewMock = $this->createMock(IView::class);
- $viewMock->expects($this->once())
- ->method('file_put_contents')
- ->with($expectedOcrFilename, $ocrContent);
- $this->viewFactory->expects($this->once())
- ->method('create')
- ->with($rootFolderPath)
- ->willReturn($viewMock);
-
- $this->eventService->expects($this->once())
- ->method('textRecognized')
- ->with($ocrResult, $originalFileMock);
-
- $this->processFileJob->start($this->jobList);
- }
-
- public function testNotFoundLogsErrorAndSendsNotification_AndDoesNothingAfterwards() {
- $this->rootFolderGetById42ReturnValue = [];
+ public function testNotFoundLogsErrorAndSendsNotification() {
+ $this->ocrService->method('runOcrProcess')
+ ->willThrowException(new NotFoundException('File was not found'));
$this->logger->expects($this->once())
->method('error')
- ->with($this->stringContains('not found'));
+ ->with($this->stringContains('File was not found'), $this->callback(function ($subject) {
+ return is_array($subject) && ($subject['exception'] instanceof NotFoundException);
+ }));
$this->notificationService->expects($this->once())
->method('createErrorNotification')
->with($this->stringContains('An error occured while executing the OCR process (') && $this->stringContains('File was not found'));
- $this->ocrService->expects($this->never())
- ->method('ocrFile');
-
- $this->processFileJob->start($this->jobList);
- }
-
- /**
- * @dataProvider dataProvider_InvalidNodes
- */
- public function testDoesNotCallOcr_OnNonFile($invalidNode) {
- $this->rootFolderGetById42ReturnValue = [$invalidNode];
-
- $this->ocrService->expects($this->never())
- ->method('ocrFile');
$this->processFileJob->start($this->jobList);
}
@@ -318,156 +159,25 @@ public function testDoesNotCallOcr_OnNonFile($invalidNode) {
* @dataProvider dataProvider_OcrExceptions
*/
public function testLogsError_OnOcrException(Exception $exception) {
- $fileMock = $this->createValidFileMock();
- $this->rootFolder->method('get')
- ->with('/admin/files/somefile.pdf')
- ->willReturn($fileMock);
-
- $this->ocrService->method('ocrFile')
+ $this->ocrService->method('runOcrProcess')
->willThrowException($exception);
$this->logger->expects($this->once())
->method('error');
- $this->viewFactory->expects($this->never())
- ->method('create');
-
- $this->processFileJob->start($this->jobList);
- }
-
- public function testThrowsNoUserException_OnNonExistingUser() {
- // Unfortunately method definitions can't yet be overwritten in
- // PHPUnit, see https://github.com/sebastianbergmann/phpunit-documentation-english/issues/169
- /** @var IUserManager|MockObject */
- $userManager = $this->createMock(IUserManager::class);
- $userManager->method('get')
- ->with('nonexistinguser')
- ->willReturn(null);
-
- $this->logger->expects($this->once())
- ->method('error')
- ->with($this->stringContains('nonexistinguser'), $this->callback(function ($subject) {
- return is_array($subject) && ($subject['exception'] instanceof NoUserException);
- }));
-
- $processFileJob = new ProcessFileJob(
- $this->logger,
- $this->rootFolder,
- $this->ocrService,
- $this->eventService,
- $this->viewFactory,
- $this->filesystem,
- $userManager,
- $this->userSession,
- $this->processingFileAccessor,
- $this->notificationService,
- $this->createMock(ITimeFactory::class)
- );
- $processFileJob->setId(111);
- $arguments = ['fileId' => 42, 'uid' => 'nonexistinguser', 'settings' => '{}'];
- $processFileJob->setArgument($arguments);
-
- $processFileJob->start($this->jobList);
- }
-
- /**
- * @dataProvider dataProvider_ValidArguments
- */
- public function testCallsProcessingFileAccessor(array $arguments, string $user, string $rootFolderPath, string $originalFileExtension, string $expectedOcrFilename) {
- $this->processFileJob->setArgument($arguments);
- $mimeType = 'application/pdf';
- $content = 'someFileContent';
- $ocrContent = 'someOcrProcessedFile';
- $ocrResult = new OcrProcessorResult($ocrContent, "pdf", $ocrContent); // Extend this cases if we add new OCR processors
-
- $this->rootFolderGetById42ReturnValue = [$this->createValidFileMock($mimeType, $content)];
-
- $this->ocrService->expects($this->once())
- ->method('ocrFile')
- ->willReturn($ocrResult);
-
- $viewMock = $this->createMock(IView::class);
- $this->viewFactory->expects($this->once())
- ->method('create')
- ->willReturn($viewMock);
-
- $calledWithFileId42 = 0;
- $calledWithNull = 0;
- $withIdCalledFirst = false;
-
- $this->processingFileAccessor->expects($this->exactly(2))
- ->method('setCurrentlyProcessedFileId')
- ->with($this->callback(function ($id) use (&$calledWithFileId42, &$calledWithNull, &$withIdCalledFirst) {
- if ($id === 42) {
- $calledWithFileId42++;
- $withIdCalledFirst = $calledWithNull === 0;
- } elseif ($id === null) {
- $calledWithNull++;
- }
-
- return true;
- }));
-
- $this->processFileJob->start($this->jobList);
-
- $this->assertEquals(1, $calledWithFileId42);
- $this->assertEquals(1, $calledWithNull);
- $this->assertTrue($withIdCalledFirst);
- }
-
- /**
- * @dataProvider dataProvider_ValidArguments
- */
- public function testDoesNotCreateNewFileVersionIfOcrContentWasEmpty(array $arguments, string $user, string $rootFolderPath, string $originalFileExtension, string $expectedOcrFilename) {
- $this->processFileJob->setArgument($arguments);
- $mimeType = 'application/pdf';
- $content = 'someFileContent';
- $ocrContent = '';
- $ocrResult = new OcrProcessorResult($ocrContent, "pdf", $ocrContent);
- $fileId = $arguments['fileId'];
-
- $this->rootFolder->expects($this->once())
- ->method('getById')
- ->with($fileId)
- ->willReturn([$this->createValidFileMock($mimeType, $content)]);
-
- $this->ocrService->expects($this->once())
- ->method('ocrFile')
- ->willReturn($ocrResult);
-
- $viewMock = $this->createMock(IView::class);
- $this->viewFactory->expects($this->never())
- ->method('create')
- ->willReturn($viewMock);
-
- $this->processingFileAccessor->expects($this->never())
- ->method('setCurrentlyProcessedFileId');
-
- $this->eventService->expects($this->once())
- ->method('textRecognized');
-
$this->processFileJob->start($this->jobList);
}
public function testLogsNonOcrExceptionsFromOcrService() {
- $mimeType = 'application/pdf';
- $content = 'someFileContent';
$exception = new \Exception('someException');
- $fileMock = $this->createValidFileMock($mimeType, $content);
- $this->rootFolder->method('get')
- ->willReturn($fileMock);
-
$this->ocrService->expects($this->once())
- ->method('ocrFile')
+ ->method('runOcrProcess')
->willThrowException($exception);
$this->logger->expects($this->once())
->method('error');
- $this->viewFactory->expects($this->never())
- ->method('create');
-
$this->processFileJob->start($this->jobList);
}
@@ -479,55 +189,11 @@ public function dataProvider_InvalidArguments() {
return $arr;
}
- public function dataProvider_ValidArguments() {
- $arr = [
- [['fileId' => 42, 'uid' => 'admin', 'settings' => '{}'], 'admin', '/admin/files', 'somefile.pdf', 'somefile.pdf'],
- [['fileId' => 42, 'uid' => 'myuser', 'settings' => '{}'], 'myuser', '/myuser/files', 'someotherfile.jpg', 'someotherfile.jpg.pdf']
- ];
- return $arr;
- }
-
- public function dataProvider_InvalidNodes() {
- /** @var MockObject|Node */
- $folderMock = $this->createMock(Node::class);
- $folderMock->method('getType')
- ->willReturn(FileInfo::TYPE_FOLDER);
- $fileInfoMock = $this->createMock(FileInfo::class);
- $arr = [
- [$folderMock],
- [$fileInfoMock],
- [null],
- [(object)['someField' => 'someValue']]
- ];
- return $arr;
- }
-
public function dataProvider_OcrExceptions() {
return [
[new OcrNotPossibleException('Ocr not possible')],
- [new OcrProcessorNotFoundException('audio/mpeg')]
+ [new OcrProcessorNotFoundException('audio/mpeg')],
+ [new OcrResultEmptyException('Ocr result was empty')]
];
}
-
- /**
- * @return File|MockObject
- */
- private function createValidFileMock(string $mimeType = 'application/pdf', string $content = 'someFileContent', string $rootFolderPath = '/admin/files', string $fileName = 'somefile.pdf') {
- /** @var MockObject|File */
- $fileMock = $this->createMock(File::class);
- $fileMock->method('getType')
- ->willReturn(FileInfo::TYPE_FILE);
- $fileMock->method('getMimeType')
- ->willReturn($mimeType);
- $fileMock->method('getContent')
- ->willReturn($content);
- $fileMock->method('getId')
- ->willReturn(42);
- $fileMock->method('getPath')
- ->willReturn("$rootFolderPath/$fileName");
- #get extension from filename
- $fileMock->method('getExtension')
- ->willReturn(pathinfo($fileName, PATHINFO_EXTENSION));
- return $fileMock;
- }
}
diff --git a/tests/Unit/OcrProcessors/PdfOcrProcessorTest.php b/tests/Unit/OcrProcessors/PdfOcrProcessorTest.php
index 99d1e18..d372656 100644
--- a/tests/Unit/OcrProcessors/PdfOcrProcessorTest.php
+++ b/tests/Unit/OcrProcessors/PdfOcrProcessorTest.php
@@ -24,6 +24,7 @@
namespace OCA\WorkflowOcr\Tests\Unit\OcrProcessors;
use OCA\WorkflowOcr\Exception\OcrNotPossibleException;
+use OCA\WorkflowOcr\Exception\OcrResultEmptyException;
use OCA\WorkflowOcr\Helper\ISidecarFileAccessor;
use OCA\WorkflowOcr\Model\GlobalSettings;
use OCA\WorkflowOcr\Model\WorkflowSettings;
@@ -176,7 +177,7 @@ public function testThrowsErrorIfOcrFileWasEmpty() {
$processor->ocrFile($this->fileBefore, $this->defaultSettings, $this->defaultGlobalSettings);
} catch (\Throwable $t) {
$thrown = true;
- $this->assertInstanceOf(OcrNotPossibleException::class, $t);
+ $this->assertInstanceOf(OcrResultEmptyException::class, $t);
$this->assertEquals('OCRmyPDF did not produce any output for file /admin/files/somefile.pdf', $t->getMessage());
}
diff --git a/tests/Unit/Service/OcrServiceTest.php b/tests/Unit/Service/OcrServiceTest.php
index 1e6313a..e514472 100644
--- a/tests/Unit/Service/OcrServiceTest.php
+++ b/tests/Unit/Service/OcrServiceTest.php
@@ -23,13 +23,30 @@
namespace OCA\WorkflowOcr\Tests\Unit\Service;
+use Exception;
+use InvalidArgumentException;
+use OC\User\NoUserException;
+use OCA\WorkflowOcr\Exception\OcrResultEmptyException;
+use OCA\WorkflowOcr\Helper\IProcessingFileAccessor;
use OCA\WorkflowOcr\Model\GlobalSettings;
use OCA\WorkflowOcr\Model\WorkflowSettings;
use OCA\WorkflowOcr\OcrProcessors\IOcrProcessor;
use OCA\WorkflowOcr\OcrProcessors\IOcrProcessorFactory;
+use OCA\WorkflowOcr\OcrProcessors\OcrProcessorResult;
+use OCA\WorkflowOcr\Service\IEventService;
use OCA\WorkflowOcr\Service\IGlobalSettingsService;
use OCA\WorkflowOcr\Service\OcrService;
+use OCA\WorkflowOcr\Wrapper\IFilesystem;
+use OCA\WorkflowOcr\Wrapper\IView;
+use OCA\WorkflowOcr\Wrapper\IViewFactory;
use OCP\Files\File;
+use OCP\Files\FileInfo;
+use OCP\Files\IRootFolder;
+use OCP\Files\Node;
+use OCP\Files\NotFoundException;
+use OCP\IUser;
+use OCP\IUserManager;
+use OCP\IUserSession;
use OCP\SystemTag\ISystemTagObjectMapper;
use PHPUnit\Framework\MockObject\MockObject;
use PHPUnit\Framework\TestCase;
@@ -46,6 +63,24 @@ class OcrServiceTest extends TestCase {
private $systemTagObjectMapper;
/** @var LoggerInterface|MockObject */
private $logger;
+ /** @var IRootFolder|MockObject */
+ private $rootFolder;
+ /** @var IEventService|MockObject */
+ private $eventService;
+ /** @var IViewFactory|MockObject */
+ private $viewFactory;
+ /** @var IFilesystem|MockObject */
+ private $filesystem;
+ /** @var IUserSession|MockObject */
+ private $userSession;
+ /** @var IUserManager|MockObject */
+ private $userManager;
+ /** @var IUser|MockObject */
+ private $user;
+ /** @var IProcessingFileAccessor|MockObject */
+ private $processingFileAccessor;
+ /** @var File[] */
+ private $rootFolderGetById42ReturnValue;
/** @var OcrService */
private $ocrService;
/** @var File|MockObject */
@@ -55,12 +90,54 @@ public function setUp() : void {
parent::setUp();
$this->globalSettingsService = $this->createMock(IGlobalSettingsService::class);
- $this->ocrProcessorFactory = $this->createMock(IOcrProcessorFactory::class);
$this->ocrProcessor = $this->createMock(IOcrProcessor::class);
$this->systemTagObjectMapper = $this->createMock(ISystemTagObjectMapper::class);
$this->logger = $this->createMock(LoggerInterface::class);
$this->fileInput = $this->createMock(File::class);
- $this->ocrService = new OcrService($this->ocrProcessorFactory, $this->globalSettingsService, $this->systemTagObjectMapper, $this->logger);
+ $this->eventService = $this->createMock(IEventService::class);
+ $this->viewFactory = $this->createMock(IViewFactory::class);
+ $this->filesystem = $this->createMock(IFilesystem::class);
+ $this->userSession = $this->createMock(IUserSession::class);
+ $this->processingFileAccessor = $this->createMock(IProcessingFileAccessor::class);
+
+ /** @var MockObject|IRootFolder */
+ $this->rootFolder = $this->createMock(IRootFolder::class);
+ $this->rootFolderGetById42ReturnValue = [$this->createValidFileMock()];
+ $this->rootFolder->expects($this->any())
+ ->method('getById')
+ ->with(42)
+ ->willReturnCallback(function () {
+ return $this->rootFolderGetById42ReturnValue;
+ });
+
+ /** @var MockObject|IUserManager */
+ $userManager = $this->createMock(IUserManager::class);
+ $user = $this->createMock(IUser::class);
+ $userManager->method('get')
+ ->withAnyParameters()
+ ->willReturn($user);
+
+ $this->userManager = $userManager;
+ $this->user = $user;
+
+ /** @var MockObject|IOcrProcessorFactory */
+ $this->ocrProcessorFactory = $this->createMock(IOcrProcessorFactory::class);
+ $this->ocrProcessorFactory->method('create')
+ ->withAnyParameters()
+ ->willReturn($this->ocrProcessor);
+
+ $this->ocrService = new OcrService(
+ $this->ocrProcessorFactory,
+ $this->globalSettingsService,
+ $this->systemTagObjectMapper,
+ $this->userManager,
+ $this->filesystem,
+ $this->userSession,
+ $this->rootFolder,
+ $this->eventService,
+ $this->viewFactory,
+ $this->processingFileAccessor,
+ $this->logger);
}
public function testCallsOcrProcessor_WithCorrectArguments() {
@@ -87,7 +164,7 @@ public function testCallsOcrProcessor_WithCorrectArguments() {
->method('ocrFile')
->with($this->fileInput, $settings, $globalSettings);
- $this->ocrService->ocrFile($this->fileInput, $settings);
+ $this->ocrService->runOcrProcess(42, 'usr', $settings);
}
public function testCallsSystemTagObjectManager_WithCorrectArguments() {
@@ -124,7 +201,7 @@ public function testCallsSystemTagObjectManager_WithCorrectArguments() {
->method('assignTags')
->withConsecutive(["42", "files", 3], ["42", "files", 4]);
- $this->ocrService->ocrFile($this->fileInput, $settings);
+ $this->ocrService->runOcrProcess(42, 'usr', $settings);
}
public function testCatchesTagNotFoundException() {
@@ -160,6 +237,333 @@ public function testCatchesTagNotFoundException() {
$this->logger->expects($this->exactly(2))
->method('warning');
- $this->ocrService->ocrFile($this->fileInput, $settings);
+ $this->ocrService->runOcrProcess(42, 'usr', $settings);
+ }
+
+ public function testResetsUserEnvironmentOnException() {
+ $settings = new WorkflowSettings();
+ $exception = new Exception('someEx');
+ $this->filesystem->method('init')
+ ->willThrowException($exception);
+
+ // Make sure user-environment is reset after any exception
+ // so the user should be set on beginning but should also
+ // be reset to null after any run.
+ $this->userSession->expects($this->exactly(2))
+ ->method('setUser')
+ ->withConsecutive([$this->user], [null]);
+
+ $thrown = false;
+ try {
+ $this->ocrService->runOcrProcess(42, 'usr', $settings);
+ } catch (Exception $e) {
+ $this->assertEquals($exception, $e);
+ $thrown = true;
+ }
+ $this->assertTrue($thrown);
+ }
+
+ public function testCallsInitFilesystem() {
+ $settings = new WorkflowSettings();
+ $this->filesystem->expects($this->once())
+ ->method('init')
+ ->with('usr', '/usr/files');
+
+ $this->ocrService->runOcrProcess(42, 'usr', $settings);
+ }
+
+ public function testCallsGetOnRootFolder() {
+ $settings = new WorkflowSettings();
+ $this->rootFolder->expects($this->once())
+ ->method('getById')
+ ->with(42);
+
+ $this->ocrService->runOcrProcess(42, 'usr', $settings);
+ }
+
+ public function testCallsOcrProcessorWithFile() {
+ $globalSettings = new GlobalSettings();
+ $settings = new WorkflowSettings();
+
+ $mimeType = 'application/pdf';
+ $content = 'someFileContent';
+ $fileMock = $this->createValidFileMock($mimeType, $content);
+ $this->rootFolderGetById42ReturnValue = [$fileMock];
+
+ $this->globalSettingsService->expects($this->once())
+ ->method('getGlobalSettings')
+ ->willReturn($globalSettings);
+
+ $this->ocrProcessor->expects($this->once())
+ ->method('ocrFile')
+ ->with($fileMock, $settings, $globalSettings);
+
+ $this->ocrService->runOcrProcess(42, 'usr', $settings);
+ }
+
+ /**
+ * @dataProvider dataProvider_OriginalAndNewFilesnames
+ */
+ public function testCreatesNewFileVersionAndEmitsTextRecognizedEvent(string $originalFilename, string $expectedOcrFilename) {
+ $settings = new WorkflowSettings();
+
+ $rootFolderPath = '/usr/files';
+ $originalFilename = 'somefile.pdf';
+ $expectedOcrFilename = 'somefile.pdf';
+ $mimeType = 'application/pdf';
+ $content = 'someFileContent';
+ $ocrContent = 'someOcrProcessedFile';
+ $ocrResult = new OcrProcessorResult($ocrContent, "pdf", $ocrContent); // Extend this cases if we add new OCR processors
+ $originalFileMock = $this->createValidFileMock($mimeType, $content, $rootFolderPath, $originalFilename);
+
+ $this->rootFolderGetById42ReturnValue = [$originalFileMock];
+
+ $this->ocrProcessor->expects($this->once())
+ ->method('ocrFile')
+ ->willReturn($ocrResult);
+
+ /** @var MockObject|IView */
+ $viewMock = $this->createMock(IView::class);
+ $viewMock->expects($this->once())
+ ->method('file_put_contents')
+ ->with($expectedOcrFilename, $ocrContent);
+ $this->viewFactory->expects($this->once())
+ ->method('create')
+ ->with($rootFolderPath)
+ ->willReturn($viewMock);
+
+ $this->eventService->expects($this->once())
+ ->method('textRecognized')
+ ->with($ocrResult, $originalFileMock);
+
+ $this->ocrService->runOcrProcess(42, 'usr', $settings);
+ }
+
+ public function testThrowsNotFoundExceptionWhenFileNotFound() {
+ $settings = new WorkflowSettings();
+ $this->rootFolderGetById42ReturnValue = [];
+
+ $this->expectException(NotFoundException::class);
+ $this->ocrService->runOcrProcess(42, 'usr', $settings);
+ }
+
+ /**
+ * @dataProvider dataProvider_InvalidNodes
+ */
+ public function testDoesNotCallOcr_OnNonFile($invalidNode) {
+ $settings = new WorkflowSettings();
+ $this->rootFolderGetById42ReturnValue = [$invalidNode];
+
+ $this->ocrProcessor->expects($this->never())
+ ->method('ocrFile');
+
+ $this->expectException(InvalidArgumentException::class);
+ $this->ocrService->runOcrProcess(42, 'usr', $settings);
+ }
+
+ public function testThrowsNoUserException_OnNonExistingUser() {
+ $settings = new WorkflowSettings();
+
+ // Unfortunately method definitions can't yet be overwritten in
+ // PHPUnit, see https://github.com/sebastianbergmann/phpunit-documentation-english/issues/169
+ /** @var IUserManager|MockObject */
+ $userManager = $this->createMock(IUserManager::class);
+ $userManager->method('get')
+ ->with('nonexistinguser')
+ ->willReturn(null);
+
+ $this->ocrService = new OcrService(
+ $this->ocrProcessorFactory,
+ $this->globalSettingsService,
+ $this->systemTagObjectMapper,
+ $userManager,
+ $this->filesystem,
+ $this->userSession,
+ $this->rootFolder,
+ $this->eventService,
+ $this->viewFactory,
+ $this->processingFileAccessor,
+ $this->logger);
+
+ $thrown = false;
+ try {
+ $this->ocrService->runOcrProcess(42, 'nonexistinguser', $settings);
+ } catch (NoUserException $e) {
+ $this->assertTrue(str_contains($e->getMessage(), 'nonexistinguser'));
+ $thrown = true;
+ }
+ $this->assertTrue($thrown);
+ }
+
+ public function testCallsProcessingFileAccessor() {
+ $settings = new WorkflowSettings();
+ $mimeType = 'application/pdf';
+ $content = 'someFileContent';
+ $ocrContent = 'someOcrProcessedFile';
+ $ocrResult = new OcrProcessorResult($ocrContent, "pdf", $ocrContent); // Extend this cases if we add new OCR processors
+
+ $this->rootFolderGetById42ReturnValue = [$this->createValidFileMock($mimeType, $content)];
+
+ $this->ocrProcessor->expects($this->once())
+ ->method('ocrFile')
+ ->willReturn($ocrResult);
+
+ $viewMock = $this->createMock(IView::class);
+ $this->viewFactory->expects($this->once())
+ ->method('create')
+ ->willReturn($viewMock);
+
+ $calledWithFileId42 = 0;
+ $calledWithNull = 0;
+ $withIdCalledFirst = false;
+
+ $this->processingFileAccessor->expects($this->exactly(2))
+ ->method('setCurrentlyProcessedFileId')
+ ->with($this->callback(function ($id) use (&$calledWithFileId42, &$calledWithNull, &$withIdCalledFirst) {
+ if ($id === 42) {
+ $calledWithFileId42++;
+ $withIdCalledFirst = $calledWithNull === 0;
+ } elseif ($id === null) {
+ $calledWithNull++;
+ }
+
+ return true;
+ }));
+
+ $this->ocrService->runOcrProcess(42, 'usr', $settings);
+
+ $this->assertEquals(1, $calledWithFileId42);
+ $this->assertEquals(1, $calledWithNull);
+ $this->assertTrue($withIdCalledFirst);
+ }
+
+ public function testDoesNotCreateNewFileVersionIfOcrContentWasEmpty() {
+ $settings = new WorkflowSettings();
+ $user = 'usr';
+ $mimeType = 'application/pdf';
+ $content = 'someFileContent';
+ $ocrContent = '';
+ $ocrResult = new OcrProcessorResult($ocrContent, "pdf", $ocrContent);
+ $fileId = 42;
+
+ $this->rootFolder->expects($this->once())
+ ->method('getById')
+ ->with($fileId)
+ ->willReturn([$this->createValidFileMock($mimeType, $content)]);
+
+ $this->ocrProcessor->expects($this->once())
+ ->method('ocrFile')
+ ->willReturn($ocrResult);
+
+ $viewMock = $this->createMock(IView::class);
+ $this->viewFactory->expects($this->never())
+ ->method('create')
+ ->willReturn($viewMock);
+
+ $this->processingFileAccessor->expects($this->never())
+ ->method('setCurrentlyProcessedFileId');
+
+ $this->eventService->expects($this->once())
+ ->method('textRecognized');
+
+ $this->ocrService->runOcrProcess($fileId, $user, $settings);
+ }
+
+ public function testOcrSkippedIfOcrModeIsSkipFileAndResultIsEmpty() {
+ $fileId = 42;
+ $settings = new WorkflowSettings('{"ocrMode": ' . WorkflowSettings::OCR_MODE_SKIP_FILE . '}');
+
+ $this->ocrProcessor->expects($this->once())
+ ->method('ocrFile')
+ ->willThrowException(new OcrResultEmptyException('oops'));
+ $this->logger->expects($this->once())
+ ->method('debug')
+ ->with($this->stringStartsWith('Skipping empty OCR result for file with id'), ['fileId' => $fileId]);
+
+ $this->ocrService->runOcrProcess($fileId, 'usr', $settings);
+ }
+
+ /**
+ * @dataProvider dataProvider_OcrModesThrowOnEmptyResult
+ */
+ public function testOcrEmptyExceptionIsThrown(int $ocrMode) {
+ $fileId = 42;
+ $settings = new WorkflowSettings('{"ocrMode": ' . $ocrMode . '}');
+ $ex = new OcrResultEmptyException('oops');
+
+ $this->ocrProcessor->expects($this->once())
+ ->method('ocrFile')
+ ->willThrowException($ex);
+ $this->logger->expects($this->never())
+ ->method('debug');
+
+ $thrown = false;
+ try {
+ $this->ocrService->runOcrProcess($fileId, 'usr', $settings);
+ } catch (OcrResultEmptyException $e) {
+ $this->assertEquals($ex, $e);
+ $thrown = true;
+ }
+ $this->assertTrue($thrown);
+ }
+
+ public function dataProvider_InvalidNodes() {
+ /** @var MockObject|Node */
+ $folderMock = $this->createMock(Node::class);
+ $folderMock->method('getType')
+ ->willReturn(FileInfo::TYPE_FOLDER);
+ $fileInfoMock = $this->createMock(FileInfo::class);
+ $arr = [
+ [$folderMock],
+ [$fileInfoMock],
+ [null],
+ [(object)['someField' => 'someValue']]
+ ];
+ return $arr;
+ }
+
+ public function dataProvider_OcrModesThrowOnEmptyResult() {
+ return [
+ [WorkflowSettings::OCR_MODE_FORCE_OCR],
+ [WorkflowSettings::OCR_MODE_SKIP_TEXT],
+ [WorkflowSettings::OCR_MODE_REDO_OCR]
+ ];
}
+
+ public function dataProvider_OriginalAndNewFilesnames() {
+ return [
+ ['somefile.pdf', 'somefile.pdf'],
+ ['somefile.jpg', 'somefile.jpg.pdf']
+ ];
+ }
+
+ /**
+ * @return File|MockObject
+ */
+ private function createValidFileMock(string $mimeType = 'application/pdf', string $content = 'someFileContent', string $rootFolderPath = '/admin/files', string $fileName = 'somefile.pdf') {
+ /** @var MockObject|File */
+ $fileMock = $this->createMock(File::class);
+ $fileMock->method('getType')
+ ->willReturn(FileInfo::TYPE_FILE);
+ $fileMock->method('getMimeType')
+ ->willReturn($mimeType);
+ $fileMock->method('getContent')
+ ->willReturn($content);
+ $fileMock->method('getId')
+ ->willReturn(42);
+ $fileMock->method('getPath')
+ ->willReturn("$rootFolderPath/$fileName");
+ #get extension from filename
+ $fileMock->method('getExtension')
+ ->willReturn(pathinfo($fileName, PATHINFO_EXTENSION));
+ return $fileMock;
+ }
+
+ // public function dataProvider_ValidArguments() {
+ // $arr = [
+ // [['fileId' => 42, 'uid' => 'admin', 'settings' => '{}'], 'admin', '/admin/files', 'somefile.pdf', 'somefile.pdf'],
+ // [['fileId' => 42, 'uid' => 'myuser', 'settings' => '{}'], 'myuser', '/myuser/files', 'someotherfile.jpg', 'someotherfile.jpg.pdf']
+ // ];
+ // return $arr;
+ // }
}
diff --git a/tests/psalm-baseline.xml b/tests/psalm-baseline.xml
index 8b9d31b..3fa0ede 100644
--- a/tests/psalm-baseline.xml
+++ b/tests/psalm-baseline.xml
@@ -1,15 +1,5 @@
-
-
- $this->rootFolder
- IRootFolder
- IRootFolder
-
-
- NoUserException
-
-
$this->rootFolder
@@ -35,6 +25,16 @@
$settings
+
+
+ $this->rootFolder
+ IRootFolder
+ IRootFolder
+
+
+ NoUserException
+
+
\OC\Files\Filesystem