diff --git a/README.md b/README.md
index d138bde..53a42f0 100644
--- a/README.md
+++ b/README.md
@@ -137,6 +137,7 @@ Assign tags after OCR | These tags will be assigned to the file after it has bee
Remove tags after OCR | These tags will be removed from the file after it has been successfully processed. If the file does not have the tag, it will just be skipped. |
OCR mode | Controls the way files are processed, which already have OCR content. For PDF files this setting corresponds to the `--skip-text`, `--redo-ocr` and `--force-ocr` parameters of `ocrmypdf`. See [official docs](https://ocrmypdf.readthedocs.io/en/latest/advanced.html#when-ocr-is-skipped) for additional information.
**Skip text:** skip pages completely that already contain text. Such a page will not be touched and just be copied to the final output.
**Redo OCR:** perform a detailed text analysis to split up pages into areas with and without text.
**Force OCR:** all pages will be rasterized to images and OCR will be performed on every page. |
Keep original file version | If the switch is set, the original file (before applying OCR) will be kept. This is done by giving the file version the label `Before OCR`. This version will be excluded from the automatic expiration process (see [here](https://docs.nextcloud.com/server/latest/user_manual/en/files/version_control.html#naming-a-version) for details) |
+Keep original file modification date | Restore the modification date of the original file. The original modification date will be applied to the newly created file version. This is useful if you need to preserve the file modification date, for example to be able to sort files accordingly. |
Remove background\* | If the switch is set, the OCR processor will try to remove the background of the document before processing and instead set a white background. For PDF files this setting corresponds to the [`--remove-background`](https://ocrmypdf.readthedocs.io/en/latest/cookbook.html?highlight=remove-background#image-processing) parameter of `ocrmypdf`.
:warning: Please note that this flag will currently only work with **`ocrmypdf` versions prior to 13**. It might be added in future versions again. See [here](https://github.com/ocrmypdf/OCRmyPDF/issues/884) for details. :warning:|
Custom ocrMyPdf CLI arguments | If you want to pass custom arguments to the `ocrmypdf` CLI, you can do so here. Please note that the arguments will be passed as they are to the CLI, so make sure to use the correct syntax. Check the [official docs](https://ocrmypdf.readthedocs.io/en/latest/cookbook.html) for more information. |
diff --git a/lib/Model/WorkflowSettings.php b/lib/Model/WorkflowSettings.php
index 637d173..49aee5a 100644
--- a/lib/Model/WorkflowSettings.php
+++ b/lib/Model/WorkflowSettings.php
@@ -52,6 +52,9 @@ class WorkflowSettings {
/** @var bool */
private $keepOriginalFileVersion = false;
+ /** @var bool */
+ private $keepOriginalFileDate = false;
+
/** @var string */
private $customCliArgs = '';
@@ -104,6 +107,13 @@ public function getKeepOriginalFileVersion(): bool {
return $this->keepOriginalFileVersion;
}
+ /**
+ * @return bool
+ */
+ public function getKeepOriginalFileDate(): bool {
+ return $this->keepOriginalFileDate;
+ }
+
/**
* @return string
*/
@@ -143,6 +153,7 @@ private function setJson(?string $json = null) {
$this->setProperty($this->tagsToRemoveAfterOcr, $data, 'tagsToRemoveAfterOcr', fn ($value) => is_array($value));
$this->setProperty($this->tagsToAddAfterOcr, $data, 'tagsToAddAfterOcr', fn ($value) => is_array($value));
$this->setProperty($this->keepOriginalFileVersion, $data, 'keepOriginalFileVersion', fn ($value) => is_bool($value));
+ $this->setProperty($this->keepOriginalFileDate, $data, 'keepOriginalFileDate', fn ($value) => is_bool($value));
$this->setProperty($this->customCliArgs, $data, 'customCliArgs', fn ($value) => is_string($value));
}
diff --git a/lib/Service/OcrService.php b/lib/Service/OcrService.php
index 1e2f068..6fabcbf 100644
--- a/lib/Service/OcrService.php
+++ b/lib/Service/OcrService.php
@@ -121,6 +121,13 @@ public function runOcrProcess(int $fileId, string $uid, WorkflowSettings $settin
$this->initUserEnvironment($uid);
$file = $this->getNode($fileId);
+
+ $fileMtime = null;
+ if ($settings->getKeepOriginalFileDate()) {
+ // Add one second to the original file modification time (getMTime() returns a Unix timestamp in seconds) to prevent the new original version from being overwritten
+ $fileMtime = $file->getMTime() + 1;
+ }
+
$ocrProcessor = $this->ocrProcessorFactory->create($file->getMimeType());
$globalSettings = $this->globalSettingsService->getGlobalSettings();
@@ -153,7 +160,7 @@ public function runOcrProcess(int $fileId, string $uid, WorkflowSettings $settin
$filePath :
$filePath . '.pdf';
- $this->createNewFileVersion($newFilePath, $fileContent, $fileId);
+ $this->createNewFileVersion($newFilePath, $fileContent, $fileId, $fileMtime);
}
$this->eventService->textRecognized($result, $file);
@@ -180,7 +187,7 @@ private function shutdownUserEnvironment() : void {
$this->userSession->setUser(null);
}
- private function getNode(int $fileId) : ?Node {
+ private function getNode(int $fileId) : Node {
/** @var File[] */
$nodeArr = $this->rootFolder->getById($fileId);
if (count($nodeArr) === 0) {
@@ -223,8 +230,9 @@ private function processTagsAfterSuccessfulOcr(File $file, WorkflowSettings $set
* @param string $filePath The filepath of the file to write
* @param string $ocrContent The new filecontent (which was OCR processed)
* @param int $fileId The id of the file to write. Used for locking.
+ * @param int|null $fileMtime The mtime to set on the new file, or null to leave the mtime untouched. Can be used to restore the original modification time of the non-OCR file.
*/
- private function createNewFileVersion(string $filePath, string $ocrContent, int $fileId) : void {
+ private function createNewFileVersion(string $filePath, string $ocrContent, int $fileId, ?int $fileMtime = null) : void {
$dirPath = dirname($filePath);
$filename = basename($filePath);
@@ -237,6 +245,11 @@ private function createNewFileVersion(string $filePath, string $ocrContent, int
// add the file to the queue again but this is tackled
// by the processingFileAccessor.
$view->file_put_contents($filename, $ocrContent);
+
+ // Restore the original modification time of the non-OCR file
+ if ($fileMtime !== null) {
+ $view->touch($filename, $fileMtime);
+ }
} finally {
$this->processingFileAccessor->setCurrentlyProcessedFileId(null);
}
diff --git a/lib/Wrapper/IView.php b/lib/Wrapper/IView.php
index 98cdf33..804deff 100644
--- a/lib/Wrapper/IView.php
+++ b/lib/Wrapper/IView.php
@@ -31,4 +31,5 @@
*/
interface IView {
public function file_put_contents(string $filePath, string $content) : bool;
+ public function touch($path, $mtime = null): bool;
}
diff --git a/lib/Wrapper/ViewWrapper.php b/lib/Wrapper/ViewWrapper.php
index bc2912f..6f21acb 100644
--- a/lib/Wrapper/ViewWrapper.php
+++ b/lib/Wrapper/ViewWrapper.php
@@ -43,4 +43,11 @@ public function file_put_contents(string $filePath, string $content) : bool {
$retVal = $this->wrappedView->file_put_contents($filePath, $content);
return boolval($retVal);
}
+
+ /**
+ * @inheritdoc
+ */
+ public function touch($path, $mtime = null): bool {
+ return $this->wrappedView->touch($path, $mtime);
+ }
}
diff --git a/src/components/WorkflowOcr.vue b/src/components/WorkflowOcr.vue
index fb1077b..3a73f26 100644
--- a/src/components/WorkflowOcr.vue
+++ b/src/components/WorkflowOcr.vue
@@ -89,11 +89,16 @@
type="switch">
{{ t('workflow_ocr', 'Remove background') }}
-
wrappedView]]>
wrappedView]]>