From b38708afafbaead1fcf1c54c4f7d3dfec504f045 Mon Sep 17 00:00:00 2001 From: Robin Appelman Date: Tue, 7 Nov 2023 16:28:27 +0100 Subject: [PATCH 1/4] dont reuse etag for folders marked explicitly unscanned Signed-off-by: Robin Appelman --- lib/private/Files/Cache/Scanner.php | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/private/Files/Cache/Scanner.php b/lib/private/Files/Cache/Scanner.php index 2711fc8ad19cd..e1e3528c9784a 100644 --- a/lib/private/Files/Cache/Scanner.php +++ b/lib/private/Files/Cache/Scanner.php @@ -203,7 +203,8 @@ public function scanFile($file, $reuseExisting = 0, $parentId = -1, $cacheData = $fileId = $cacheData['fileid']; $data['fileid'] = $fileId; // only reuse data if the file hasn't explicitly changed - if (isset($data['storage_mtime']) && isset($cacheData['storage_mtime']) && $data['storage_mtime'] === $cacheData['storage_mtime']) { + $mtimeUnchanged = isset($data['storage_mtime']) && isset($cacheData['storage_mtime']) && $data['storage_mtime'] === $cacheData['storage_mtime']; + if ($mtimeUnchanged && $cacheData['size'] !== -1) { $data['mtime'] = $cacheData['mtime']; if (($reuseExisting & self::REUSE_SIZE) && ($data['size'] === -1)) { $data['size'] = $cacheData['size']; From f56a8e7e9e114bcc0c820fd26ab089e779a4f842 Mon Sep 17 00:00:00 2001 From: Robin Appelman Date: Tue, 7 Nov 2023 16:53:45 +0100 Subject: [PATCH 2/4] add test for updating etag with unchanged mtime Signed-off-by: Robin Appelman --- tests/lib/Files/Cache/ScannerTest.php | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/tests/lib/Files/Cache/ScannerTest.php b/tests/lib/Files/Cache/ScannerTest.php index e4c052f602527..52587aeabc765 100644 --- a/tests/lib/Files/Cache/ScannerTest.php +++ b/tests/lib/Files/Cache/ScannerTest.php @@ -404,4 +404,23 @@ public function dataTestIsPartialFile() { ['/sub/folder/foo.txt', false], ]; } + + public function testNoETagUnscannedFolder() { + $this->fillTestFolders(); + + $this->scanner->scan(''); + + $oldFolderEntry = $this->cache->get('folder'); + // create a new file in a folder by keeping the mtime unchanged, but mark the folder as unscanned + $this->storage->file_put_contents('folder/new.txt', 'foo'); + $this->storage->touch('folder', $oldFolderEntry->getMTime()); + $this->cache->update($oldFolderEntry->getId(), ['size' => -1]); + + $this->scanner->scan(''); + + $this->cache->inCache('folder/new.txt'); + + $newFolderEntry = $this->cache->get('folder'); + $this->assertNotEquals($newFolderEntry->getEtag(), $oldFolderEntry->getEtag()); + } } From 404b340ddaee86c7b464ed846ba96aa95c61a835 Mon Sep 17 00:00:00 2001 From: Robin Appelman Date: Wed, 22 Nov 2023 16:26:57 +0100 Subject: [PATCH 3/4] add test for updating etag with unchanged mtime in child folder Signed-off-by: Robin Appelman --- tests/lib/Files/Cache/ScannerTest.php | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/tests/lib/Files/Cache/ScannerTest.php b/tests/lib/Files/Cache/ScannerTest.php index 52587aeabc765..22d458a4a9b1b 100644 --- a/tests/lib/Files/Cache/ScannerTest.php +++ b/tests/lib/Files/Cache/ScannerTest.php @@ -423,4 +423,29 @@ public function testNoETagUnscannedFolder() { $newFolderEntry = $this->cache->get('folder'); $this->assertNotEquals($newFolderEntry->getEtag(), $oldFolderEntry->getEtag()); } + + public function testNoETagUnscannedSubFolder() { + $this->fillTestFolders(); + $this->storage->mkdir('folder/sub'); + + $this->scanner->scan(''); + + $oldFolderEntry1 = $this->cache->get('folder'); + $oldFolderEntry2 = $this->cache->get('folder/sub'); + // create a new file in a folder by keeping the mtime unchanged, but mark the folder as unscanned + $this->storage->file_put_contents('folder/sub/new.txt', 'foo'); + $this->storage->touch('folder/sub', $oldFolderEntry1->getMTime()); + + // we only mark the direct parent as unscanned, which is the current "notify" behavior + $this->cache->update($oldFolderEntry2->getId(), ['size' => -1]); + + $this->scanner->scan(''); + + $this->cache->inCache('folder/new.txt'); + + $newFolderEntry1 = $this->cache->get('folder'); + $this->assertNotEquals($newFolderEntry1->getEtag(), $oldFolderEntry1->getEtag()); + $newFolderEntry2 = $this->cache->get('folder/sub'); + $this->assertNotEquals($newFolderEntry2->getEtag(), $oldFolderEntry2->getEtag()); + } } From 9616d48c388a62d8dce72d95d4810a346c863824 Mon Sep 17 00:00:00 2001 From: Robin Appelman Date: Wed, 22 Nov 2023 16:27:29 +0100 Subject: [PATCH 4/4] also updated parent etags when a changed etag is detected during scanning Signed-off-by: Robin Appelman --- lib/private/Files/Cache/Scanner.php | 35 +++++++++++++++---- .../Files/ObjectStore/ObjectStoreScanner.php | 2 +- 2 files changed, 30 insertions(+), 7 deletions(-) diff --git a/lib/private/Files/Cache/Scanner.php b/lib/private/Files/Cache/Scanner.php index e1e3528c9784a..0c82e21e30d47 100644 --- a/lib/private/Files/Cache/Scanner.php +++ b/lib/private/Files/Cache/Scanner.php @@ -204,6 +204,7 @@ public function scanFile($file, $reuseExisting = 0, $parentId = -1, $cacheData = $data['fileid'] = $fileId; // only reuse data if the file hasn't explicitly changed $mtimeUnchanged = isset($data['storage_mtime']) && isset($cacheData['storage_mtime']) && $data['storage_mtime'] === $cacheData['storage_mtime']; + // if the folder is marked as unscanned, never reuse etags if ($mtimeUnchanged && $cacheData['size'] !== -1) { $data['mtime'] = $cacheData['mtime']; if (($reuseExisting & self::REUSE_SIZE) && ($data['size'] === -1)) { @@ -221,6 +222,11 @@ public function scanFile($file, $reuseExisting = 0, $parentId = -1, $cacheData = // Only update metadata that has changed $newData = array_diff_assoc($data, $cacheData->getData()); + + // make it known to the caller that etag has been changed and needs propagation + if (isset($newData['etag'])) { + $data['etag_changed'] = true; + } } else { // we only updated unencrypted_size if it's already set unset($data['unencrypted_size']); @@ -389,16 +395,20 @@ protected function getExistingChildren($folderId) { * @param int|float $oldSize the size of the folder before (re)scanning the children * @return int|float the size of the scanned folder or -1 if the size is unknown at this stage */ - protected function scanChildren(string $path, $recursive, int $reuse, int $folderId, bool $lock, int|float $oldSize) { + protected function scanChildren(string $path, $recursive, int $reuse, int $folderId, bool $lock, int|float $oldSize, &$etagChanged = false) { if ($reuse === -1) { $reuse = ($recursive === self::SCAN_SHALLOW) ? self::REUSE_ETAG | self::REUSE_SIZE : self::REUSE_ETAG; } $this->emit('\OC\Files\Cache\Scanner', 'scanFolder', [$path, $this->storageId]); $size = 0; - $childQueue = $this->handleChildren($path, $recursive, $reuse, $folderId, $lock, $size); + $childQueue = $this->handleChildren($path, $recursive, $reuse, $folderId, $lock, $size, $etagChanged); foreach ($childQueue as $child => [$childId, $childSize]) { - $childSize = $this->scanChildren($child, $recursive, $reuse, $childId, $lock, $childSize); + // "etag changed" propagates up, but not down, so we pass `false` to the children even if we already know that the etag of the current folder changed + $childEtagChanged = false; + $childSize = $this->scanChildren($child, $recursive, $reuse, $childId, $lock, $childSize, $childEtagChanged); + $etagChanged |= $childEtagChanged; + if ($childSize === -1) { $size = -1; } elseif ($size !== -1) { @@ -411,8 +421,17 @@ protected function scanChildren(string $path, $recursive, int $reuse, int $folde if ($this->storage->instanceOfStorage(Encryption::class)) { $this->cache->calculateFolderSize($path); } else { - if ($this->cacheActive && $oldSize !== $size) { - $this->cache->update($folderId, ['size' => $size]); + if ($this->cacheActive) { + $updatedData = []; + if ($oldSize !== $size) { + $updatedData['size'] = $size; + } + if ($etagChanged) { + $updatedData['etag'] = uniqid(); + } + if ($updatedData) { + $this->cache->update($folderId, $updatedData); + } } } $this->emit('\OC\Files\Cache\Scanner', 'postScanFolder', [$path, $this->storageId]); @@ -422,7 +441,7 @@ protected function scanChildren(string $path, $recursive, int $reuse, int $folde /** * @param bool|IScanner::SCAN_RECURSIVE_INCOMPLETE $recursive */ - private function handleChildren(string $path, $recursive, int $reuse, int $folderId, bool $lock, int|float &$size): array { + private function handleChildren(string $path, $recursive, int $reuse, int $folderId, bool $lock, int|float &$size, bool &$etagChanged): array { // we put this in it's own function so it cleans up the memory before we start recursing $existingChildren = $this->getExistingChildren($folderId); $newChildren = iterator_to_array($this->storage->getDirectoryContent($path)); @@ -470,6 +489,10 @@ private function handleChildren(string $path, $recursive, int $reuse, int $folde } elseif ($size !== -1) { $size += $data['size']; } + + if (isset($data['etag_changed']) && $data['etag_changed']) { + $etagChanged = true; + } } } catch (Exception $ex) { // might happen if inserting duplicate while a scanning diff --git a/lib/private/Files/ObjectStore/ObjectStoreScanner.php b/lib/private/Files/ObjectStore/ObjectStoreScanner.php index d827662ae0b1a..8a9b844c47fad 100644 --- a/lib/private/Files/ObjectStore/ObjectStoreScanner.php +++ b/lib/private/Files/ObjectStore/ObjectStoreScanner.php @@ -39,7 +39,7 @@ public function scan($path, $recursive = self::SCAN_RECURSIVE, $reuse = -1, $loc return []; } - protected function scanChildren(string $path, $recursive, int $reuse, int $folderId, bool $lock, int|float $oldSize) { + protected function scanChildren(string $path, $recursive, int $reuse, int $folderId, bool $lock, int|float $oldSize, &$etagChanged = false) { return 0; }