From 1f482516de74f4f81c7f96a07c38def2719cfa02 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Duffeck?= Date: Tue, 20 Aug 2024 11:25:18 +0200 Subject: [PATCH] Handle file id clashes when a file/dir was copied/restored with xattrs --- pkg/storage/fs/posix/tree/assimilation.go | 113 ++++++++++++++-------- pkg/storage/fs/posix/tree/tree_test.go | 40 ++++++++ 2 files changed, 111 insertions(+), 42 deletions(-) diff --git a/pkg/storage/fs/posix/tree/assimilation.go b/pkg/storage/fs/posix/tree/assimilation.go index 7201f8b024..62d7e32f0c 100644 --- a/pkg/storage/fs/posix/tree/assimilation.go +++ b/pkg/storage/fs/posix/tree/assimilation.go @@ -338,56 +338,75 @@ func (t *Tree) assimilate(item scanItem) error { id, err = t.lookup.MetadataBackend().Get(context.Background(), item.Path, prefixes.IDAttr) if err == nil { previousPath, ok := t.lookup.(*lookup.Lookup).GetCachedID(context.Background(), spaceID, string(id)) - - // This item had already been assimilated in the past. Update the path - _ = t.lookup.(*lookup.Lookup).CacheID(context.Background(), spaceID, string(id), item.Path) - previousParentID, _ := t.lookup.MetadataBackend().Get(context.Background(), item.Path, prefixes.ParentidAttr) - fi, err := t.updateFile(item.Path, string(id), spaceID) - if err != nil { - return err - } - - // was it moved? + // was it moved or copied/restored with a clashing id? if ok && len(previousParentID) > 0 && previousPath != item.Path { - // purge original metadata. Only delete the path entry using DeletePath(reverse lookup), not the whole entry pair. - _ = t.lookup.(*lookup.Lookup).IDCache.DeletePath(context.Background(), previousPath) - _ = t.lookup.MetadataBackend().Purge(previousPath) + _, err := os.Stat(previousPath) + if err == nil { + // this id clashes with an existing id -> clear metadata and re-assimilate + + t.lookup.MetadataBackend().Purge(context.Background(), item.Path) + go func() { + _ = t.assimilate(scanItem{Path: item.Path, ForceRescan: true}) + }() + } else { + // this is a move + _ = t.lookup.(*lookup.Lookup).CacheID(context.Background(), spaceID, string(id), item.Path) + _, err := t.updateFile(item.Path, string(id), spaceID) + if err != nil { + return err + } - if fi.IsDir() { - // if it was moved and it is a directory we need to propagate the move - go func() { _ = t.WarmupIDCache(item.Path, false) }() - } + // purge original metadata. Only delete the path entry using DeletePath(reverse lookup), not the whole entry pair. + _ = t.lookup.(*lookup.Lookup).IDCache.DeletePath(context.Background(), previousPath) + _ = t.lookup.MetadataBackend().Purge(context.Background(), previousPath) - parentID, err := t.lookup.MetadataBackend().Get(context.Background(), item.Path, prefixes.ParentidAttr) - if err == nil && len(parentID) > 0 { - ref := &provider.Reference{ - ResourceId: &provider.ResourceId{ - StorageId: t.options.MountID, - SpaceId: spaceID, - OpaqueId: string(parentID), - }, - Path: filepath.Base(item.Path), + fi, err := os.Stat(item.Path) + if err != nil { + return err } - oldRef := &provider.Reference{ - ResourceId: &provider.ResourceId{ - StorageId: t.options.MountID, - SpaceId: spaceID, - OpaqueId: string(previousParentID), - }, - Path: filepath.Base(previousPath), + if fi.IsDir() { + // if it was moved and it is a directory we need to propagate the move + go func() { _ = t.WarmupIDCache(item.Path, false) }() } - t.PublishEvent(events.ItemMoved{ - SpaceOwner: user, - Executant: user, - Owner: user, - Ref: ref, - OldReference: oldRef, - Timestamp: utils.TSNow(), - }) + + parentID, err := t.lookup.MetadataBackend().Get(context.Background(), item.Path, prefixes.ParentidAttr) + if err == nil && len(parentID) > 0 { + ref := &provider.Reference{ + ResourceId: &provider.ResourceId{ + StorageId: t.options.MountID, + SpaceId: spaceID, + OpaqueId: string(parentID), + }, + Path: filepath.Base(item.Path), + } + oldRef := &provider.Reference{ + ResourceId: &provider.ResourceId{ + StorageId: t.options.MountID, + SpaceId: spaceID, + OpaqueId: string(previousParentID), + }, + Path: filepath.Base(previousPath), + } + t.PublishEvent(events.ItemMoved{ + SpaceOwner: user, + Executant: user, + Owner: user, + Ref: ref, + OldReference: oldRef, + Timestamp: utils.TSNow(), + }) + } + } + } else { + // This item had already been assimilated in the past. Update the path + _ = t.lookup.(*lookup.Lookup).CacheID(context.Background(), spaceID, string(id), item.Path) + + _, err := t.updateFile(item.Path, string(id), spaceID) + if err != nil { + return err } - // } } } else { // assimilate new file @@ -584,6 +603,16 @@ func (t *Tree) WarmupIDCache(root string, assimilate bool) error { id, ok := attribs[prefixes.IDAttr] if ok { + // Check if the item on the previous still exists. In this case it might have been a copy with extended attributes -> set new ID + previousPath, ok := t.lookup.(*lookup.Lookup).GetCachedID(context.Background(), string(spaceID), string(id)) + if ok && previousPath != path { + // this id clashes with an existing id -> clear metadata and re-assimilate + _, err := os.Stat(previousPath) + if err == nil { + t.lookup.MetadataBackend().Purge(context.Background(), path) + _ = t.assimilate(scanItem{Path: path, ForceRescan: true}) + } + } _ = t.lookup.(*lookup.Lookup).CacheID(context.Background(), string(spaceID), string(id), path) } } else if assimilate { diff --git a/pkg/storage/fs/posix/tree/tree_test.go b/pkg/storage/fs/posix/tree/tree_test.go index 1f7c31086c..b660fb9a22 100644 --- a/pkg/storage/fs/posix/tree/tree_test.go +++ b/pkg/storage/fs/posix/tree/tree_test.go @@ -4,6 +4,7 @@ import ( "crypto/rand" "log" "os" + "os/exec" "strings" "time" @@ -222,6 +223,45 @@ var _ = Describe("Tree", func() { g.Expect(n.Blobsize).To(Equal(int64(0))) }).Should(Succeed()) }) + + It("handles id clashes", func() { + // Create empty file + _, err := os.Create(root + "/original.txt") + Expect(err).ToNot(HaveOccurred()) + + fileID := "" + // Wait for the file to be indexed + Eventually(func(g Gomega) { + n, err := env.Lookup.NodeFromResource(env.Ctx, &provider.Reference{ + ResourceId: env.SpaceRootRes, + Path: subtree + "/original.txt", + }) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(n).ToNot(BeNil()) + g.Expect(n.Type(env.Ctx)).To(Equal(provider.ResourceType_RESOURCE_TYPE_FILE)) + g.Expect(n.ID).ToNot(BeEmpty()) + fileID = n.ID + g.Expect(n.Blobsize).To(Equal(int64(0))) + }).Should(Succeed()) + + // cp file + cmd := exec.Command("cp", "-a", root+"/original.txt", root+"/moved.txt") + err = cmd.Run() + Expect(err).ToNot(HaveOccurred()) + + Eventually(func(g Gomega) { + n, err := env.Lookup.NodeFromResource(env.Ctx, &provider.Reference{ + ResourceId: env.SpaceRootRes, + Path: subtree + "/moved.txt", + }) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(n).ToNot(BeNil()) + g.Expect(n.Type(env.Ctx)).To(Equal(provider.ResourceType_RESOURCE_TYPE_FILE)) + g.Expect(n.ID).ToNot(BeEmpty()) + g.Expect(n.ID).ToNot(Equal(fileID)) + g.Expect(n.Blobsize).To(Equal(int64(0))) + }).Should(Succeed()) + }) }) Describe("of directories", func() {