From 87cfd4242108f3d542fd87754f901205c51da6c5 Mon Sep 17 00:00:00 2001 From: Lucas Molas Date: Mon, 31 May 2021 18:46:14 -0300 Subject: [PATCH 01/38] WIP --- io/directory.go | 108 +++++++++++++++++++++++++++++++++++++++++++ io/directory_test.go | 19 ++++++++ 2 files changed, 127 insertions(+) diff --git a/io/directory.go b/io/directory.go index 15c7e862a..82499a3dc 100644 --- a/io/directory.go +++ b/io/directory.go @@ -93,6 +93,10 @@ type BasicDirectory struct { type HAMTDirectory struct { shard *hamt.Shard dserv ipld.DAGService + + // Track the changes in size by the AddChild and RemoveChild calls + // for the HAMTShardingSize option. + sizeChange int } func newEmptyBasicDirectory(dserv ipld.DAGService) *BasicDirectory { @@ -311,6 +315,11 @@ func (d *HAMTDirectory) SetCidBuilder(builder cid.Builder) { // AddChild implements the `Directory` interface. func (d *HAMTDirectory) AddChild(ctx context.Context, name string, nd ipld.Node) error { + // FIXME: This needs to be moved to Shard internals to make sure we are + // actually adding a new entry (increasing size) or just replacing an + // old one (do nothing, or get a difference in entry size). + d.addToSizeChange(name, nd.Cid()) + return d.shard.Set(ctx, name, nd) } @@ -342,6 +351,10 @@ func (d *HAMTDirectory) Find(ctx context.Context, name string) (ipld.Node, error // RemoveChild implements the `Directory` interface. func (d *HAMTDirectory) RemoveChild(ctx context.Context, name string) error { + // FIXME: Same note as in AddChild, with the added consideration that + // we need to retrieve the entry before removing to adjust size. + d.removeFromSizeChange(name, cid.Undef) + return d.shard.Remove(ctx, name) } @@ -355,8 +368,54 @@ func (d *HAMTDirectory) GetCidBuilder() cid.Builder { return d.shard.CidBuilder() } +// switchToBasic returns a BasicDirectory implementation of this directory. +func (d *HAMTDirectory) switchToBasic(ctx context.Context) (*BasicDirectory, error) { + basicDir := newEmptyBasicDirectory(d.dserv) + basicDir.SetCidBuilder(d.GetCidBuilder()) + + d.ForEachLink(nil, func(lnk *ipld.Link) error { + node, err := d.dserv.Get(ctx, lnk.Cid) + if err != nil { + return err + } + + err = basicDir.AddChild(ctx, lnk.Name, node) + if err != nil { + return err + } + + return nil + }) + // FIXME: The above was adapted from SwitchToSharding. We can probably optimize: + // 1. Do not retrieve the full node but create the link from the + // HAMT entry and add it to the BasicDirectory with a (new) + // plumbing function that operates below the AddChild level. + // 2. Do not enumerate all link from scratch (harder than previous + // item). We call this function only from the UpgradeableDirectory + // when we went below the threshold: the detection will be done through + // (partial) enumeration. We may be able to reuse some of that work + // (likely retaining the links in a cache). (All this is uncertain + // at this point as we don't know how partial enumeration will be + // implemented.) + + return basicDir, nil +} + +func (d *HAMTDirectory) addToSizeChange(name string, linkCid cid.Cid) { + d.sizeChange += estimatedLinkSize(name, linkCid) +} + +func (d *HAMTDirectory) removeFromSizeChange(name string, linkCid cid.Cid) { + d.sizeChange -= estimatedLinkSize(name, linkCid) +} + // UpgradeableDirectory wraps a Directory interface and provides extra logic // to upgrade from its BasicDirectory implementation to HAMTDirectory. +// FIXME: Rename to something that reflects the new bi-directionality. We no +// longer go in the "forward" direction (upgrade) but we also go "backward". +// Possible alternatives: SwitchableDirectory or DynamicDirectory. Also consider +// more generic-sounding names like WrapperDirectory that emphasize that this +// is just the middleman and has no real Directory-implementing logic. type UpgradeableDirectory struct { Directory } @@ -394,3 +453,52 @@ func (d *UpgradeableDirectory) AddChild(ctx context.Context, name string, nd ipl return nil } + +// FIXME: Consider implementing RemoveChild to do an eager enumeration if +// the HAMT sizeChange goes below a certain point (normally lower than just +// zero to not enumerate in *any* occasion the size delta goes transiently +// negative). +//func (d *UpgradeableDirectory) RemoveChild(ctx context.Context, name string) error + +// GetNode implements the `Directory` interface. Used in the case where we wrap +// a HAMTDirectory that might need to be downgraded to a BasicDirectory. The +// upgrade path is in AddChild; we delay the downgrade until we are forced to +// commit to a CID (through the returned node) to avoid costly enumeration in +// the sharding case (that by definition will have potentially many more entries +// than the BasicDirectory). +// FIXME: We need to be *very* sure that the returned downgraded BasicDirectory +// will be in fact *above* the HAMTShardingSize threshold to avoid churning. +// We may even need to use a basic low/high water markings (like a small +// percentage above and below the original user-set HAMTShardingSize). +func (d *UpgradeableDirectory) GetNode() (ipld.Node, error) { + hamtDir, ok := d.Directory.(*HAMTDirectory) + if !ok { + return d.Directory.GetNode() // BasicDirectory + } + + if HAMTShardingSize != 0 && hamtDir.sizeChange < 0 { + // We have reduced the directory size, check if it didn't go under + // the HAMTShardingSize threshold. + // FIXME: We should do a partial enumeration instead of the full one + // enumerating until we reach HAMTShardingSize (or we run out of + // entries meaning we need to switch). + totalSize := 0 + ctx := context.Background() + // FIXME: We will need a context for this closure to make sure + // we don't stall here. This might mean changing the current + // Directory interface. + hamtDir.ForEachLink(ctx, func(l *ipld.Link) error { + totalSize += estimatedLinkSize(l.Name, l.Cid) + return nil + }) + if totalSize < HAMTShardingSize { + basicDir, err := hamtDir.switchToBasic(ctx) + if err != nil { + return nil, err + } + d.Directory = basicDir + } + } + + return d.Directory.GetNode() +} diff --git a/io/directory_test.go b/io/directory_test.go index 8c5d8e109..62074a93b 100644 --- a/io/directory_test.go +++ b/io/directory_test.go @@ -105,6 +105,8 @@ func TestDuplicateAddDir(t *testing.T) { // restored node check from NewDirectoryFromNode). // * Check estimated size against encoded node (the difference should only be // a small percentage for a directory with 10s of entries). +// FIXME: Add a test for the HAMT sizeChange abstracting some of the code from +// this one. func TestBasicDirectory_estimatedSize(t *testing.T) { ds := mdtest.Mock() ctx := context.Background() @@ -210,6 +212,23 @@ func TestUpgradeableDirectory(t *testing.T) { if _, ok := dir.(*UpgradeableDirectory).Directory.(*HAMTDirectory); !ok { t.Fatal("UpgradeableDirectory wasn't upgraded to HAMTDirectory for a low threshold") } + + // Remove the single entry triggering the switch back to BasicDirectory + err = dir.RemoveChild(ctx, "test") + if err != nil { + t.Fatal(err) + } + // For performance reasons we only switch when serializing the data + // in the node format and not on any entry removal. + _, err = dir.GetNode() + if err != nil { + t.Fatal(err) + } + if _, ok := dir.(*UpgradeableDirectory).Directory.(*BasicDirectory); !ok { + t.Fatal("UpgradeableDirectory wasn't downgraded to BasicDirectory after removal of the single entry") + } + + // FIXME: Test integrity of entries in-between switches. } func TestDirBuilder(t *testing.T) { From d9de8932b107666cb519ae3b1b1653bc17551d3d Mon Sep 17 00:00:00 2001 From: Lucas Molas Date: Wed, 16 Jun 2021 16:41:51 -0300 Subject: [PATCH 02/38] use parallel EnumLinksAsync --- io/directory.go | 121 +++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 100 insertions(+), 21 deletions(-) diff --git a/io/directory.go b/io/directory.go index 82499a3dc..67d97a485 100644 --- a/io/directory.go +++ b/io/directory.go @@ -4,6 +4,7 @@ import ( "context" "fmt" "os" + "time" mdag "github.com/ipfs/go-merkledag" @@ -25,6 +26,10 @@ var log = logging.Logger("unixfs") // ProtoNode doesn't use the Data field so this estimate is pretty accurate). var HAMTShardingSize = 0 +// Time allowed to fetch the shards to compute the size before returning +// an error. +var EvaluateHAMTTransitionTimeout = time.Duration(1) + // DefaultShardWidth is the default value used for hamt sharding width. var DefaultShardWidth = 256 @@ -409,6 +414,76 @@ func (d *HAMTDirectory) removeFromSizeChange(name string, linkCid cid.Cid) { d.sizeChange -= estimatedLinkSize(name, linkCid) } +// Evaluate directory size and check if it's below HAMTShardingSize threshold +// (to trigger a transition to a BasicDirectory). It returns two `bool`s: +// * whether it's below (true) or equal/above (false) +// * whether the passed timeout to compute the size has been exceeded +// Instead of enumearting the entire tree we eagerly call EnumLinksAsync +// until we either reach a value above the threshold (in that case no need) +// to keep counting or the timeout runs out in which case the `below` return +// value is not to be trusted as we didn't have time to count enough shards. +func (d *HAMTDirectory) sizeBelowThreshold(timeout time.Duration) (below bool, timeoutExceeded bool) { + if HAMTShardingSize == 0 { + panic("asked to compute HAMT size with HAMTShardingSize option off (0)") + } + + // We don't necessarily compute the full size of *all* shards as we might + // end early if we already know we're above the threshold or run out of time. + partialSize := 0 + + ctx, cancel := context.WithTimeout(context.Background(), timeout) + for linkResult := range d.EnumLinksAsync(ctx) { + if linkResult.Err != nil { + continue + // The timeout exceeded errors will be coming through here but I'm + // not sure if we can just compare against a generic DeadlineExceeded + // error here to return early and avoid iterating the entire loop. + // (We might confuse a specific DeadlineExceeded of an internal function + // with our context here.) + // Since *our* DeadlineExceeded will quickly propagate to any other + // pending fetches it seems that iterating the entire loop won't add + // much more cost anyway. + // FIXME: Check the above reasoning. + } + if linkResult.Link == nil { + panic("empty link result (both values nil)") + // FIXME: Is this *ever* possible? + } + partialSize += estimatedLinkSize(linkResult.Link.Name, linkResult.Link.Cid) + + if partialSize >= HAMTShardingSize { + // We have already fetched enough shards to assert we are + // above the threshold, so no need to keep fetching. + cancel() + return false, false + } + } + // At this point either we enumerated all shards or run out of time. + // Figure out which. + + if ctx.Err() == context.Canceled { + panic("the context was canceled but we're still evaluating a possible switch") + } + if partialSize >= HAMTShardingSize { + panic("we reach the threshold but we're still evaluating a possible switch") + } + + if ctx.Err() == context.DeadlineExceeded { + return false, true + } + + // If we reach this then: + // * We are below the threshold (we didn't return inside the EnumLinksAsync + // loop). + // * The context wasn't cancelled so we iterated *all* shards + // and are sure that we have the full size. + // FIXME: Can we actually verify the last claim here to be sure? + // (Iterating all the shards in the HAMT as a plumbing function maybe. + // If they're in memory it shouldn't be that expensive, we won't be + // switching that often, probably.) + return true, false +} + // UpgradeableDirectory wraps a Directory interface and provides extra logic // to upgrade from its BasicDirectory implementation to HAMTDirectory. // FIXME: Rename to something that reflects the new bi-directionality. We no @@ -476,28 +551,32 @@ func (d *UpgradeableDirectory) GetNode() (ipld.Node, error) { return d.Directory.GetNode() // BasicDirectory } - if HAMTShardingSize != 0 && hamtDir.sizeChange < 0 { - // We have reduced the directory size, check if it didn't go under - // the HAMTShardingSize threshold. - // FIXME: We should do a partial enumeration instead of the full one - // enumerating until we reach HAMTShardingSize (or we run out of - // entries meaning we need to switch). - totalSize := 0 - ctx := context.Background() - // FIXME: We will need a context for this closure to make sure - // we don't stall here. This might mean changing the current - // Directory interface. - hamtDir.ForEachLink(ctx, func(l *ipld.Link) error { - totalSize += estimatedLinkSize(l.Name, l.Cid) - return nil - }) - if totalSize < HAMTShardingSize { - basicDir, err := hamtDir.switchToBasic(ctx) - if err != nil { - return nil, err - } - d.Directory = basicDir + if HAMTShardingSize == 0 && // Option disabled. + hamtDir.sizeChange < 0 { // We haven't reduced the HAMT size. + return d.Directory.GetNode() + } + + // We have reduced the directory size, check if it didn't go under + // the HAMTShardingSize threshold. + + belowThreshold, timeoutExceeded := hamtDir.sizeBelowThreshold(EvaluateHAMTTransitionTimeout) + + if timeoutExceeded { + // We run out of time before confirming if we're indeed below the + // threshold. When in doubt error to not return inconsistent structures. + return nil, fmt.Errorf("not enought time to fetch shards") + // FIXME: Abstract in new error for testing. + } + + if belowThreshold { + // Switch. + basicDir, err := hamtDir.switchToBasic(context.Background()) + // FIXME: The missing context will be provided once we move this to + // AddChild and remove child. + if err != nil { + return nil, err } + d.Directory = basicDir } return d.Directory.GetNode() From db2f266bd40a38443a2634cf35b087ef28fdf19b Mon Sep 17 00:00:00 2001 From: Lucas Molas Date: Wed, 16 Jun 2021 16:53:49 -0300 Subject: [PATCH 03/38] fix time unit in seconds --- io/directory.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/io/directory.go b/io/directory.go index 67d97a485..3881b902d 100644 --- a/io/directory.go +++ b/io/directory.go @@ -26,8 +26,8 @@ var log = logging.Logger("unixfs") // ProtoNode doesn't use the Data field so this estimate is pretty accurate). var HAMTShardingSize = 0 -// Time allowed to fetch the shards to compute the size before returning -// an error. +// Time in seconds allowed to fetch the shards to compute the size before +// returning an error. var EvaluateHAMTTransitionTimeout = time.Duration(1) // DefaultShardWidth is the default value used for hamt sharding width. @@ -431,7 +431,7 @@ func (d *HAMTDirectory) sizeBelowThreshold(timeout time.Duration) (below bool, t // end early if we already know we're above the threshold or run out of time. partialSize := 0 - ctx, cancel := context.WithTimeout(context.Background(), timeout) + ctx, cancel := context.WithTimeout(context.Background(), time.Second * timeout) for linkResult := range d.EnumLinksAsync(ctx) { if linkResult.Err != nil { continue From f3d7009312f5aba30cfa80e7c69c7af74a4b7236 Mon Sep 17 00:00:00 2001 From: Lucas Molas Date: Wed, 16 Jun 2021 17:13:59 -0300 Subject: [PATCH 04/38] move downgrade logic to remove child --- io/directory.go | 57 +++++++++++++++++++++++-------------------------- 1 file changed, 27 insertions(+), 30 deletions(-) diff --git a/io/directory.go b/io/directory.go index 3881b902d..03ce656f4 100644 --- a/io/directory.go +++ b/io/directory.go @@ -28,6 +28,7 @@ var HAMTShardingSize = 0 // Time in seconds allowed to fetch the shards to compute the size before // returning an error. +// FIXME: Adjust to sane value. var EvaluateHAMTTransitionTimeout = time.Duration(1) // DefaultShardWidth is the default value used for hamt sharding width. @@ -431,7 +432,7 @@ func (d *HAMTDirectory) sizeBelowThreshold(timeout time.Duration) (below bool, t // end early if we already know we're above the threshold or run out of time. partialSize := 0 - ctx, cancel := context.WithTimeout(context.Background(), time.Second * timeout) + ctx, cancel := context.WithTimeout(context.Background(), time.Second*timeout) for linkResult := range d.EnumLinksAsync(ctx) { if linkResult.Err != nil { continue @@ -529,55 +530,51 @@ func (d *UpgradeableDirectory) AddChild(ctx context.Context, name string, nd ipl return nil } -// FIXME: Consider implementing RemoveChild to do an eager enumeration if -// the HAMT sizeChange goes below a certain point (normally lower than just -// zero to not enumerate in *any* occasion the size delta goes transiently -// negative). -//func (d *UpgradeableDirectory) RemoveChild(ctx context.Context, name string) error - -// GetNode implements the `Directory` interface. Used in the case where we wrap +// RemoveChild implements the `Directory` interface. Used in the case where we wrap // a HAMTDirectory that might need to be downgraded to a BasicDirectory. The -// upgrade path is in AddChild; we delay the downgrade until we are forced to -// commit to a CID (through the returned node) to avoid costly enumeration in -// the sharding case (that by definition will have potentially many more entries -// than the BasicDirectory). -// FIXME: We need to be *very* sure that the returned downgraded BasicDirectory -// will be in fact *above* the HAMTShardingSize threshold to avoid churning. -// We may even need to use a basic low/high water markings (like a small -// percentage above and below the original user-set HAMTShardingSize). -func (d *UpgradeableDirectory) GetNode() (ipld.Node, error) { +// upgrade path is in AddChild. +// FIXME: Consider adding some margin in the comparison against HAMTShardingSize +// to avoid an eager enumeration at the first opportunity we go below it +// (in which case we would have the hard comparison in GetNode() to make +// sure we make good on the value). Finding the right margin can be tricky +// and very dependent on the use case so it might not be worth it. +func (d *UpgradeableDirectory) RemoveChild(ctx context.Context, name string) error { + if err := d.Directory.RemoveChild(ctx, name); err != nil { + return err + } + hamtDir, ok := d.Directory.(*HAMTDirectory) - if !ok { - return d.Directory.GetNode() // BasicDirectory + if !ok { // BasicDirectory + return nil } if HAMTShardingSize == 0 && // Option disabled. - hamtDir.sizeChange < 0 { // We haven't reduced the HAMT size. - return d.Directory.GetNode() + hamtDir.sizeChange < 0 { // We haven't reduced the HAMT net size. + return nil } // We have reduced the directory size, check if it didn't go under // the HAMTShardingSize threshold. - belowThreshold, timeoutExceeded := hamtDir.sizeBelowThreshold(EvaluateHAMTTransitionTimeout) if timeoutExceeded { // We run out of time before confirming if we're indeed below the // threshold. When in doubt error to not return inconsistent structures. - return nil, fmt.Errorf("not enought time to fetch shards") + // FIXME: We could allow this to return without error and enforce this + // timeout on a GetNode() call when we need to actually commit to a + // structure/CID. (The downside is that GetNode() doesn't have a + // context argument and we would have to break the API.) + return fmt.Errorf("not enought time to fetch shards") // FIXME: Abstract in new error for testing. } - if belowThreshold { - // Switch. - basicDir, err := hamtDir.switchToBasic(context.Background()) - // FIXME: The missing context will be provided once we move this to - // AddChild and remove child. + if belowThreshold { // Switch. + basicDir, err := hamtDir.switchToBasic(ctx) if err != nil { - return nil, err + return err } d.Directory = basicDir } - return d.Directory.GetNode() + return nil } From eced2f5179e4c12ec578d16b762e72ba0cb5d268 Mon Sep 17 00:00:00 2001 From: Lucas Molas Date: Wed, 16 Jun 2021 17:34:51 -0300 Subject: [PATCH 05/38] fix static check --- io/directory.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/io/directory.go b/io/directory.go index 03ce656f4..3cb199eeb 100644 --- a/io/directory.go +++ b/io/directory.go @@ -379,7 +379,7 @@ func (d *HAMTDirectory) switchToBasic(ctx context.Context) (*BasicDirectory, err basicDir := newEmptyBasicDirectory(d.dserv) basicDir.SetCidBuilder(d.GetCidBuilder()) - d.ForEachLink(nil, func(lnk *ipld.Link) error { + d.ForEachLink(context.TODO(), func(lnk *ipld.Link) error { node, err := d.dserv.Get(ctx, lnk.Cid) if err != nil { return err From 7d5b41fba43acb5450f9b4bc39e30b69d60dca84 Mon Sep 17 00:00:00 2001 From: Lucas Molas Date: Wed, 16 Jun 2021 18:08:08 -0300 Subject: [PATCH 06/38] fix cancel usage --- io/directory.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/io/directory.go b/io/directory.go index 3cb199eeb..699d48cd4 100644 --- a/io/directory.go +++ b/io/directory.go @@ -433,6 +433,7 @@ func (d *HAMTDirectory) sizeBelowThreshold(timeout time.Duration) (below bool, t partialSize := 0 ctx, cancel := context.WithTimeout(context.Background(), time.Second*timeout) + defer cancel() for linkResult := range d.EnumLinksAsync(ctx) { if linkResult.Err != nil { continue @@ -455,7 +456,6 @@ func (d *HAMTDirectory) sizeBelowThreshold(timeout time.Duration) (below bool, t if partialSize >= HAMTShardingSize { // We have already fetched enough shards to assert we are // above the threshold, so no need to keep fetching. - cancel() return false, false } } From e7eff28d0805057919cae481e00f6a458e27d889 Mon Sep 17 00:00:00 2001 From: Lucas Molas Date: Thu, 17 Jun 2021 18:28:00 -0300 Subject: [PATCH 07/38] make modifyValue (now setValue) return the old entry --- hamt/hamt.go | 123 +++++++++++++++++++++++++++++++++++---------------- 1 file changed, 84 insertions(+), 39 deletions(-) diff --git a/hamt/hamt.go b/hamt/hamt.go index 55b798ce4..568d1c253 100644 --- a/hamt/hamt.go +++ b/hamt/hamt.go @@ -206,26 +206,41 @@ func (ds *Shard) makeShardValue(lnk *ipld.Link) (*Shard, error) { // Set sets 'name' = nd in the HAMT func (ds *Shard) Set(ctx context.Context, name string, nd ipld.Node) error { + _, err := ds.setAndPrevious(ctx, name, nd) + return err +} + +// setAndPrevious sets a link poiting to the passed node as the value under the +// name key in this Shard or its children. It also returns the previous link +// under that name key (if any). +func (ds *Shard) setAndPrevious(ctx context.Context, name string, node ipld.Node) (*ipld.Link, error) { hv := &hashBits{b: hash([]byte(name))} - err := ds.dserv.Add(ctx, nd) + err := ds.dserv.Add(ctx, node) if err != nil { - return err + return nil, err } - lnk, err := ipld.MakeLink(nd) + lnk, err := ipld.MakeLink(node) if err != nil { - return err + return nil, err } lnk.Name = ds.linkNamePrefix(0) + name - return ds.modifyValue(ctx, hv, name, lnk) + return ds.setValue(ctx, hv, name, lnk) } // Remove deletes the named entry if it exists. Otherwise, it returns // os.ErrNotExist. func (ds *Shard) Remove(ctx context.Context, name string) error { + _, err := ds.removeAndPrevious(ctx, name) + return err +} + +// removeAndPrevious is similar to the public Remove but also returns the +// old removed link (if it exists). +func (ds *Shard) removeAndPrevious(ctx context.Context, name string) (*ipld.Link, error) { hv := &hashBits{b: hash([]byte(name))} - return ds.modifyValue(ctx, hv, name, nil) + return ds.setValue(ctx, hv, name, nil) } // Find searches for a child node by 'name' within this hamt @@ -419,75 +434,104 @@ func (ds *Shard) walkTrie(ctx context.Context, cb func(*Shard) error) error { }) } -func (ds *Shard) modifyValue(ctx context.Context, hv *hashBits, key string, val *ipld.Link) error { +// setValue sets the link `value` in the given key, either creating the entry +// if it didn't exist or overwriting the old one. It returns the old entry (if any). +func (ds *Shard) setValue(ctx context.Context, hv *hashBits, key string, value *ipld.Link) (oldValue *ipld.Link, err error) { idx, err := hv.Next(ds.tableSizeLg2) if err != nil { - return err + return } if !ds.childer.has(idx) { - return ds.childer.insert(key, val, idx) + // Entry does not exist, create a new one. + return nil, ds.childer.insert(key, value, idx) } i := ds.childer.sliceIndex(idx) - child, err := ds.childer.get(ctx, i) if err != nil { - return err + return } if child.isValueNode() { + // Leaf node. This is the base case of this recursive function. + // FIXME: Misleading: the base case also includes a recursive call + // in the case both keys share the same slot in the new child shard + // below. if child.key == key { - // value modification - if val == nil { - return ds.childer.rm(idx) + // We are in the correct shard (tree level) so we modify this child + // and return. + oldValue = child.val + + if value == nil { // Remove old entry. + return oldValue, ds.childer.rm(idx) } - child.val = val - return nil + child.val = value // Overwrite entry. + return } - if val == nil { - return os.ErrNotExist + if value == nil { + return nil, os.ErrNotExist + // FIXME: Move this case to the top of the clause as + // if child.key != key + // and remove the indentation on the big if. } - // replace value with another shard, one level deeper - ns, err := NewShard(ds.dserv, ds.tableSize) + // We are in the same slot with another entry with a different key + // so we need to fork this leaf node into a shard with two childs: + // the old entry and the new one being inserted here. + // We don't overwrite anything here so we keep: + // `oldValue = nil` + + // The child of this shard will now be a new shard. The old child value + // will be a child of this new shard (along with the new value being + // inserted). + grandChild := child + child, err = NewShard(ds.dserv, ds.tableSize) if err != nil { - return err + return nil, err } - ns.builder = ds.builder + child.builder = ds.builder chhv := &hashBits{ - b: hash([]byte(child.key)), + b: hash([]byte(grandChild.key)), consumed: hv.consumed, } - err = ns.modifyValue(ctx, hv, key, val) + // We explicitly ignore the oldValue returned by the next two insertions + // (which will be nil) to highlight there is no overwrite here: they are + // done with different keys to a new (empty) shard. (At best this shard + // will create new ones until we find different slots for both.) + _, err = child.setValue(ctx, hv, key, value) if err != nil { - return err + return } - - err = ns.modifyValue(ctx, chhv, child.key, child.val) + _, err = child.setValue(ctx, chhv, grandChild.key, grandChild.val) if err != nil { - return err + return } - ds.childer.set(ns, i) - return nil + // Replace this leaf node with the new Shard node. + ds.childer.set(child, i) + return nil, nil } else { - err := child.modifyValue(ctx, hv, key, val) + // We are in a Shard (internal node). We will recursively call this + // function until finding the leaf (the logic of the `if` case above). + oldValue, err = child.setValue(ctx, hv, key, value) if err != nil { - return err + return } - if val == nil { + if value == nil { + // We have removed an entry, check if we should remove shards + // as well. switch child.childer.length() { case 0: // empty sub-shard, prune it // Note: this shouldnt normally ever happen // in the event of another implementation creates flawed // structures, this will help to normalize them. - return ds.childer.rm(idx) + return oldValue, ds.childer.rm(idx) case 1: // The single child _should_ be a value by // induction. However, we allow for it to be a @@ -499,24 +543,25 @@ func (ds *Shard) modifyValue(ctx context.Context, hv *hashBits, key string, val if schild.isValueNode() { ds.childer.set(schild, i) } - return nil + return } // Otherwise, work with the link. slnk := child.childer.link(0) - lnkType, err := child.childer.sd.childLinkType(slnk) + var lnkType linkType + lnkType, err = child.childer.sd.childLinkType(slnk) if err != nil { - return err + return } if lnkType == shardValueLink { // sub-shard with a single value element, collapse it ds.childer.setLink(slnk, i) } - return nil + return } } - return nil + return } } From f1271f9abea12e6ccf456b76b2b3b42ba292819f Mon Sep 17 00:00:00 2001 From: Lucas Molas Date: Thu, 17 Jun 2021 18:34:28 -0300 Subject: [PATCH 08/38] properly account for size changes in HAMT dir --- hamt/hamt.go | 12 ++++++------ io/directory.go | 31 ++++++++++++++++++++++--------- 2 files changed, 28 insertions(+), 15 deletions(-) diff --git a/hamt/hamt.go b/hamt/hamt.go index 568d1c253..a3d17a915 100644 --- a/hamt/hamt.go +++ b/hamt/hamt.go @@ -206,14 +206,14 @@ func (ds *Shard) makeShardValue(lnk *ipld.Link) (*Shard, error) { // Set sets 'name' = nd in the HAMT func (ds *Shard) Set(ctx context.Context, name string, nd ipld.Node) error { - _, err := ds.setAndPrevious(ctx, name, nd) + _, err := ds.SetAndPrevious(ctx, name, nd) return err } -// setAndPrevious sets a link poiting to the passed node as the value under the +// SetAndPrevious sets a link poiting to the passed node as the value under the // name key in this Shard or its children. It also returns the previous link // under that name key (if any). -func (ds *Shard) setAndPrevious(ctx context.Context, name string, node ipld.Node) (*ipld.Link, error) { +func (ds *Shard) SetAndPrevious(ctx context.Context, name string, node ipld.Node) (*ipld.Link, error) { hv := &hashBits{b: hash([]byte(name))} err := ds.dserv.Add(ctx, node) if err != nil { @@ -232,13 +232,13 @@ func (ds *Shard) setAndPrevious(ctx context.Context, name string, node ipld.Node // Remove deletes the named entry if it exists. Otherwise, it returns // os.ErrNotExist. func (ds *Shard) Remove(ctx context.Context, name string) error { - _, err := ds.removeAndPrevious(ctx, name) + _, err := ds.RemoveAndPrevious(ctx, name) return err } -// removeAndPrevious is similar to the public Remove but also returns the +// RemoveAndPrevious is similar to the public Remove but also returns the // old removed link (if it exists). -func (ds *Shard) removeAndPrevious(ctx context.Context, name string) (*ipld.Link, error) { +func (ds *Shard) RemoveAndPrevious(ctx context.Context, name string) (*ipld.Link, error) { hv := &hashBits{b: hash([]byte(name))} return ds.setValue(ctx, hv, name, nil) } diff --git a/io/directory.go b/io/directory.go index 699d48cd4..5118a83ac 100644 --- a/io/directory.go +++ b/io/directory.go @@ -321,12 +321,20 @@ func (d *HAMTDirectory) SetCidBuilder(builder cid.Builder) { // AddChild implements the `Directory` interface. func (d *HAMTDirectory) AddChild(ctx context.Context, name string, nd ipld.Node) error { - // FIXME: This needs to be moved to Shard internals to make sure we are - // actually adding a new entry (increasing size) or just replacing an - // old one (do nothing, or get a difference in entry size). - d.addToSizeChange(name, nd.Cid()) - return d.shard.Set(ctx, name, nd) + oldChild, err := d.shard.SetAndPrevious(ctx, name, nd) + if err != nil { + // FIXME: We don't know if SetAndPrevious failed before removing the + // old entry or not (it certainly failed before *adding* it, right?). + // We should check this. + return err + } + + if oldChild != nil { + d.removeFromSizeChange(oldChild.Name, oldChild.Cid) + } + d.addToSizeChange(name, nd.Cid()) + return nil } // ForEachLink implements the `Directory` interface. @@ -357,11 +365,16 @@ func (d *HAMTDirectory) Find(ctx context.Context, name string) (ipld.Node, error // RemoveChild implements the `Directory` interface. func (d *HAMTDirectory) RemoveChild(ctx context.Context, name string) error { - // FIXME: Same note as in AddChild, with the added consideration that - // we need to retrieve the entry before removing to adjust size. - d.removeFromSizeChange(name, cid.Undef) + oldChild, err := d.shard.RemoveAndPrevious(ctx, name) + if err != nil { + return err + } - return d.shard.Remove(ctx, name) + if oldChild != nil { + d.removeFromSizeChange(oldChild.Name, oldChild.Cid) + } + + return nil } // GetNode implements the `Directory` interface. From e8562e4256756e8f0744e0bcf99f95e0d95de352 Mon Sep 17 00:00:00 2001 From: Lucas Molas Date: Fri, 18 Jun 2021 20:25:46 -0300 Subject: [PATCH 09/38] basic dir integrity testing --- go.mod | 1 + go.sum | 5 +++ hamt/hamt.go | 5 +++ io/directory.go | 23 ++++++++++-- io/directory_test.go | 87 ++++++++++++++++++++++++++++++++++++++++---- 5 files changed, 110 insertions(+), 11 deletions(-) diff --git a/go.mod b/go.mod index cdfbc3dbc..2e87353a5 100644 --- a/go.mod +++ b/go.mod @@ -18,6 +18,7 @@ require ( github.com/smartystreets/assertions v1.0.0 // indirect github.com/smartystreets/goconvey v0.0.0-20190330032615-68dc04aab96a // indirect github.com/spaolacci/murmur3 v1.1.0 + github.com/stretchr/testify v1.7.0 // indirect github.com/warpfork/go-wish v0.0.0-20190328234359-8b3e70f8e830 // indirect ) diff --git a/go.sum b/go.sum index 9c5bd5070..fdd5a878a 100644 --- a/go.sum +++ b/go.sum @@ -271,10 +271,13 @@ github.com/spacemonkeygo/spacelog v0.0.0-20180420211403-2296661a0572 h1:RC6RW7j+ github.com/spacemonkeygo/spacelog v0.0.0-20180420211403-2296661a0572/go.mod h1:w0SWMsp6j9O/dk4/ZpIhL+3CkG8ofA2vuv7k+ltqUMc= github.com/spaolacci/murmur3 v1.1.0 h1:7c1g84S4BPRrfL5Xrdp6fOJ206sU9y293DDHaoy0bLI= github.com/spaolacci/murmur3 v1.1.0/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA= +github.com/stretchr/objx v0.1.0 h1:4G4v2dO3VZwixGIRoQ5Lfboy6nUhCyYzaqnIAPPhYs4= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= github.com/stretchr/testify v1.4.0 h1:2E4SXV/wtOkTonXsotYi4li6zVWxYlZuYNCXe9XRJyk= github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= +github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY= +github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/syndtr/goleveldb v1.0.0/go.mod h1:ZVVdQEZoIme9iO1Ch2Jdy24qqXrMMOU6lpPAyBWyWuQ= github.com/warpfork/go-wish v0.0.0-20180510122957-5ad1f5abf436/go.mod h1:x6AKhvSSexNrVSrViXSHUEbICjmGXhtgABaHIySUSGw= github.com/warpfork/go-wish v0.0.0-20190328234359-8b3e70f8e830 h1:8kxMKmKzXXL4Ru1nyhvdms/JjWt+3YLpvRb/bAjO/y0= @@ -374,4 +377,6 @@ gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.2 h1:ZCJp+EgiOT7lHqUV2J862kp8Qj64Jo6az82+3Td9dZw= gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= honnef.co/go/tools v0.0.1-2019.2.3 h1:3JgtbtFHMiCmsznwGVTUWbgGov+pVqnlf1dEJTNAXeM= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c h1:dUUwHk2QECo/6vqA44rthZ8ie2QXMNeKRTHCNY2nXvo= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= honnef.co/go/tools v0.0.1-2019.2.3/go.mod h1:a3bituU0lyd329TUQxRnasdCoJDkEUEAqEt0JzvZhAg= diff --git a/hamt/hamt.go b/hamt/hamt.go index a3d17a915..28e6ac9fa 100644 --- a/hamt/hamt.go +++ b/hamt/hamt.go @@ -502,6 +502,11 @@ func (ds *Shard) setValue(ctx context.Context, hv *hashBits, key string, value * // (which will be nil) to highlight there is no overwrite here: they are // done with different keys to a new (empty) shard. (At best this shard // will create new ones until we find different slots for both.) + // FIXME: These two shouldn't be recursive calls where one function + // adds the child and the other replaces it with a shard to go one + // level deeper. This should just be a simple loop that traverses the + // shared prefix in the key adding as many shards as needed and finally + // inserts both leaf links together. _, err = child.setValue(ctx, hv, key, value) if err != nil { return diff --git a/io/directory.go b/io/directory.go index 5118a83ac..d9d4d1415 100644 --- a/io/directory.go +++ b/io/directory.go @@ -80,6 +80,13 @@ type Directory interface { // TODO: Evaluate removing `dserv` from this layer and providing it in MFS. // (The functions should in that case add a `DAGService` argument.) +// Link size estimation function. For production it's usually the one here +// but during test we may mock it to get fixed sizes. +func productionLinkSize(linkName string, linkCid cid.Cid) int { + return len(linkName) + linkCid.ByteLen() +} +var estimatedLinkSize = productionLinkSize + // BasicDirectory is the basic implementation of `Directory`. All the entries // are stored in a single node. type BasicDirectory struct { @@ -146,6 +153,7 @@ func NewDirectoryFromNode(dserv ipld.DAGService, node ipld.Node) (Directory, err case format.TDirectory: return &UpgradeableDirectory{newBasicDirectoryFromNode(dserv, protoBufNode.Copy().(*mdag.ProtoNode))}, nil case format.THAMTShard: + // FIXME: We now need to return also the UpgradeableDirectory here. shard, err := hamt.NewHamtFromDag(dserv, node) if err != nil { return nil, err @@ -167,10 +175,6 @@ func (d *BasicDirectory) computeEstimatedSize() { }) } -func estimatedLinkSize(linkName string, linkCid cid.Cid) int { - return len(linkName) + linkCid.ByteLen() -} - func (d *BasicDirectory) addToEstimatedSize(name string, linkCid cid.Cid) { d.estimatedSize += estimatedLinkSize(name, linkCid) } @@ -543,6 +547,17 @@ func (d *UpgradeableDirectory) AddChild(ctx context.Context, name string, nd ipl return nil } +func (d *UpgradeableDirectory) getDagService() ipld.DAGService { + switch v := d.Directory.(type) { + case *BasicDirectory: + return v.dserv + case *HAMTDirectory: + return v.dserv + default: + panic("unknown directory type") + } +} + // RemoveChild implements the `Directory` interface. Used in the case where we wrap // a HAMTDirectory that might need to be downgraded to a BasicDirectory. The // upgrade path is in AddChild. diff --git a/io/directory_test.go b/io/directory_test.go index 62074a93b..e09125854 100644 --- a/io/directory_test.go +++ b/io/directory_test.go @@ -4,13 +4,18 @@ import ( "context" "fmt" "math" + "sort" + "strings" "testing" + cid "github.com/ipfs/go-cid" ipld "github.com/ipfs/go-ipld-format" mdag "github.com/ipfs/go-merkledag" mdtest "github.com/ipfs/go-merkledag/test" ft "github.com/ipfs/go-unixfs" + + "github.com/stretchr/testify/assert" ) func TestEmptyNode(t *testing.T) { @@ -164,6 +169,14 @@ func TestBasicDirectory_estimatedSize(t *testing.T) { basicDir.estimatedSize, restoredBasicDir.estimatedSize) } } +// FIXME: Add a similar one for HAMT directory, stressing particularly the +// deleted/overwritten entries and their computation in the size variation. + +func mockLinkSizeFunc(fixedSize int) func(linkName string, linkCid cid.Cid) int { + return func(_ string, _ cid.Cid) int { + return fixedSize + } +} // Basic test on extreme threshold to trigger switch. More fine-grained sizes // are checked in TestBasicDirectory_estimatedSize (without the swtich itself @@ -172,8 +185,12 @@ func TestBasicDirectory_estimatedSize(t *testing.T) { // upgrade on another a better structured test should test both dimensions // simultaneously. func TestUpgradeableDirectory(t *testing.T) { + // FIXME: Modifying these static configuraitons is probably not + // concurrent-friendly. oldHamtOption := HAMTShardingSize defer func() { HAMTShardingSize = oldHamtOption }() + estimatedLinkSize = mockLinkSizeFunc(1) + defer func() { estimatedLinkSize = productionLinkSize }() ds := mdtest.Mock() dir := NewDirectory(ds) @@ -212,23 +229,79 @@ func TestUpgradeableDirectory(t *testing.T) { if _, ok := dir.(*UpgradeableDirectory).Directory.(*HAMTDirectory); !ok { t.Fatal("UpgradeableDirectory wasn't upgraded to HAMTDirectory for a low threshold") } + upgradedDir := copyDir(t, dir) // Remove the single entry triggering the switch back to BasicDirectory err = dir.RemoveChild(ctx, "test") if err != nil { t.Fatal(err) } - // For performance reasons we only switch when serializing the data - // in the node format and not on any entry removal. - _, err = dir.GetNode() - if err != nil { - t.Fatal(err) - } if _, ok := dir.(*UpgradeableDirectory).Directory.(*BasicDirectory); !ok { t.Fatal("UpgradeableDirectory wasn't downgraded to BasicDirectory after removal of the single entry") } - // FIXME: Test integrity of entries in-between switches. + // Check integrity between switches. + // We need to account for the removed entry that triggered the switch + // back. + // FIXME: Abstract this for arbitrary entries. + missingLink, err := ipld.MakeLink(child) + assert.NoError(t, err) + missingLink.Name = "test" + compareDirectoryEntries(t, upgradedDir, dir, []*ipld.Link{missingLink}) +} + +// Compare entries in the leftDir against the rightDir and possibly +// missingEntries in the second. +func compareDirectoryEntries(t *testing.T, leftDir Directory, rightDir Directory, missingEntries []*ipld.Link) { + leftLinks, err := getAllLinksSortedByName(leftDir) + assert.NoError(t, err) + rightLinks, err := getAllLinksSortedByName(rightDir) + assert.NoError(t, err) + rightLinks = append(rightLinks, missingEntries...) + sortLinksByName(rightLinks) + + assert.Equal(t, len(leftLinks), len(rightLinks)) + + for i, leftLink := range leftLinks { + assert.Equal(t, leftLink, rightLinks[i]) // FIXME: Can we just compare the entire struct? + } +} + +func getAllLinksSortedByName(d Directory) ([]*ipld.Link, error) { + entries, err := d.Links(context.Background()) + if err != nil { + return nil, err + } + sortLinksByName(entries) + return entries, nil +} + +func sortLinksByName(l []*ipld.Link) { + sort.SliceStable(l, func(i, j int) bool { + return strings.Compare(l[i].Name, l[j].Name) == -1 // FIXME: Is this correct? + }) +} + +func copyDir(t *testing.T, d Directory) Directory { + dirNode, err := d.GetNode() + assert.NoError(t, err) + // Extract the DAG service from the directory (i.e., its link entries saved + // in it). This is not exposed in the interface and we won't change that now. + // FIXME: Still, this isn't nice. + var ds ipld.DAGService + switch v := d.(type) { + case *BasicDirectory: + ds = v.dserv + case *HAMTDirectory: + ds = v.dserv + case *UpgradeableDirectory: + ds = v.getDagService() + default: + panic("unknown directory type") + } + copiedDir, err := NewDirectoryFromNode(ds, dirNode) + assert.NoError(t, err) + return copiedDir } func TestDirBuilder(t *testing.T) { From 550df9a3bda919f67d75fbd0403cde6777d97ed4 Mon Sep 17 00:00:00 2001 From: Lucas Molas Date: Fri, 18 Jun 2021 20:48:22 -0300 Subject: [PATCH 10/38] go fmt --- io/directory.go | 1 + io/directory_test.go | 1 + 2 files changed, 2 insertions(+) diff --git a/io/directory.go b/io/directory.go index d9d4d1415..92b86b728 100644 --- a/io/directory.go +++ b/io/directory.go @@ -85,6 +85,7 @@ type Directory interface { func productionLinkSize(linkName string, linkCid cid.Cid) int { return len(linkName) + linkCid.ByteLen() } + var estimatedLinkSize = productionLinkSize // BasicDirectory is the basic implementation of `Directory`. All the entries diff --git a/io/directory_test.go b/io/directory_test.go index e09125854..218f25822 100644 --- a/io/directory_test.go +++ b/io/directory_test.go @@ -169,6 +169,7 @@ func TestBasicDirectory_estimatedSize(t *testing.T) { basicDir.estimatedSize, restoredBasicDir.estimatedSize) } } + // FIXME: Add a similar one for HAMT directory, stressing particularly the // deleted/overwritten entries and their computation in the size variation. From 59058626cfd832d144864883cee80d5c0472068b Mon Sep 17 00:00:00 2001 From: Lucas Molas Date: Fri, 18 Jun 2021 21:16:47 -0300 Subject: [PATCH 11/38] go mod tidy; not sure what is the issue now.. --- go.mod | 2 +- go.sum | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/go.mod b/go.mod index 2e87353a5..c8f6da680 100644 --- a/go.mod +++ b/go.mod @@ -18,7 +18,7 @@ require ( github.com/smartystreets/assertions v1.0.0 // indirect github.com/smartystreets/goconvey v0.0.0-20190330032615-68dc04aab96a // indirect github.com/spaolacci/murmur3 v1.1.0 - github.com/stretchr/testify v1.7.0 // indirect + github.com/stretchr/testify v1.7.0 github.com/warpfork/go-wish v0.0.0-20190328234359-8b3e70f8e830 // indirect ) diff --git a/go.sum b/go.sum index fdd5a878a..c986cf1f3 100644 --- a/go.sum +++ b/go.sum @@ -271,7 +271,6 @@ github.com/spacemonkeygo/spacelog v0.0.0-20180420211403-2296661a0572 h1:RC6RW7j+ github.com/spacemonkeygo/spacelog v0.0.0-20180420211403-2296661a0572/go.mod h1:w0SWMsp6j9O/dk4/ZpIhL+3CkG8ofA2vuv7k+ltqUMc= github.com/spaolacci/murmur3 v1.1.0 h1:7c1g84S4BPRrfL5Xrdp6fOJ206sU9y293DDHaoy0bLI= github.com/spaolacci/murmur3 v1.1.0/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA= -github.com/stretchr/objx v0.1.0 h1:4G4v2dO3VZwixGIRoQ5Lfboy6nUhCyYzaqnIAPPhYs4= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= github.com/stretchr/testify v1.4.0 h1:2E4SXV/wtOkTonXsotYi4li6zVWxYlZuYNCXe9XRJyk= @@ -379,4 +378,5 @@ gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= honnef.co/go/tools v0.0.1-2019.2.3 h1:3JgtbtFHMiCmsznwGVTUWbgGov+pVqnlf1dEJTNAXeM= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c h1:dUUwHk2QECo/6vqA44rthZ8ie2QXMNeKRTHCNY2nXvo= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +honnef.co/go/tools v0.0.1-2019.2.3 h1:3JgtbtFHMiCmsznwGVTUWbgGov+pVqnlf1dEJTNAXeM= honnef.co/go/tools v0.0.1-2019.2.3/go.mod h1:a3bituU0lyd329TUQxRnasdCoJDkEUEAqEt0JzvZhAg= From 5402beee6bee100cb44c5ae5c50613f7e6542ae7 Mon Sep 17 00:00:00 2001 From: Lucas Molas Date: Sun, 20 Jun 2021 12:38:45 -0300 Subject: [PATCH 12/38] fix size change sign --- io/directory.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/io/directory.go b/io/directory.go index 92b86b728..0938e02b5 100644 --- a/io/directory.go +++ b/io/directory.go @@ -577,8 +577,8 @@ func (d *UpgradeableDirectory) RemoveChild(ctx context.Context, name string) err return nil } - if HAMTShardingSize == 0 && // Option disabled. - hamtDir.sizeChange < 0 { // We haven't reduced the HAMT net size. + if HAMTShardingSize == 0 || // Option disabled. + hamtDir.sizeChange >= 0 { // We haven't reduced the HAMT net size. return nil } From 76d04cd8507094ae8b61df925ad92eb450334d5c Mon Sep 17 00:00:00 2001 From: Lucas Molas Date: Tue, 29 Jun 2021 16:47:39 -0300 Subject: [PATCH 13/38] go mod tidy --- go.sum | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/go.sum b/go.sum index c986cf1f3..ba44ab4de 100644 --- a/go.sum +++ b/go.sum @@ -273,7 +273,6 @@ github.com/spaolacci/murmur3 v1.1.0 h1:7c1g84S4BPRrfL5Xrdp6fOJ206sU9y293DDHaoy0b github.com/spaolacci/murmur3 v1.1.0/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= -github.com/stretchr/testify v1.4.0 h1:2E4SXV/wtOkTonXsotYi4li6zVWxYlZuYNCXe9XRJyk= github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY= github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= @@ -375,8 +374,8 @@ gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWD gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.2 h1:ZCJp+EgiOT7lHqUV2J862kp8Qj64Jo6az82+3Td9dZw= gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= -honnef.co/go/tools v0.0.1-2019.2.3 h1:3JgtbtFHMiCmsznwGVTUWbgGov+pVqnlf1dEJTNAXeM= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c h1:dUUwHk2QECo/6vqA44rthZ8ie2QXMNeKRTHCNY2nXvo= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= honnef.co/go/tools v0.0.1-2019.2.3 h1:3JgtbtFHMiCmsznwGVTUWbgGov+pVqnlf1dEJTNAXeM= +honnef.co/go/tools v0.0.1-2019.2.3 h1:3JgtbtFHMiCmsznwGVTUWbgGov+pVqnlf1dEJTNAXeM= honnef.co/go/tools v0.0.1-2019.2.3/go.mod h1:a3bituU0lyd329TUQxRnasdCoJDkEUEAqEt0JzvZhAg= From ad4256fed3a6fdb2fb0a8a9676cf6d4d4e4940f9 Mon Sep 17 00:00:00 2001 From: Lucas Molas Date: Thu, 29 Jul 2021 18:35:30 -0300 Subject: [PATCH 14/38] Update hamt/hamt.go Co-authored-by: Adin Schmahmann --- hamt/hamt.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hamt/hamt.go b/hamt/hamt.go index 28e6ac9fa..d8cb8f0b5 100644 --- a/hamt/hamt.go +++ b/hamt/hamt.go @@ -210,7 +210,7 @@ func (ds *Shard) Set(ctx context.Context, name string, nd ipld.Node) error { return err } -// SetAndPrevious sets a link poiting to the passed node as the value under the +// SetAndPrevious sets a link pointing to the passed node as the value under the // name key in this Shard or its children. It also returns the previous link // under that name key (if any). func (ds *Shard) SetAndPrevious(ctx context.Context, name string, node ipld.Node) (*ipld.Link, error) { From 009757ebd9a719824e043e40455adaa005d4a25b Mon Sep 17 00:00:00 2001 From: Lucas Molas Date: Thu, 29 Jul 2021 18:35:41 -0300 Subject: [PATCH 15/38] Update io/directory.go Co-authored-by: Adin Schmahmann --- io/directory.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/io/directory.go b/io/directory.go index 0938e02b5..65373a788 100644 --- a/io/directory.go +++ b/io/directory.go @@ -437,7 +437,7 @@ func (d *HAMTDirectory) removeFromSizeChange(name string, linkCid cid.Cid) { // (to trigger a transition to a BasicDirectory). It returns two `bool`s: // * whether it's below (true) or equal/above (false) // * whether the passed timeout to compute the size has been exceeded -// Instead of enumearting the entire tree we eagerly call EnumLinksAsync +// Instead of enumerating the entire tree we eagerly call EnumLinksAsync // until we either reach a value above the threshold (in that case no need) // to keep counting or the timeout runs out in which case the `below` return // value is not to be trusted as we didn't have time to count enough shards. From 24dbda960a60bbb8efca422545731a8424c777a1 Mon Sep 17 00:00:00 2001 From: Lucas Molas Date: Tue, 3 Aug 2021 13:52:09 -0300 Subject: [PATCH 16/38] feat(dir): add plumbing addLinkChild() to avoid fetching the node (#101) --- io/directory.go | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/io/directory.go b/io/directory.go index 65373a788..f3c6a78bb 100644 --- a/io/directory.go +++ b/io/directory.go @@ -198,17 +198,28 @@ func (d *BasicDirectory) SetCidBuilder(builder cid.Builder) { // AddChild implements the `Directory` interface. It adds (or replaces) // a link to the given `node` under `name`. func (d *BasicDirectory) AddChild(ctx context.Context, name string, node ipld.Node) error { + link, err := ipld.MakeLink(node) + if err != nil { + return err + } + + return d.addLinkChild(ctx, name, link) +} + +// addLinkChild adds the link as an entry to this directory under the given +// name. Plumbing function for the AddChild API. +func (d *BasicDirectory) addLinkChild(ctx context.Context, name string, link *ipld.Link) error { // Remove old link (if it existed; ignore `ErrNotExist` otherwise). err := d.RemoveChild(ctx, name) if err != nil && err != os.ErrNotExist { return err } - err = d.node.AddNodeLink(name, node) + err = d.node.AddRawLink(name, link) if err != nil { return err } - d.addToEstimatedSize(name, node.Cid()) + d.addToEstimatedSize(name, link.Cid) return nil } @@ -398,12 +409,7 @@ func (d *HAMTDirectory) switchToBasic(ctx context.Context) (*BasicDirectory, err basicDir.SetCidBuilder(d.GetCidBuilder()) d.ForEachLink(context.TODO(), func(lnk *ipld.Link) error { - node, err := d.dserv.Get(ctx, lnk.Cid) - if err != nil { - return err - } - - err = basicDir.AddChild(ctx, lnk.Name, node) + err := basicDir.addLinkChild(ctx, lnk.Name, lnk) if err != nil { return err } @@ -411,9 +417,6 @@ func (d *HAMTDirectory) switchToBasic(ctx context.Context) (*BasicDirectory, err return nil }) // FIXME: The above was adapted from SwitchToSharding. We can probably optimize: - // 1. Do not retrieve the full node but create the link from the - // HAMT entry and add it to the BasicDirectory with a (new) - // plumbing function that operates below the AddChild level. // 2. Do not enumerate all link from scratch (harder than previous // item). We call this function only from the UpgradeableDirectory // when we went below the threshold: the detection will be done through From 5a0232ee3eed99b90fe31c12c1e862ea1d7d91f2 Mon Sep 17 00:00:00 2001 From: Lucas Molas Date: Tue, 3 Aug 2021 13:57:00 -0300 Subject: [PATCH 17/38] remove switchToBasic fixme --- io/directory.go | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/io/directory.go b/io/directory.go index f3c6a78bb..e1cf52be1 100644 --- a/io/directory.go +++ b/io/directory.go @@ -415,15 +415,12 @@ func (d *HAMTDirectory) switchToBasic(ctx context.Context) (*BasicDirectory, err } return nil + // This function enumerates all the links in the Directory requiring all + // shards to be accessible but it is only called *after* sizeBelowThreshold + // returns true, which means we have already enumerated and fetched *all* + // shards in the first place (that's the only way we can be really sure + // we are actually below the threshold). }) - // FIXME: The above was adapted from SwitchToSharding. We can probably optimize: - // 2. Do not enumerate all link from scratch (harder than previous - // item). We call this function only from the UpgradeableDirectory - // when we went below the threshold: the detection will be done through - // (partial) enumeration. We may be able to reuse some of that work - // (likely retaining the links in a cache). (All this is uncertain - // at this point as we don't know how partial enumeration will be - // implemented.) return basicDir, nil } From a8cf38e05b6fe26b9662dd71059a99f9146c5a7b Mon Sep 17 00:00:00 2001 From: Lucas Molas Date: Wed, 4 Aug 2021 12:58:02 -0300 Subject: [PATCH 18/38] fix erroneous TODO context in switchToBasic --- io/directory.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/io/directory.go b/io/directory.go index e1cf52be1..dcfca553f 100644 --- a/io/directory.go +++ b/io/directory.go @@ -244,7 +244,7 @@ func (d *BasicDirectory) EnumLinksAsync(ctx context.Context) <-chan format.LinkR } // ForEachLink implements the `Directory` interface. -func (d *BasicDirectory) ForEachLink(ctx context.Context, f func(*ipld.Link) error) error { +func (d *BasicDirectory) ForEachLink(_ context.Context, f func(*ipld.Link) error) error { for _, l := range d.node.Links() { if err := f(l); err != nil { return err @@ -408,7 +408,7 @@ func (d *HAMTDirectory) switchToBasic(ctx context.Context) (*BasicDirectory, err basicDir := newEmptyBasicDirectory(d.dserv) basicDir.SetCidBuilder(d.GetCidBuilder()) - d.ForEachLink(context.TODO(), func(lnk *ipld.Link) error { + d.ForEachLink(ctx, func(lnk *ipld.Link) error { err := basicDir.addLinkChild(ctx, lnk.Name, lnk) if err != nil { return err From 58dfa93ee96beba70199b1ad8615c0ff8ede7353 Mon Sep 17 00:00:00 2001 From: Lucas Molas Date: Wed, 4 Aug 2021 13:05:34 -0300 Subject: [PATCH 19/38] fix(switchToBasic): check error in ForEachLink --- io/directory.go | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/io/directory.go b/io/directory.go index dcfca553f..154c523c3 100644 --- a/io/directory.go +++ b/io/directory.go @@ -174,6 +174,8 @@ func (d *BasicDirectory) computeEstimatedSize() { d.addToEstimatedSize(l.Name, l.Cid) return nil }) + // ForEachLink will never fail traversing the BasicDirectory + // and neither the inner callback `addToEstimatedSize`. } func (d *BasicDirectory) addToEstimatedSize(name string, linkCid cid.Cid) { @@ -408,7 +410,7 @@ func (d *HAMTDirectory) switchToBasic(ctx context.Context) (*BasicDirectory, err basicDir := newEmptyBasicDirectory(d.dserv) basicDir.SetCidBuilder(d.GetCidBuilder()) - d.ForEachLink(ctx, func(lnk *ipld.Link) error { + err := d.ForEachLink(ctx, func(lnk *ipld.Link) error { err := basicDir.addLinkChild(ctx, lnk.Name, lnk) if err != nil { return err @@ -421,6 +423,9 @@ func (d *HAMTDirectory) switchToBasic(ctx context.Context) (*BasicDirectory, err // shards in the first place (that's the only way we can be really sure // we are actually below the threshold). }) + if err != nil { + return nil, err + } return basicDir, nil } From 2807ede42edca9af57bd54afc8a7faa1c3a8e8d6 Mon Sep 17 00:00:00 2001 From: Lucas Molas Date: Tue, 17 Aug 2021 16:55:40 -0300 Subject: [PATCH 20/38] fix(NewDirectoryFromNode): return UpgradeableDirectory in HAMT case as well --- io/directory.go | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/io/directory.go b/io/directory.go index 154c523c3..860499592 100644 --- a/io/directory.go +++ b/io/directory.go @@ -154,15 +154,11 @@ func NewDirectoryFromNode(dserv ipld.DAGService, node ipld.Node) (Directory, err case format.TDirectory: return &UpgradeableDirectory{newBasicDirectoryFromNode(dserv, protoBufNode.Copy().(*mdag.ProtoNode))}, nil case format.THAMTShard: - // FIXME: We now need to return also the UpgradeableDirectory here. shard, err := hamt.NewHamtFromDag(dserv, node) if err != nil { return nil, err } - return &HAMTDirectory{ - dserv: dserv, - shard: shard, - }, nil + return &UpgradeableDirectory{&HAMTDirectory{shard, dserv, 0}}, nil } return nil, ErrNotADir From d21d093f44f3cd6ea377132704deb0d1533e357a Mon Sep 17 00:00:00 2001 From: Lucas Molas Date: Tue, 17 Aug 2021 16:58:00 -0300 Subject: [PATCH 21/38] remove irrelevant fixmes --- hamt/hamt.go | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/hamt/hamt.go b/hamt/hamt.go index d8cb8f0b5..7edfbad90 100644 --- a/hamt/hamt.go +++ b/hamt/hamt.go @@ -455,9 +455,6 @@ func (ds *Shard) setValue(ctx context.Context, hv *hashBits, key string, value * if child.isValueNode() { // Leaf node. This is the base case of this recursive function. - // FIXME: Misleading: the base case also includes a recursive call - // in the case both keys share the same slot in the new child shard - // below. if child.key == key { // We are in the correct shard (tree level) so we modify this child // and return. @@ -473,9 +470,6 @@ func (ds *Shard) setValue(ctx context.Context, hv *hashBits, key string, value * if value == nil { return nil, os.ErrNotExist - // FIXME: Move this case to the top of the clause as - // if child.key != key - // and remove the indentation on the big if. } // We are in the same slot with another entry with a different key @@ -502,11 +496,6 @@ func (ds *Shard) setValue(ctx context.Context, hv *hashBits, key string, value * // (which will be nil) to highlight there is no overwrite here: they are // done with different keys to a new (empty) shard. (At best this shard // will create new ones until we find different slots for both.) - // FIXME: These two shouldn't be recursive calls where one function - // adds the child and the other replaces it with a shard to go one - // level deeper. This should just be a simple loop that traverses the - // shared prefix in the key adding as many shards as needed and finally - // inserts both leaf links together. _, err = child.setValue(ctx, hv, key, value) if err != nil { return From 5e1feb4ba8bca62cb1a33c7fab0e23904fefca56 Mon Sep 17 00:00:00 2001 From: Lucas Molas Date: Tue, 17 Aug 2021 16:58:54 -0300 Subject: [PATCH 22/38] remove extra newline --- io/directory.go | 1 - 1 file changed, 1 deletion(-) diff --git a/io/directory.go b/io/directory.go index 860499592..b074f11ff 100644 --- a/io/directory.go +++ b/io/directory.go @@ -335,7 +335,6 @@ func (d *HAMTDirectory) SetCidBuilder(builder cid.Builder) { // AddChild implements the `Directory` interface. func (d *HAMTDirectory) AddChild(ctx context.Context, name string, nd ipld.Node) error { - oldChild, err := d.shard.SetAndPrevious(ctx, name, nd) if err != nil { // FIXME: We don't know if SetAndPrevious failed before removing the From 9eaa15ffb24fc6cd5b9cca4aae9d0a3b864da157 Mon Sep 17 00:00:00 2001 From: Lucas Molas Date: Tue, 17 Aug 2021 17:04:00 -0300 Subject: [PATCH 23/38] remove solved fixme --- io/directory.go | 3 --- 1 file changed, 3 deletions(-) diff --git a/io/directory.go b/io/directory.go index b074f11ff..844567e89 100644 --- a/io/directory.go +++ b/io/directory.go @@ -337,9 +337,6 @@ func (d *HAMTDirectory) SetCidBuilder(builder cid.Builder) { func (d *HAMTDirectory) AddChild(ctx context.Context, name string, nd ipld.Node) error { oldChild, err := d.shard.SetAndPrevious(ctx, name, nd) if err != nil { - // FIXME: We don't know if SetAndPrevious failed before removing the - // old entry or not (it certainly failed before *adding* it, right?). - // We should check this. return err } From 3f427705778ee0cb526953ad845a769e557fb353 Mon Sep 17 00:00:00 2001 From: Lucas Molas Date: Tue, 17 Aug 2021 17:15:49 -0300 Subject: [PATCH 24/38] go mod tidy --- go.sum | 1 - 1 file changed, 1 deletion(-) diff --git a/go.sum b/go.sum index ba44ab4de..7ce062ee0 100644 --- a/go.sum +++ b/go.sum @@ -377,5 +377,4 @@ gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c h1:dUUwHk2QECo/6vqA44rthZ8ie2QXMNeKRTHCNY2nXvo= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= honnef.co/go/tools v0.0.1-2019.2.3 h1:3JgtbtFHMiCmsznwGVTUWbgGov+pVqnlf1dEJTNAXeM= -honnef.co/go/tools v0.0.1-2019.2.3 h1:3JgtbtFHMiCmsznwGVTUWbgGov+pVqnlf1dEJTNAXeM= honnef.co/go/tools v0.0.1-2019.2.3/go.mod h1:a3bituU0lyd329TUQxRnasdCoJDkEUEAqEt0JzvZhAg= From dd933a4869d07cfc0140d9fdee900a7b25fd9bf7 Mon Sep 17 00:00:00 2001 From: Lucas Molas Date: Thu, 26 Aug 2021 13:32:31 -0300 Subject: [PATCH 25/38] Remove tiemout and refactor sizeBelowThreshold (#102) * fix(dir): remove timeout and evaluate switch *before* removing entry * BLOCKING: temporary fix * remove Shard cid not used and not maintained * update blocking readme * resolve FIXME * fix: add back concurrent option in dag.walk * decouple switch from directory downgrade --- hamt/hamt.go | 10 ++-- io/directory.go | 147 ++++++++++++++++++++---------------------------- 2 files changed, 66 insertions(+), 91 deletions(-) diff --git a/hamt/hamt.go b/hamt/hamt.go index 7edfbad90..2f50a6f7e 100644 --- a/hamt/hamt.go +++ b/hamt/hamt.go @@ -43,8 +43,6 @@ func (ds *Shard) isValueNode() bool { // A Shard represents the HAMT. It should be initialized with NewShard(). type Shard struct { - cid cid.Cid - childer *childer tableSize int @@ -123,7 +121,6 @@ func NewHamtFromDag(dserv ipld.DAGService, nd ipld.Node) (*Shard, error) { ds.childer.makeChilder(fsn.Data(), pbnd.Links()) - ds.cid = pbnd.Cid() ds.hashFunc = fsn.HashType() ds.builder = pbnd.CidBuilder() @@ -355,7 +352,12 @@ func (ds *Shard) EnumLinksAsync(ctx context.Context) <-chan format.LinkResult { defer cancel() getLinks := makeAsyncTrieGetLinks(ds.dserv, linkResults) cset := cid.NewSet() - err := dag.Walk(ctx, getLinks, ds.cid, cset.Visit, dag.Concurrent()) + rootNode, err := ds.Node() + if err != nil { + emitResult(ctx, linkResults, format.LinkResult{Link: nil, Err: err}) + return + } + err = dag.Walk(ctx, getLinks, rootNode.Cid(), cset.Visit, dag.Concurrent()) if err != nil { emitResult(ctx, linkResults, format.LinkResult{Link: nil, Err: err}) } diff --git a/io/directory.go b/io/directory.go index 844567e89..94839c1c1 100644 --- a/io/directory.go +++ b/io/directory.go @@ -3,13 +3,10 @@ package io import ( "context" "fmt" - "os" - "time" - mdag "github.com/ipfs/go-merkledag" - format "github.com/ipfs/go-unixfs" "github.com/ipfs/go-unixfs/hamt" + "os" "github.com/ipfs/go-cid" ipld "github.com/ipfs/go-ipld-format" @@ -26,11 +23,6 @@ var log = logging.Logger("unixfs") // ProtoNode doesn't use the Data field so this estimate is pretty accurate). var HAMTShardingSize = 0 -// Time in seconds allowed to fetch the shards to compute the size before -// returning an error. -// FIXME: Adjust to sane value. -var EvaluateHAMTTransitionTimeout = time.Duration(1) - // DefaultShardWidth is the default value used for hamt sharding width. var DefaultShardWidth = 256 @@ -430,15 +422,42 @@ func (d *HAMTDirectory) removeFromSizeChange(name string, linkCid cid.Cid) { d.sizeChange -= estimatedLinkSize(name, linkCid) } -// Evaluate directory size and check if it's below HAMTShardingSize threshold -// (to trigger a transition to a BasicDirectory). It returns two `bool`s: -// * whether it's below (true) or equal/above (false) -// * whether the passed timeout to compute the size has been exceeded +// FIXME: Will be extended later to the `AddEntry` case. +func (d *HAMTDirectory) needsToSwitchToBasicDir(ctx context.Context, nameToRemove string) (switchToBasic bool, err error) { + if HAMTShardingSize == 0 { // Option disabled. + return false, nil + } + + entryToRemove, err := d.shard.Find(ctx, nameToRemove) + if err == os.ErrNotExist { + // Nothing to remove, no point in evaluating a switch. + return false, nil + } else if err != nil { + return false, err + } + sizeToRemove := estimatedLinkSize(nameToRemove, entryToRemove.Cid) + + if d.sizeChange-sizeToRemove >= 0 { + // We won't have reduced the HAMT net size. + return false, nil + } + + // We have reduced the directory size, check if went below the + // HAMTShardingSize threshold to trigger a switch. + belowThreshold, err := d.sizeBelowThreshold(ctx, -sizeToRemove) + if err != nil { + return false, err + } + return belowThreshold, nil +} + +// Evaluate directory size and a future sizeChange and check if it will be below +// HAMTShardingSize threshold (to trigger a transition to a BasicDirectory). // Instead of enumerating the entire tree we eagerly call EnumLinksAsync -// until we either reach a value above the threshold (in that case no need) -// to keep counting or the timeout runs out in which case the `below` return -// value is not to be trusted as we didn't have time to count enough shards. -func (d *HAMTDirectory) sizeBelowThreshold(timeout time.Duration) (below bool, timeoutExceeded bool) { +// until we either reach a value above the threshold (in that case no need +// to keep counting) or an error occurs (like the context being canceled +// if we take too much time fetching the necessary shards). +func (d *HAMTDirectory) sizeBelowThreshold(ctx context.Context, sizeChange int) (below bool, err error) { if HAMTShardingSize == 0 { panic("asked to compute HAMT size with HAMTShardingSize option off (0)") } @@ -447,57 +466,25 @@ func (d *HAMTDirectory) sizeBelowThreshold(timeout time.Duration) (below bool, t // end early if we already know we're above the threshold or run out of time. partialSize := 0 - ctx, cancel := context.WithTimeout(context.Background(), time.Second*timeout) + // We stop the enumeration once we have enough information and exit this function. + ctx, cancel := context.WithCancel(ctx) defer cancel() + for linkResult := range d.EnumLinksAsync(ctx) { if linkResult.Err != nil { - continue - // The timeout exceeded errors will be coming through here but I'm - // not sure if we can just compare against a generic DeadlineExceeded - // error here to return early and avoid iterating the entire loop. - // (We might confuse a specific DeadlineExceeded of an internal function - // with our context here.) - // Since *our* DeadlineExceeded will quickly propagate to any other - // pending fetches it seems that iterating the entire loop won't add - // much more cost anyway. - // FIXME: Check the above reasoning. + return false, linkResult.Err } - if linkResult.Link == nil { - panic("empty link result (both values nil)") - // FIXME: Is this *ever* possible? - } - partialSize += estimatedLinkSize(linkResult.Link.Name, linkResult.Link.Cid) - if partialSize >= HAMTShardingSize { + partialSize += estimatedLinkSize(linkResult.Link.Name, linkResult.Link.Cid) + if partialSize+sizeChange >= HAMTShardingSize { // We have already fetched enough shards to assert we are // above the threshold, so no need to keep fetching. - return false, false + return false, nil } } - // At this point either we enumerated all shards or run out of time. - // Figure out which. - - if ctx.Err() == context.Canceled { - panic("the context was canceled but we're still evaluating a possible switch") - } - if partialSize >= HAMTShardingSize { - panic("we reach the threshold but we're still evaluating a possible switch") - } - - if ctx.Err() == context.DeadlineExceeded { - return false, true - } - // If we reach this then: - // * We are below the threshold (we didn't return inside the EnumLinksAsync - // loop). - // * The context wasn't cancelled so we iterated *all* shards - // and are sure that we have the full size. - // FIXME: Can we actually verify the last claim here to be sure? - // (Iterating all the shards in the HAMT as a plumbing function maybe. - // If they're in memory it shouldn't be that expensive, we won't be - // switching that often, probably.) - return true, false + // We enumerated *all* links in all shards and didn't reach the threshold. + return true, nil } // UpgradeableDirectory wraps a Directory interface and provides extra logic @@ -565,42 +552,28 @@ func (d *UpgradeableDirectory) getDagService() ipld.DAGService { // sure we make good on the value). Finding the right margin can be tricky // and very dependent on the use case so it might not be worth it. func (d *UpgradeableDirectory) RemoveChild(ctx context.Context, name string) error { - if err := d.Directory.RemoveChild(ctx, name); err != nil { - return err - } - hamtDir, ok := d.Directory.(*HAMTDirectory) - if !ok { // BasicDirectory - return nil + if !ok { + return d.Directory.RemoveChild(ctx, name) } - if HAMTShardingSize == 0 || // Option disabled. - hamtDir.sizeChange >= 0 { // We haven't reduced the HAMT net size. - return nil + switchToBasic, err := hamtDir.needsToSwitchToBasicDir(ctx, name) + if err != nil { + return err } - // We have reduced the directory size, check if it didn't go under - // the HAMTShardingSize threshold. - belowThreshold, timeoutExceeded := hamtDir.sizeBelowThreshold(EvaluateHAMTTransitionTimeout) - - if timeoutExceeded { - // We run out of time before confirming if we're indeed below the - // threshold. When in doubt error to not return inconsistent structures. - // FIXME: We could allow this to return without error and enforce this - // timeout on a GetNode() call when we need to actually commit to a - // structure/CID. (The downside is that GetNode() doesn't have a - // context argument and we would have to break the API.) - return fmt.Errorf("not enought time to fetch shards") - // FIXME: Abstract in new error for testing. + if !switchToBasic { + return hamtDir.RemoveChild(ctx, name) } - if belowThreshold { // Switch. - basicDir, err := hamtDir.switchToBasic(ctx) - if err != nil { - return err - } - d.Directory = basicDir + basicDir, err := hamtDir.switchToBasic(ctx) + if err != nil { + return err } - + basicDir.RemoveChild(ctx, name) + if err != nil { + return err + } + d.Directory = basicDir return nil } From d07bf0064e98d08290abbc98872a2400381ac1d0 Mon Sep 17 00:00:00 2001 From: Lucas Molas Date: Thu, 26 Aug 2021 13:44:06 -0300 Subject: [PATCH 26/38] fix(directory): add unsharding logic for AddChild case (#103) --- io/directory.go | 125 +++++++++++++++++++++++++++++++++--------------- 1 file changed, 87 insertions(+), 38 deletions(-) diff --git a/io/directory.go b/io/directory.go index 94839c1c1..5db1b3c61 100644 --- a/io/directory.go +++ b/io/directory.go @@ -196,10 +196,32 @@ func (d *BasicDirectory) AddChild(ctx context.Context, name string, node ipld.No return d.addLinkChild(ctx, name, link) } +func (d *BasicDirectory) needsToSwitchToHAMTDir(name string, nodeToAdd ipld.Node) (bool, error) { + if HAMTShardingSize == 0 { // Option disabled. + return false, nil + } + + operationSizeChange := 0 + // Find if there is an old entry under that name that will be overwritten. + entryToRemove, err := d.node.GetNodeLink(name) + if err != mdag.ErrLinkNotFound { + if err != nil { + return false, err + } + operationSizeChange -= estimatedLinkSize(name, entryToRemove.Cid) + } + if nodeToAdd != nil { + operationSizeChange += estimatedLinkSize(name, nodeToAdd.Cid()) + } + + return d.estimatedSize+operationSizeChange >= HAMTShardingSize, nil +} + // addLinkChild adds the link as an entry to this directory under the given // name. Plumbing function for the AddChild API. func (d *BasicDirectory) addLinkChild(ctx context.Context, name string, link *ipld.Link) error { - // Remove old link (if it existed; ignore `ErrNotExist` otherwise). + // Remove old link and account for size change (if it existed; ignore + // `ErrNotExist` otherwise). err := d.RemoveChild(ctx, name) if err != nil && err != os.ErrNotExist { return err @@ -294,7 +316,7 @@ func (d *BasicDirectory) GetCidBuilder() cid.Builder { } // SwitchToSharding returns a HAMT implementation of this directory. -func (d *BasicDirectory) SwitchToSharding(ctx context.Context) (Directory, error) { +func (d *BasicDirectory) SwitchToSharding(ctx context.Context) (*HAMTDirectory, error) { hamtDir := new(HAMTDirectory) hamtDir.dserv = d.dserv @@ -422,33 +444,42 @@ func (d *HAMTDirectory) removeFromSizeChange(name string, linkCid cid.Cid) { d.sizeChange -= estimatedLinkSize(name, linkCid) } -// FIXME: Will be extended later to the `AddEntry` case. -func (d *HAMTDirectory) needsToSwitchToBasicDir(ctx context.Context, nameToRemove string) (switchToBasic bool, err error) { +// Evaluate a switch from HAMTDirectory to BasicDirectory in case the size will +// go above the threshold when we are adding or removing an entry. +// In both the add/remove operations any old name will be removed, and for the +// add operation in particular a new entry will be added under that name (otherwise +// nodeToAdd is nil). We compute both (potential) future subtraction and +// addition to the size change. +func (d *HAMTDirectory) needsToSwitchToBasicDir(ctx context.Context, name string, nodeToAdd ipld.Node) (switchToBasic bool, err error) { if HAMTShardingSize == 0 { // Option disabled. return false, nil } - entryToRemove, err := d.shard.Find(ctx, nameToRemove) - if err == os.ErrNotExist { - // Nothing to remove, no point in evaluating a switch. - return false, nil - } else if err != nil { - return false, err + operationSizeChange := 0 + + // Find if there is an old entry under that name that will be overwritten + // (AddEntry) or flat out removed (RemoveEntry). + entryToRemove, err := d.shard.Find(ctx, name) + if err != os.ErrNotExist { + if err != nil { + return false, err + } + operationSizeChange -= estimatedLinkSize(name, entryToRemove.Cid) + } + + // For the AddEntry case compute the size addition of the new entry. + if nodeToAdd != nil { + operationSizeChange += estimatedLinkSize(name, nodeToAdd.Cid()) } - sizeToRemove := estimatedLinkSize(nameToRemove, entryToRemove.Cid) - if d.sizeChange-sizeToRemove >= 0 { + if d.sizeChange+operationSizeChange >= 0 { // We won't have reduced the HAMT net size. return false, nil } // We have reduced the directory size, check if went below the // HAMTShardingSize threshold to trigger a switch. - belowThreshold, err := d.sizeBelowThreshold(ctx, -sizeToRemove) - if err != nil { - return false, err - } - return belowThreshold, nil + return d.sizeBelowThreshold(ctx, operationSizeChange) } // Evaluate directory size and a future sizeChange and check if it will be below @@ -503,32 +534,50 @@ var _ Directory = (*UpgradeableDirectory)(nil) // AddChild implements the `Directory` interface. We check when adding new entries // if we should switch to HAMTDirectory according to global option(s). func (d *UpgradeableDirectory) AddChild(ctx context.Context, name string, nd ipld.Node) error { - err := d.Directory.AddChild(ctx, name, nd) + hamtDir, ok := d.Directory.(*HAMTDirectory) + if ok { + // We evaluate a switch in the HAMTDirectory case even for an AddChild + // as it may overwrite an existing entry and end up actually reducing + // the directory size. + switchToBasic, err := hamtDir.needsToSwitchToBasicDir(ctx, name, nd) + if err != nil { + return err + } + + if switchToBasic { + basicDir, err := hamtDir.switchToBasic(ctx) + if err != nil { + return err + } + err = basicDir.AddChild(ctx, name, nd) + if err != nil { + return err + } + d.Directory = basicDir + return nil + } + + return d.Directory.AddChild(ctx, name, nd) + } + + // BasicDirectory + basicDir := d.Directory.(*BasicDirectory) + switchToHAMT, err := basicDir.needsToSwitchToHAMTDir(name, nd) if err != nil { return err } - - // Evaluate possible HAMT upgrade. - if HAMTShardingSize == 0 { - return nil + if !switchToHAMT { + return basicDir.AddChild(ctx, name, nd) } - basicDir, ok := d.Directory.(*BasicDirectory) - if !ok { - return nil + hamtDir, err = basicDir.SwitchToSharding(ctx) + if err != nil { + return err } - if basicDir.estimatedSize >= HAMTShardingSize { - // Ideally to minimize performance we should check if this last - // `AddChild` call would bring the directory size over the threshold - // *before* executing it since we would end up switching anyway and - // that call would be "wasted". This is a minimal performance impact - // and we prioritize a simple code base. - hamtDir, err := basicDir.SwitchToSharding(ctx) - if err != nil { - return err - } - d.Directory = hamtDir + hamtDir.AddChild(ctx, name, nd) + if err != nil { + return err } - + d.Directory = hamtDir return nil } @@ -557,7 +606,7 @@ func (d *UpgradeableDirectory) RemoveChild(ctx context.Context, name string) err return d.Directory.RemoveChild(ctx, name) } - switchToBasic, err := hamtDir.needsToSwitchToBasicDir(ctx, name) + switchToBasic, err := hamtDir.needsToSwitchToBasicDir(ctx, name, nil) if err != nil { return err } From 9ea81f23053b49720a110e15726a87d2713de4e1 Mon Sep 17 00:00:00 2001 From: Lucas Molas Date: Fri, 22 Oct 2021 18:20:44 -0300 Subject: [PATCH 27/38] Tests for unsharding PR (#99) - Add tests for automatic unsharding - Modified some internals to be sufficiently extensible for testing --- hamt/hamt.go | 41 ++- hamt/util.go | 18 +- internal/config.go | 3 + io/completehamt_test.go | 95 +++++++ io/directory.go | 42 ++-- io/directory_test.go | 467 +++++++++++++++++++++++++---------- private/linksize/linksize.go | 5 + 7 files changed, 503 insertions(+), 168 deletions(-) create mode 100644 internal/config.go create mode 100644 io/completehamt_test.go create mode 100644 private/linksize/linksize.go diff --git a/hamt/hamt.go b/hamt/hamt.go index 2f50a6f7e..7e770c9d6 100644 --- a/hamt/hamt.go +++ b/hamt/hamt.go @@ -25,11 +25,13 @@ import ( "fmt" "os" + format "github.com/ipfs/go-unixfs" + "github.com/ipfs/go-unixfs/internal" + bitfield "github.com/ipfs/go-bitfield" cid "github.com/ipfs/go-cid" ipld "github.com/ipfs/go-ipld-format" dag "github.com/ipfs/go-merkledag" - format "github.com/ipfs/go-unixfs" ) const ( @@ -37,6 +39,10 @@ const ( HashMurmur3 uint64 = 0x22 ) +func init() { + internal.HAMTHashFunction = murmur3Hash +} + func (ds *Shard) isValueNode() bool { return ds.key != "" && ds.val != nil } @@ -45,17 +51,29 @@ func (ds *Shard) isValueNode() bool { type Shard struct { childer *childer - tableSize int + // Entries per node (number of possible childs indexed by the partial key). + tableSize int + // Bits needed to encode child indexes (log2 of number of entries). This is + // the number of bits taken from the hash key on each level of the tree. tableSizeLg2 int builder cid.Builder hashFunc uint64 + // String format with number of zeros that will be present in the hexadecimal + // encoding of the child index to always reach the fixed maxpadlen chars. + // Example: maxpadlen = 4 => prefixPadStr: "%04X" (print number in hexadecimal + // format padding with zeros to always reach 4 characters). prefixPadStr string - maxpadlen int + // Length in chars of string that encodes child indexes. We encode indexes + // as hexadecimal strings to this is log4 of number of entries. + maxpadlen int dserv ipld.DAGService + // FIXME: Remove. We don't actually store "value nodes". This confusing + // abstraction just removes the maxpadlen from the link names to extract + // the actual value link the trie is storing. // leaf node key string val *ipld.Link @@ -68,12 +86,13 @@ func NewShard(dserv ipld.DAGService, size int) (*Shard, error) { return nil, err } + // FIXME: Make this at least a static configuration for testing. ds.hashFunc = HashMurmur3 return ds, nil } func makeShard(ds ipld.DAGService, size int) (*Shard, error) { - lg2s, err := logtwo(size) + lg2s, err := Logtwo(size) if err != nil { return nil, err } @@ -211,7 +230,7 @@ func (ds *Shard) Set(ctx context.Context, name string, nd ipld.Node) error { // name key in this Shard or its children. It also returns the previous link // under that name key (if any). func (ds *Shard) SetAndPrevious(ctx context.Context, name string, node ipld.Node) (*ipld.Link, error) { - hv := &hashBits{b: hash([]byte(name))} + hv := newHashBits(name) err := ds.dserv.Add(ctx, node) if err != nil { return nil, err @@ -221,6 +240,9 @@ func (ds *Shard) SetAndPrevious(ctx context.Context, name string, node ipld.Node if err != nil { return nil, err } + + // FIXME: We don't need to set the name here, it will get overwritten. + // This is confusing, confirm and remove this line. lnk.Name = ds.linkNamePrefix(0) + name return ds.setValue(ctx, hv, name, lnk) @@ -236,13 +258,13 @@ func (ds *Shard) Remove(ctx context.Context, name string) error { // RemoveAndPrevious is similar to the public Remove but also returns the // old removed link (if it exists). func (ds *Shard) RemoveAndPrevious(ctx context.Context, name string) (*ipld.Link, error) { - hv := &hashBits{b: hash([]byte(name))} + hv := newHashBits(name) return ds.setValue(ctx, hv, name, nil) } // Find searches for a child node by 'name' within this hamt func (ds *Shard) Find(ctx context.Context, name string) (*ipld.Link, error) { - hv := &hashBits{b: hash([]byte(name))} + hv := newHashBits(name) var out *ipld.Link err := ds.getValue(ctx, hv, name, func(sv *Shard) error { @@ -489,10 +511,7 @@ func (ds *Shard) setValue(ctx context.Context, hv *hashBits, key string, value * return nil, err } child.builder = ds.builder - chhv := &hashBits{ - b: hash([]byte(grandChild.key)), - consumed: hv.consumed, - } + chhv := newConsumedHashBits(grandChild.key, hv.consumed) // We explicitly ignore the oldValue returned by the next two insertions // (which will be nil) to highlight there is no overwrite here: they are diff --git a/hamt/util.go b/hamt/util.go index 7ae02dfb3..29f59435e 100644 --- a/hamt/util.go +++ b/hamt/util.go @@ -2,9 +2,11 @@ package hamt import ( "fmt" + "math/bits" + + "github.com/ipfs/go-unixfs/internal" "github.com/spaolacci/murmur3" - "math/bits" ) // hashBits is a helper that allows the reading of the 'next n bits' as an integer. @@ -13,6 +15,16 @@ type hashBits struct { consumed int } +func newHashBits(val string) *hashBits { + return &hashBits{b: internal.HAMTHashFunction([]byte(val))} +} + +func newConsumedHashBits(val string, consumed int) *hashBits { + hv := &hashBits{b: internal.HAMTHashFunction([]byte(val))} + hv.consumed = consumed + return hv +} + func mkmask(n int) byte { return (1 << uint(n)) - 1 } @@ -50,7 +62,7 @@ func (hb *hashBits) next(i int) int { } } -func logtwo(v int) (int, error) { +func Logtwo(v int) (int, error) { if v <= 0 { return 0, fmt.Errorf("hamt size should be a power of two") } @@ -61,7 +73,7 @@ func logtwo(v int) (int, error) { return lg2, nil } -func hash(val []byte) []byte { +func murmur3Hash(val []byte) []byte { h := murmur3.New64() h.Write(val) return h.Sum(nil) diff --git a/internal/config.go b/internal/config.go new file mode 100644 index 000000000..9250ae2ae --- /dev/null +++ b/internal/config.go @@ -0,0 +1,3 @@ +package internal + +var HAMTHashFunction func(val []byte) []byte diff --git a/io/completehamt_test.go b/io/completehamt_test.go new file mode 100644 index 000000000..1bb3d8720 --- /dev/null +++ b/io/completehamt_test.go @@ -0,0 +1,95 @@ +package io + +import ( + "context" + "encoding/binary" + "fmt" + "math" + "testing" + + mdtest "github.com/ipfs/go-merkledag/test" + "github.com/stretchr/testify/assert" + + "github.com/ipfs/go-unixfs" + "github.com/ipfs/go-unixfs/hamt" + + ipld "github.com/ipfs/go-ipld-format" +) + +// CreateCompleteHAMT creates a HAMT the following properties: +// * its height (distance/edges from root to deepest node) is specified by treeHeight. +// * all leaf Shard nodes have the same depth (and have only 'value' links). +// * all internal Shard nodes point only to other Shards (and hence have zero 'value' links). +// * the total number of 'value' links (directory entries) is: +// io.DefaultShardWidth ^ (treeHeight + 1). +// FIXME: HAMTHashFunction needs to be set to idHash by the caller. We depend on +// this simplification for the current logic to work. (HAMTHashFunction is a +// global setting of the package, it is hard-coded in the serialized Shard node +// and not allowed to be changed on a per HAMT/Shard basis.) +// (If we didn't rehash inside setValue then we could just generate +// the fake hash as in io.SetAndPrevious through `newHashBits()` and pass +// it as an argument making the hash independent of tree manipulation; that +// sounds as the correct way to go in general and we wouldn't need this.) +func CreateCompleteHAMT(ds ipld.DAGService, treeHeight int, childsPerNode int) (ipld.Node, error) { + if treeHeight < 1 { + panic("treeHeight < 1") + } + if treeHeight > 8 { + panic("treeHeight > 8: we don't allow a key larger than what can be encoded in a 64-bit word") + } + + rootShard, err := hamt.NewShard(ds, childsPerNode) + if err != nil { + return nil, err + } + + // Assuming we are using the ID hash function we can just insert all + // the combinations of a byte slice that will reach the desired height. + totalChildren := int(math.Pow(float64(childsPerNode), float64(treeHeight))) + log2ofChilds, err := hamt.Logtwo(childsPerNode) + if err != nil { + return nil, err + } + if log2ofChilds*treeHeight%8 != 0 { + return nil, fmt.Errorf("childsPerNode * treeHeight should be multiple of 8") + } + bytesInKey := log2ofChilds * treeHeight / 8 + for i := 0; i < totalChildren; i++ { + var hashbuf [8]byte + binary.LittleEndian.PutUint64(hashbuf[:], uint64(i)) + var oldLink *ipld.Link + oldLink, err = rootShard.SetAndPrevious(context.Background(), string(hashbuf[:bytesInKey]), unixfs.EmptyFileNode()) + if err != nil { + return nil, err + } + if oldLink != nil { + // We shouldn't be overwriting any value, otherwise the tree + // won't be complete. + return nil, fmt.Errorf("we have overwritten entry %s", + oldLink.Cid) + } + } + + return rootShard.Node() +} + +// Return the same value as the hash. +func idHash(val []byte) []byte { + return val +} + +func TestCreateCompleteShard(t *testing.T) { + ds := mdtest.Mock() + childsPerNode := 16 + treeHeight := 2 + node, err := CreateCompleteHAMT(ds, treeHeight, childsPerNode) + assert.NoError(t, err) + + shard, err := hamt.NewHamtFromDag(ds, node) + assert.NoError(t, err) + links, err := shard.EnumLinks(context.Background()) + assert.NoError(t, err) + + childNodes := int(math.Pow(float64(childsPerNode), float64(treeHeight))) + assert.Equal(t, childNodes, len(links)) +} diff --git a/io/directory.go b/io/directory.go index 5db1b3c61..b8915a3b0 100644 --- a/io/directory.go +++ b/io/directory.go @@ -3,14 +3,16 @@ package io import ( "context" "fmt" - mdag "github.com/ipfs/go-merkledag" - format "github.com/ipfs/go-unixfs" - "github.com/ipfs/go-unixfs/hamt" "os" + "github.com/ipfs/go-unixfs/hamt" + "github.com/ipfs/go-unixfs/private/linksize" + "github.com/ipfs/go-cid" ipld "github.com/ipfs/go-ipld-format" logging "github.com/ipfs/go-log" + mdag "github.com/ipfs/go-merkledag" + format "github.com/ipfs/go-unixfs" ) var log = logging.Logger("unixfs") @@ -24,6 +26,7 @@ var log = logging.Logger("unixfs") var HAMTShardingSize = 0 // DefaultShardWidth is the default value used for hamt sharding width. +// Needs to be a power of two (shard entry size) and multiple of 8 (bitfield size). var DefaultShardWidth = 256 // Directory defines a UnixFS directory. It is used for creating, reading and @@ -78,7 +81,9 @@ func productionLinkSize(linkName string, linkCid cid.Cid) int { return len(linkName) + linkCid.ByteLen() } -var estimatedLinkSize = productionLinkSize +func init() { + linksize.LinkSizeFunction = productionLinkSize +} // BasicDirectory is the basic implementation of `Directory`. All the entries // are stored in a single node. @@ -167,11 +172,11 @@ func (d *BasicDirectory) computeEstimatedSize() { } func (d *BasicDirectory) addToEstimatedSize(name string, linkCid cid.Cid) { - d.estimatedSize += estimatedLinkSize(name, linkCid) + d.estimatedSize += linksize.LinkSizeFunction(name, linkCid) } func (d *BasicDirectory) removeFromEstimatedSize(name string, linkCid cid.Cid) { - d.estimatedSize -= estimatedLinkSize(name, linkCid) + d.estimatedSize -= linksize.LinkSizeFunction(name, linkCid) if d.estimatedSize < 0 { // Something has gone very wrong. Log an error and recompute the // size from scratch. @@ -208,10 +213,10 @@ func (d *BasicDirectory) needsToSwitchToHAMTDir(name string, nodeToAdd ipld.Node if err != nil { return false, err } - operationSizeChange -= estimatedLinkSize(name, entryToRemove.Cid) + operationSizeChange -= linksize.LinkSizeFunction(name, entryToRemove.Cid) } if nodeToAdd != nil { - operationSizeChange += estimatedLinkSize(name, nodeToAdd.Cid()) + operationSizeChange += linksize.LinkSizeFunction(name, nodeToAdd.Cid()) } return d.estimatedSize+operationSizeChange >= HAMTShardingSize, nil @@ -437,11 +442,11 @@ func (d *HAMTDirectory) switchToBasic(ctx context.Context) (*BasicDirectory, err } func (d *HAMTDirectory) addToSizeChange(name string, linkCid cid.Cid) { - d.sizeChange += estimatedLinkSize(name, linkCid) + d.sizeChange += linksize.LinkSizeFunction(name, linkCid) } func (d *HAMTDirectory) removeFromSizeChange(name string, linkCid cid.Cid) { - d.sizeChange -= estimatedLinkSize(name, linkCid) + d.sizeChange -= linksize.LinkSizeFunction(name, linkCid) } // Evaluate a switch from HAMTDirectory to BasicDirectory in case the size will @@ -464,12 +469,12 @@ func (d *HAMTDirectory) needsToSwitchToBasicDir(ctx context.Context, name string if err != nil { return false, err } - operationSizeChange -= estimatedLinkSize(name, entryToRemove.Cid) + operationSizeChange -= linksize.LinkSizeFunction(name, entryToRemove.Cid) } // For the AddEntry case compute the size addition of the new entry. if nodeToAdd != nil { - operationSizeChange += estimatedLinkSize(name, nodeToAdd.Cid()) + operationSizeChange += linksize.LinkSizeFunction(name, nodeToAdd.Cid()) } if d.sizeChange+operationSizeChange >= 0 { @@ -506,7 +511,7 @@ func (d *HAMTDirectory) sizeBelowThreshold(ctx context.Context, sizeChange int) return false, linkResult.Err } - partialSize += estimatedLinkSize(linkResult.Link.Name, linkResult.Link.Cid) + partialSize += linksize.LinkSizeFunction(linkResult.Link.Name, linkResult.Link.Cid) if partialSize+sizeChange >= HAMTShardingSize { // We have already fetched enough shards to assert we are // above the threshold, so no need to keep fetching. @@ -581,17 +586,6 @@ func (d *UpgradeableDirectory) AddChild(ctx context.Context, name string, nd ipl return nil } -func (d *UpgradeableDirectory) getDagService() ipld.DAGService { - switch v := d.Directory.(type) { - case *BasicDirectory: - return v.dserv - case *HAMTDirectory: - return v.dserv - default: - panic("unknown directory type") - } -} - // RemoveChild implements the `Directory` interface. Used in the case where we wrap // a HAMTDirectory that might need to be downgraded to a BasicDirectory. The // upgrade path is in AddChild. diff --git a/io/directory_test.go b/io/directory_test.go index 218f25822..909f9b4fd 100644 --- a/io/directory_test.go +++ b/io/directory_test.go @@ -5,8 +5,11 @@ import ( "fmt" "math" "sort" + "strconv" "strings" + "sync" "testing" + "time" cid "github.com/ipfs/go-cid" ipld "github.com/ipfs/go-ipld-format" @@ -14,6 +17,9 @@ import ( mdtest "github.com/ipfs/go-merkledag/test" ft "github.com/ipfs/go-unixfs" + "github.com/ipfs/go-unixfs/hamt" + "github.com/ipfs/go-unixfs/internal" + "github.com/ipfs/go-unixfs/private/linksize" "github.com/stretchr/testify/assert" ) @@ -105,161 +111,298 @@ func TestDuplicateAddDir(t *testing.T) { } } -// FIXME: Nothing blocking but nice to have: -// * Check estimated size against link enumeration (indirectly done in the -// restored node check from NewDirectoryFromNode). -// * Check estimated size against encoded node (the difference should only be -// a small percentage for a directory with 10s of entries). -// FIXME: Add a test for the HAMT sizeChange abstracting some of the code from -// this one. func TestBasicDirectory_estimatedSize(t *testing.T) { ds := mdtest.Mock() + basicDir := newEmptyBasicDirectory(ds) + + testDirectorySizeEstimation(t, basicDir, ds, func(dir Directory) int { + return dir.(*BasicDirectory).estimatedSize + }) +} + +func TestHAMTDirectory_sizeChange(t *testing.T) { + ds := mdtest.Mock() + hamtDir, err := newEmptyHAMTDirectory(ds, DefaultShardWidth) + assert.NoError(t, err) + + testDirectorySizeEstimation(t, hamtDir, ds, func(dir Directory) int { + // Since we created a HAMTDirectory from scratch with size 0 its + // internal sizeChange delta will in fact track the directory size + // throughout this run. + return dir.(*HAMTDirectory).sizeChange + }) +} + +func fullSizeEnumeration(dir Directory) int { + size := 0 + dir.ForEachLink(context.Background(), func(l *ipld.Link) error { + size += linksize.LinkSizeFunction(l.Name, l.Cid) + return nil + }) + return size +} + +func testDirectorySizeEstimation(t *testing.T, dir Directory, ds ipld.DAGService, size func(Directory) int) { + linksize.LinkSizeFunction = mockLinkSizeFunc(1) + defer func() { linksize.LinkSizeFunction = productionLinkSize }() + ctx := context.Background() child := ft.EmptyFileNode() - err := ds.Add(ctx, child) - if err != nil { - t.Fatal(err) - } - - basicDir := newEmptyBasicDirectory(ds) + assert.NoError(t, ds.Add(ctx, child)) // Several overwrites should not corrupt the size estimation. - basicDir.AddChild(ctx, "child", child) - basicDir.AddChild(ctx, "child", child) - basicDir.AddChild(ctx, "child", child) - basicDir.RemoveChild(ctx, "child") - basicDir.AddChild(ctx, "child", child) - basicDir.RemoveChild(ctx, "child") - // FIXME: Check errors above (abstract adds/removals in iteration). - if basicDir.estimatedSize != 0 { - t.Fatal("estimated size is not zero after removing all entries") - } - - for i := 0; i < 100; i++ { - basicDir.AddChild(ctx, fmt.Sprintf("child-%03d", i), child) // e.g., "child-045" - } - // Estimated entry size: name (9) + CID (32 from hash and 2 extra for header) - entrySize := 9 + 32 + 2 - expectedSize := 100 * entrySize - if basicDir.estimatedSize != expectedSize { - t.Fatalf("estimated size (%d) inaccurate after adding many entries (expected %d)", - basicDir.estimatedSize, expectedSize) - } - - basicDir.RemoveChild(ctx, "child-045") // just random values - basicDir.RemoveChild(ctx, "child-063") - basicDir.RemoveChild(ctx, "child-011") - basicDir.RemoveChild(ctx, "child-000") - basicDir.RemoveChild(ctx, "child-099") - - basicDir.RemoveChild(ctx, "child-045") // already removed, won't impact size - basicDir.RemoveChild(ctx, "nonexistent-name") // also doesn't count - basicDir.RemoveChild(ctx, "child-100") // same - expectedSize -= 5 * entrySize - if basicDir.estimatedSize != expectedSize { - t.Fatalf("estimated size (%d) inaccurate after removing some entries (expected %d)", - basicDir.estimatedSize, expectedSize) - } + assert.NoError(t, dir.AddChild(ctx, "child", child)) + assert.NoError(t, dir.AddChild(ctx, "child", child)) + assert.NoError(t, dir.AddChild(ctx, "child", child)) + assert.NoError(t, dir.RemoveChild(ctx, "child")) + assert.NoError(t, dir.AddChild(ctx, "child", child)) + assert.NoError(t, dir.RemoveChild(ctx, "child")) + assert.Equal(t, 0, size(dir), "estimated size is not zero after removing all entries") + + dirEntries := 100 + for i := 0; i < dirEntries; i++ { + assert.NoError(t, dir.AddChild(ctx, fmt.Sprintf("child-%03d", i), child)) + } + assert.Equal(t, dirEntries, size(dir), "estimated size inaccurate after adding many entries") + + assert.NoError(t, dir.RemoveChild(ctx, "child-045")) // just random values + assert.NoError(t, dir.RemoveChild(ctx, "child-063")) + assert.NoError(t, dir.RemoveChild(ctx, "child-011")) + assert.NoError(t, dir.RemoveChild(ctx, "child-000")) + assert.NoError(t, dir.RemoveChild(ctx, "child-099")) + dirEntries -= 5 + assert.Equal(t, dirEntries, size(dir), "estimated size inaccurate after removing some entries") + + // All of the following remove operations will fail (won't impact dirEntries): + assert.Error(t, dir.RemoveChild(ctx, "nonexistent-name")) + assert.Error(t, dir.RemoveChild(ctx, "child-045")) // already removed + assert.Error(t, dir.RemoveChild(ctx, "child-100")) + assert.Equal(t, dirEntries, size(dir), "estimated size inaccurate after failed remove attempts") // Restore a directory from original's node and check estimated size consistency. - basicDirSingleNode, _ := basicDir.GetNode() // no possible error - restoredBasicDir := newBasicDirectoryFromNode(ds, basicDirSingleNode.(*mdag.ProtoNode)) - if basicDir.estimatedSize != restoredBasicDir.estimatedSize { - t.Fatalf("restored basic directory size (%d) doesn't match original estimate (%d)", - basicDir.estimatedSize, restoredBasicDir.estimatedSize) - } + dirNode, err := dir.GetNode() + assert.NoError(t, err) + restoredDir, err := NewDirectoryFromNode(ds, dirNode.(*mdag.ProtoNode)) + assert.NoError(t, err) + assert.Equal(t, size(dir), fullSizeEnumeration(restoredDir), "restored directory's size doesn't match original's") + // We don't use the estimation size function for the restored directory + // because in the HAMT case this function depends on the sizeChange variable + // that will be cleared when loading the directory from the node. + // This also covers the case of comparing the size estimation `size()` with + // the full enumeration function `fullSizeEnumeration()` to make sure it's + // correct. } -// FIXME: Add a similar one for HAMT directory, stressing particularly the -// deleted/overwritten entries and their computation in the size variation. - +// Any entry link size will have the fixedSize passed. func mockLinkSizeFunc(fixedSize int) func(linkName string, linkCid cid.Cid) int { return func(_ string, _ cid.Cid) int { return fixedSize } } -// Basic test on extreme threshold to trigger switch. More fine-grained sizes -// are checked in TestBasicDirectory_estimatedSize (without the swtich itself -// but focusing on the size computation). -// FIXME: Ideally, instead of checking size computation on one test and directory -// upgrade on another a better structured test should test both dimensions -// simultaneously. -func TestUpgradeableDirectory(t *testing.T) { - // FIXME: Modifying these static configuraitons is probably not - // concurrent-friendly. +func checkBasicDirectory(t *testing.T, dir Directory, errorMessage string) { + if _, ok := dir.(*UpgradeableDirectory).Directory.(*BasicDirectory); !ok { + t.Fatal(errorMessage) + } +} + +func checkHAMTDirectory(t *testing.T, dir Directory, errorMessage string) { + if _, ok := dir.(*UpgradeableDirectory).Directory.(*HAMTDirectory); !ok { + t.Fatal(errorMessage) + } +} + +func TestProductionLinkSize(t *testing.T) { + link, err := ipld.MakeLink(ft.EmptyDirNode()) + assert.NoError(t, err) + link.Name = "directory_link_name" + assert.Equal(t, 53, productionLinkSize(link.Name, link.Cid)) + + link, err = ipld.MakeLink(ft.EmptyFileNode()) + assert.NoError(t, err) + link.Name = "file_link_name" + assert.Equal(t, 48, productionLinkSize(link.Name, link.Cid)) + + ds := mdtest.Mock() + basicDir := newEmptyBasicDirectory(ds) + assert.NoError(t, err) + for i := 0; i < 10; i++ { + basicDir.AddChild(context.Background(), strconv.FormatUint(uint64(i), 10), ft.EmptyFileNode()) + } + basicDirNode, err := basicDir.GetNode() + assert.NoError(t, err) + link, err = ipld.MakeLink(basicDirNode) + assert.NoError(t, err) + link.Name = "basic_dir" + assert.Equal(t, 43, productionLinkSize(link.Name, link.Cid)) +} + +// Test HAMTDirectory <-> BasicDirectory switch based on directory size. The +// switch is managed by the UpgradeableDirectory abstraction. +func TestUpgradeableDirectorySwitch(t *testing.T) { oldHamtOption := HAMTShardingSize defer func() { HAMTShardingSize = oldHamtOption }() - estimatedLinkSize = mockLinkSizeFunc(1) - defer func() { estimatedLinkSize = productionLinkSize }() + HAMTShardingSize = 0 // Disable automatic switch at the start. + linksize.LinkSizeFunction = mockLinkSizeFunc(1) + defer func() { linksize.LinkSizeFunction = productionLinkSize }() ds := mdtest.Mock() dir := NewDirectory(ds) + checkBasicDirectory(t, dir, "new dir is not BasicDirectory") + ctx := context.Background() child := ft.EmptyDirNode() err := ds.Add(ctx, child) - if err != nil { - t.Fatal(err) - } + assert.NoError(t, err) - HAMTShardingSize = 0 // Create a BasicDirectory. - if _, ok := dir.(*UpgradeableDirectory).Directory.(*BasicDirectory); !ok { - t.Fatal("UpgradeableDirectory doesn't contain BasicDirectory") - } + err = dir.AddChild(ctx, "1", child) + assert.NoError(t, err) + checkBasicDirectory(t, dir, "added child, option still disabled") // Set a threshold so big a new entry won't trigger the change. HAMTShardingSize = math.MaxInt32 - err = dir.AddChild(ctx, "test", child) - if err != nil { - t.Fatal(err) - } - - if _, ok := dir.(*UpgradeableDirectory).Directory.(*HAMTDirectory); ok { - t.Fatal("UpgradeableDirectory was upgraded to HAMTDirectory for a large threshold") - } + err = dir.AddChild(ctx, "2", child) + assert.NoError(t, err) + checkBasicDirectory(t, dir, "added child, option now enabled but at max") // Now set it so low to make sure any new entry will trigger the upgrade. HAMTShardingSize = 1 - err = dir.AddChild(ctx, "test", child) // overwriting an entry should also trigger the switch - if err != nil { - t.Fatal(err) - } + // We are already above the threshold, we trigger the switch with an overwrite + // (any AddChild() should reevaluate the size). + err = dir.AddChild(ctx, "2", child) + assert.NoError(t, err) + checkHAMTDirectory(t, dir, "added child, option at min, should switch up") - if _, ok := dir.(*UpgradeableDirectory).Directory.(*HAMTDirectory); !ok { - t.Fatal("UpgradeableDirectory wasn't upgraded to HAMTDirectory for a low threshold") - } - upgradedDir := copyDir(t, dir) + // Set threshold at the number of current entries and delete the last one + // to trigger a switch and evaluate if the rest of the entries are conserved. + HAMTShardingSize = 2 + err = dir.RemoveChild(ctx, "2") + assert.NoError(t, err) + checkBasicDirectory(t, dir, "removed threshold entry, option at min, should switch down") +} - // Remove the single entry triggering the switch back to BasicDirectory - err = dir.RemoveChild(ctx, "test") - if err != nil { - t.Fatal(err) +func TestIntegrityOfDirectorySwitch(t *testing.T) { + ds := mdtest.Mock() + dir := NewDirectory(ds) + checkBasicDirectory(t, dir, "new dir is not BasicDirectory") + + ctx := context.Background() + child := ft.EmptyDirNode() + err := ds.Add(ctx, child) + assert.NoError(t, err) + + basicDir := newEmptyBasicDirectory(ds) + hamtDir, err := newEmptyHAMTDirectory(ds, DefaultShardWidth) + assert.NoError(t, err) + for i := 0; i < 1000; i++ { + basicDir.AddChild(ctx, strconv.FormatUint(uint64(i), 10), child) + hamtDir.AddChild(ctx, strconv.FormatUint(uint64(i), 10), child) } - if _, ok := dir.(*UpgradeableDirectory).Directory.(*BasicDirectory); !ok { - t.Fatal("UpgradeableDirectory wasn't downgraded to BasicDirectory after removal of the single entry") + compareDirectoryEntries(t, basicDir, hamtDir) + + hamtDirFromSwitch, err := basicDir.SwitchToSharding(ctx) + assert.NoError(t, err) + basicDirFromSwitch, err := hamtDir.switchToBasic(ctx) + assert.NoError(t, err) + compareDirectoryEntries(t, basicDir, basicDirFromSwitch) + compareDirectoryEntries(t, hamtDir, hamtDirFromSwitch) +} + +// This is the value of concurrent fetches during dag.Walk. Used in +// test to better predict how many nodes will be fetched. +var defaultConcurrentFetch = 32 + +// FIXME: Taken from private github.com/ipfs/go-merkledag@v0.2.3/merkledag.go. +// (We can also pass an explicit concurrency value in `(*Shard).EnumLinksAsync()` +// and take ownership of this configuration, but departing from the more +// standard and reliable one in `go-merkledag`. + +// Test that we fetch as little nodes as needed to reach the HAMTShardingSize +// during the sizeBelowThreshold computation. +func TestHAMTEnumerationWhenComputingSize(t *testing.T) { + // Adjust HAMT global/static options for the test to simplify its logic. + // FIXME: These variables weren't designed to be modified and we should + // review in depth side effects. + + // Set all link sizes to a uniform 1 so the estimated directory size + // is just the count of its entry links (in HAMT/Shard terminology these + // are the "value" links pointing to anything that is *not* another Shard). + linksize.LinkSizeFunction = mockLinkSizeFunc(1) + defer func() { linksize.LinkSizeFunction = productionLinkSize }() + + // Use an identity hash function to ease the construction of "complete" HAMTs + // (see CreateCompleteHAMT below for more details). (Ideally this should be + // a parameter we pass and not a global option we modify in the caller.) + oldHashFunc := internal.HAMTHashFunction + defer func() { internal.HAMTHashFunction = oldHashFunc }() + internal.HAMTHashFunction = idHash + + oldHamtOption := HAMTShardingSize + defer func() { HAMTShardingSize = oldHamtOption }() + + // --- End of test static configuration adjustments. --- + + // Some arbitrary values below that make this test not that expensive. + treeHeight := 4 + // How many leaf shards nodes (with value links, + // i.e., directory entries) do we need to reach the threshold. + thresholdToWidthRatio := 4 + // Departing from DefaultShardWidth of 256 to reduce HAMT size in + // CreateCompleteHAMT. + shardWidth := 16 + HAMTShardingSize = shardWidth * thresholdToWidthRatio + + // We create a "complete" HAMT (see CreateCompleteHAMT for more details) + // with a regular structure to be able to predict how many Shard nodes we + // will need to fetch in order to reach the HAMTShardingSize threshold in + // sizeBelowThreshold (assuming a sequential DAG walk function). + ds := mdtest.Mock() + completeHAMTRoot, err := CreateCompleteHAMT(ds, treeHeight, shardWidth) + assert.NoError(t, err) + + // With this structure and a BFS traversal (from `parallelWalkDepth`) then + // we would roughly fetch the following nodes: + nodesToFetch := 0 + // * all layers up to (but not including) the last one with leaf nodes + // (because it's a BFS) + for i := 0; i < treeHeight-1; i++ { + nodesToFetch += int(math.Pow(float64(shardWidth), float64(i))) } + // * `thresholdToWidthRatio` leaf Shards with enough value links to reach + // the HAMTShardingSize threshold. + nodesToFetch += thresholdToWidthRatio - // Check integrity between switches. - // We need to account for the removed entry that triggered the switch - // back. - // FIXME: Abstract this for arbitrary entries. - missingLink, err := ipld.MakeLink(child) + countGetsDS := newCountGetsDS(ds) + hamtDir, err := newHAMTDirectoryFromNode(countGetsDS, completeHAMTRoot) assert.NoError(t, err) - missingLink.Name = "test" - compareDirectoryEntries(t, upgradedDir, dir, []*ipld.Link{missingLink}) + + countGetsDS.resetCounter() + countGetsDS.setRequestDelay(10 * time.Millisecond) + // (Without the `setRequestDelay` above the number of nodes fetched + // drops dramatically and unpredictably as the BFS starts to behave + // more like a DFS because some search paths are fetched faster than + // others.) + below, err := hamtDir.sizeBelowThreshold(context.TODO(), 0) + assert.NoError(t, err) + assert.False(t, below) + t.Logf("fetched %d nodes (predicted range: %d-%d)", + countGetsDS.uniqueCidsFetched(), nodesToFetch, nodesToFetch+defaultConcurrentFetch) + // Check that the actual number of nodes fetched is within the margin of the + // estimated `nodesToFetch` plus an extra of `defaultConcurrentFetch` since + // we are fetching in parallel. + assert.True(t, countGetsDS.uniqueCidsFetched() <= nodesToFetch+defaultConcurrentFetch) + assert.True(t, countGetsDS.uniqueCidsFetched() >= nodesToFetch) } // Compare entries in the leftDir against the rightDir and possibly // missingEntries in the second. -func compareDirectoryEntries(t *testing.T, leftDir Directory, rightDir Directory, missingEntries []*ipld.Link) { +func compareDirectoryEntries(t *testing.T, leftDir Directory, rightDir Directory) { leftLinks, err := getAllLinksSortedByName(leftDir) assert.NoError(t, err) rightLinks, err := getAllLinksSortedByName(rightDir) assert.NoError(t, err) - rightLinks = append(rightLinks, missingEntries...) - sortLinksByName(rightLinks) assert.Equal(t, len(leftLinks), len(rightLinks)) @@ -283,28 +426,6 @@ func sortLinksByName(l []*ipld.Link) { }) } -func copyDir(t *testing.T, d Directory) Directory { - dirNode, err := d.GetNode() - assert.NoError(t, err) - // Extract the DAG service from the directory (i.e., its link entries saved - // in it). This is not exposed in the interface and we won't change that now. - // FIXME: Still, this isn't nice. - var ds ipld.DAGService - switch v := d.(type) { - case *BasicDirectory: - ds = v.dserv - case *HAMTDirectory: - ds = v.dserv - case *UpgradeableDirectory: - ds = v.getDagService() - default: - panic("unknown directory type") - } - copiedDir, err := NewDirectoryFromNode(ds, dirNode) - assert.NoError(t, err) - return copiedDir -} - func TestDirBuilder(t *testing.T) { ds := mdtest.Mock() dir := NewDirectory(ds) @@ -389,3 +510,89 @@ func TestDirBuilder(t *testing.T) { t.Fatal("wrong number of links", len(asyncLinks), count) } } + +func newHAMTDirectoryFromNode(dserv ipld.DAGService, node ipld.Node) (*HAMTDirectory, error) { + shard, err := hamt.NewHamtFromDag(dserv, node) + if err != nil { + return nil, err + } + return &HAMTDirectory{ + dserv: dserv, + shard: shard, + }, nil +} + +func newEmptyHAMTDirectory(dserv ipld.DAGService, shardWidth int) (*HAMTDirectory, error) { + shard, err := hamt.NewShard(dserv, shardWidth) + if err != nil { + return nil, err + } + + return &HAMTDirectory{ + dserv: dserv, + shard: shard, + }, nil +} + +// countGetsDS is a DAG service that keeps track of the number of +// unique CIDs fetched. +type countGetsDS struct { + ipld.DAGService + + cidsFetched map[cid.Cid]struct{} + mapLock sync.Mutex + + getRequestDelay time.Duration +} + +var _ ipld.DAGService = (*countGetsDS)(nil) + +func newCountGetsDS(ds ipld.DAGService) *countGetsDS { + return &countGetsDS{ + ds, + make(map[cid.Cid]struct{}), + sync.Mutex{}, + 0, + } +} + +func (d *countGetsDS) resetCounter() { + d.mapLock.Lock() + defer d.mapLock.Unlock() + d.cidsFetched = make(map[cid.Cid]struct{}) +} + +func (d *countGetsDS) uniqueCidsFetched() int { + d.mapLock.Lock() + defer d.mapLock.Unlock() + return len(d.cidsFetched) +} + +func (d *countGetsDS) setRequestDelay(timeout time.Duration) { + d.getRequestDelay = timeout +} + +func (d *countGetsDS) Get(ctx context.Context, c cid.Cid) (ipld.Node, error) { + node, err := d.DAGService.Get(ctx, c) + if err != nil { + return nil, err + } + + d.mapLock.Lock() + _, cidRequestedBefore := d.cidsFetched[c] + d.cidsFetched[c] = struct{}{} + d.mapLock.Unlock() + + if d.getRequestDelay != 0 && !cidRequestedBefore { + // First request gets a timeout to simulate a network fetch. + // Subsequent requests get no timeout simulating an in-disk cache. + time.Sleep(d.getRequestDelay) + } + + return node, nil +} + +// Process sequentially (blocking) calling Get which tracks requests. +func (d *countGetsDS) GetMany(ctx context.Context, cids []cid.Cid) <-chan *ipld.NodeOption { + panic("GetMany not supported") +} diff --git a/private/linksize/linksize.go b/private/linksize/linksize.go new file mode 100644 index 000000000..e7ae098b6 --- /dev/null +++ b/private/linksize/linksize.go @@ -0,0 +1,5 @@ +package linksize + +import "github.com/ipfs/go-cid" + +var LinkSizeFunction func(linkName string, linkCid cid.Cid) int From 8af3612d3b3784c846a821719e6ad79e3ab26d88 Mon Sep 17 00:00:00 2001 From: Lucas Molas Date: Fri, 22 Oct 2021 18:26:46 -0300 Subject: [PATCH 28/38] go mod tidy --- go.sum | 1 + 1 file changed, 1 insertion(+) diff --git a/go.sum b/go.sum index 7ce062ee0..f6494e02c 100644 --- a/go.sum +++ b/go.sum @@ -367,6 +367,7 @@ golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 h1:go1bK/D/BFZV2I8cIQd1N golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= google.golang.org/genproto v0.0.0-20180831171423-11092d34479b/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 h1:qIbj1fsPNlZgppZ+VLlY7N33q108Sa+fhmuc+sWQYwY= gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI= gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys= From c8c496aa05878fa9e026165e465da5148508e3b0 Mon Sep 17 00:00:00 2001 From: Adin Schmahmann Date: Fri, 22 Oct 2021 17:42:26 -0400 Subject: [PATCH 29/38] use explicit returns in setValue --- hamt/hamt.go | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/hamt/hamt.go b/hamt/hamt.go index 7e770c9d6..74a8d2759 100644 --- a/hamt/hamt.go +++ b/hamt/hamt.go @@ -460,10 +460,10 @@ func (ds *Shard) walkTrie(ctx context.Context, cb func(*Shard) error) error { // setValue sets the link `value` in the given key, either creating the entry // if it didn't exist or overwriting the old one. It returns the old entry (if any). -func (ds *Shard) setValue(ctx context.Context, hv *hashBits, key string, value *ipld.Link) (oldValue *ipld.Link, err error) { +func (ds *Shard) setValue(ctx context.Context, hv *hashBits, key string, value *ipld.Link) (*ipld.Link, error) { idx, err := hv.Next(ds.tableSizeLg2) if err != nil { - return + return nil, err } if !ds.childer.has(idx) { @@ -474,7 +474,7 @@ func (ds *Shard) setValue(ctx context.Context, hv *hashBits, key string, value * i := ds.childer.sliceIndex(idx) child, err := ds.childer.get(ctx, i) if err != nil { - return + return nil, err } if child.isValueNode() { @@ -482,14 +482,14 @@ func (ds *Shard) setValue(ctx context.Context, hv *hashBits, key string, value * if child.key == key { // We are in the correct shard (tree level) so we modify this child // and return. - oldValue = child.val + oldValue := child.val if value == nil { // Remove old entry. return oldValue, ds.childer.rm(idx) } child.val = value // Overwrite entry. - return + return oldValue, nil } if value == nil { @@ -519,11 +519,11 @@ func (ds *Shard) setValue(ctx context.Context, hv *hashBits, key string, value * // will create new ones until we find different slots for both.) _, err = child.setValue(ctx, hv, key, value) if err != nil { - return + return nil, err } _, err = child.setValue(ctx, chhv, grandChild.key, grandChild.val) if err != nil { - return + return nil, err } // Replace this leaf node with the new Shard node. @@ -532,9 +532,9 @@ func (ds *Shard) setValue(ctx context.Context, hv *hashBits, key string, value * } else { // We are in a Shard (internal node). We will recursively call this // function until finding the leaf (the logic of the `if` case above). - oldValue, err = child.setValue(ctx, hv, key, value) + oldValue, err := child.setValue(ctx, hv, key, value) if err != nil { - return + return nil, err } if value == nil { @@ -558,7 +558,7 @@ func (ds *Shard) setValue(ctx context.Context, hv *hashBits, key string, value * if schild.isValueNode() { ds.childer.set(schild, i) } - return + return oldValue, nil } // Otherwise, work with the link. @@ -566,17 +566,17 @@ func (ds *Shard) setValue(ctx context.Context, hv *hashBits, key string, value * var lnkType linkType lnkType, err = child.childer.sd.childLinkType(slnk) if err != nil { - return + return nil, err } if lnkType == shardValueLink { // sub-shard with a single value element, collapse it ds.childer.setLink(slnk, i) } - return + return oldValue, nil } } - return + return oldValue, nil } } From 83ad983737ea445fd55b50f3467e7b8672ac59b2 Mon Sep 17 00:00:00 2001 From: Lucas Molas Date: Mon, 25 Oct 2021 17:35:11 -0300 Subject: [PATCH 30/38] add default value for HAMTShardingSize of 256 KiB --- go.mod | 1 + go.sum | 2 ++ io/directory.go | 3 ++- 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/go.mod b/go.mod index c8f6da680..aac677992 100644 --- a/go.mod +++ b/go.mod @@ -1,6 +1,7 @@ module github.com/ipfs/go-unixfs require ( + github.com/alecthomas/units v0.0.0-20210927113745-59d0afb8317a github.com/gogo/protobuf v1.3.2 github.com/gopherjs/gopherjs v0.0.0-20190430165422-3e4dfb77656c // indirect github.com/ipfs/go-bitfield v1.0.0 diff --git a/go.sum b/go.sum index f6494e02c..5d067042f 100644 --- a/go.sum +++ b/go.sum @@ -3,6 +3,8 @@ github.com/BurntSushi/toml v0.3.1 h1:WXkYYl6Yr3qBf1K79EBnL4mak0OimBfB0XUf9Vl28OQ github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= github.com/Kubuxu/go-os-helper v0.0.1/go.mod h1:N8B+I7vPCT80IcP58r50u4+gEEcsZETFUpAzWW2ep1Y= github.com/aead/siphash v1.0.1/go.mod h1:Nywa3cDsYNNK3gaciGTWPwHt0wlpNV15vwmswBAUSII= +github.com/alecthomas/units v0.0.0-20210927113745-59d0afb8317a h1:E/8AP5dFtMhl5KPJz66Kt9G0n+7Sn41Fy1wv9/jHOrc= +github.com/alecthomas/units v0.0.0-20210927113745-59d0afb8317a/go.mod h1:OMCwj8VM1Kc9e19TLln2VL61YJF0x1XFtfdL4JdbSyE= github.com/btcsuite/btcd v0.0.0-20190213025234-306aecffea32/go.mod h1:DrZx5ec/dmnfpw9KyYoQyYo7d0KEvTkk/5M/vbZjAr8= github.com/btcsuite/btcd v0.0.0-20190523000118-16327141da8c/go.mod h1:3J08xEfcugPacsc34/LKRU2yO7YmuT8yt28J8k2+rrI= github.com/btcsuite/btcd v0.0.0-20190605094302-a0d1e3e36d50 h1:4i3KsuVA0o0KoBxAC5x+MY7RbteiMK1V7gf/G08NGIQ= diff --git a/io/directory.go b/io/directory.go index b8915a3b0..50aaee104 100644 --- a/io/directory.go +++ b/io/directory.go @@ -8,6 +8,7 @@ import ( "github.com/ipfs/go-unixfs/hamt" "github.com/ipfs/go-unixfs/private/linksize" + "github.com/alecthomas/units" "github.com/ipfs/go-cid" ipld "github.com/ipfs/go-ipld-format" logging "github.com/ipfs/go-log" @@ -23,7 +24,7 @@ var log = logging.Logger("unixfs") // The size is not the *exact* block size of the encoded BasicDirectory but just // the estimated size based byte length of links name and CID (BasicDirectory's // ProtoNode doesn't use the Data field so this estimate is pretty accurate). -var HAMTShardingSize = 0 +var HAMTShardingSize = int(256 * units.KiB) // DefaultShardWidth is the default value used for hamt sharding width. // Needs to be a power of two (shard entry size) and multiple of 8 (bitfield size). From 20d951f6ae5ec92b9527d3b6fa0f192d6dc02a8e Mon Sep 17 00:00:00 2001 From: Adin Schmahmann Date: Thu, 11 Nov 2021 19:15:31 -0500 Subject: [PATCH 31/38] feat: hamt enumlinks custom (#111) * feat: use custom dag traversal for HAMT link enumeration * fix comments in completehamt_test.go Co-authored-by: Lucas Molas --- go.mod | 6 ++ go.sum | 1 + hamt/hamt.go | 200 +++++++++++++++++++++++++++++++++------- io/completehamt_test.go | 3 +- io/directory_test.go | 80 ++++++++++++---- 5 files changed, 236 insertions(+), 54 deletions(-) diff --git a/go.mod b/go.mod index aac677992..d981b36f7 100644 --- a/go.mod +++ b/go.mod @@ -6,8 +6,13 @@ require ( github.com/gopherjs/gopherjs v0.0.0-20190430165422-3e4dfb77656c // indirect github.com/ipfs/go-bitfield v1.0.0 github.com/ipfs/go-bitswap v0.1.2 // indirect + github.com/ipfs/go-block-format v0.0.2 + github.com/ipfs/go-blockservice v0.1.0 github.com/ipfs/go-cid v0.0.7 + github.com/ipfs/go-datastore v0.0.5 + github.com/ipfs/go-ipfs-blockstore v0.0.1 github.com/ipfs/go-ipfs-chunker v0.0.1 + github.com/ipfs/go-ipfs-exchange-offline v0.0.1 github.com/ipfs/go-ipfs-files v0.0.3 github.com/ipfs/go-ipfs-posinfo v0.0.1 github.com/ipfs/go-ipfs-util v0.0.1 @@ -21,6 +26,7 @@ require ( github.com/spaolacci/murmur3 v1.1.0 github.com/stretchr/testify v1.7.0 github.com/warpfork/go-wish v0.0.0-20190328234359-8b3e70f8e830 // indirect + golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9 ) go 1.16 diff --git a/go.sum b/go.sum index 5d067042f..ca32d1143 100644 --- a/go.sum +++ b/go.sum @@ -335,6 +335,7 @@ golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwY golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9 h1:SQFwaSi55rU7vdNs9Yr0Z324VNlrF+0wMqRXT4St8ck= golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= diff --git a/hamt/hamt.go b/hamt/hamt.go index 74a8d2759..c3e7ce1a7 100644 --- a/hamt/hamt.go +++ b/hamt/hamt.go @@ -24,6 +24,9 @@ import ( "context" "fmt" "os" + "sync" + + "golang.org/x/sync/errgroup" format "github.com/ipfs/go-unixfs" "github.com/ipfs/go-unixfs/internal" @@ -372,14 +375,11 @@ func (ds *Shard) EnumLinksAsync(ctx context.Context) <-chan format.LinkResult { go func() { defer close(linkResults) defer cancel() - getLinks := makeAsyncTrieGetLinks(ds.dserv, linkResults) - cset := cid.NewSet() - rootNode, err := ds.Node() - if err != nil { - emitResult(ctx, linkResults, format.LinkResult{Link: nil, Err: err}) - return - } - err = dag.Walk(ctx, getLinks, rootNode.Cid(), cset.Visit, dag.Concurrent()) + + err := parallelShardWalk(ctx, ds, ds.dserv, func(formattedLink *ipld.Link) error { + emitResult(ctx, linkResults, format.LinkResult{Link: formattedLink, Err: nil}) + return nil + }) if err != nil { emitResult(ctx, linkResults, format.LinkResult{Link: nil, Err: err}) } @@ -387,44 +387,178 @@ func (ds *Shard) EnumLinksAsync(ctx context.Context) <-chan format.LinkResult { return linkResults } -// makeAsyncTrieGetLinks builds a getLinks function that can be used with EnumerateChildrenAsync -// to iterate a HAMT shard. It takes an IPLD Dag Service to fetch nodes, and a call back that will get called -// on all links to leaf nodes in a HAMT tree, so they can be collected for an EnumLinks operation -func makeAsyncTrieGetLinks(dagService ipld.DAGService, linkResults chan<- format.LinkResult) dag.GetLinks { - - return func(ctx context.Context, currentCid cid.Cid) ([]*ipld.Link, error) { - node, err := dagService.Get(ctx, currentCid) - if err != nil { - return nil, err - } - directoryShard, err := NewHamtFromDag(dagService, node) - if err != nil { - return nil, err - } +type listCidsAndShards struct { + cids []cid.Cid + shards []*Shard +} - childShards := make([]*ipld.Link, 0, directoryShard.childer.length()) - links := directoryShard.childer.links - for idx := range directoryShard.childer.children { - lnk := links[idx] - lnkLinkType, err := directoryShard.childLinkType(lnk) +func (ds *Shard) walkChildren(processLinkValues func(formattedLink *ipld.Link) error) (*listCidsAndShards, error) { + res := &listCidsAndShards{} + for idx, lnk := range ds.childer.links { + if nextShard := ds.childer.children[idx]; nextShard == nil { + lnkLinkType, err := ds.childLinkType(lnk) if err != nil { return nil, err } - if lnkLinkType == shardLink { - childShards = append(childShards, lnk) - } else { - sv, err := directoryShard.makeShardValue(lnk) + + switch lnkLinkType { + case shardValueLink: + sv, err := ds.makeShardValue(lnk) if err != nil { return nil, err } formattedLink := sv.val formattedLink.Name = sv.key - emitResult(ctx, linkResults, format.LinkResult{Link: formattedLink, Err: nil}) + + if err := processLinkValues(formattedLink); err != nil { + return nil, err + } + case shardLink: + res.cids = append(res.cids, lnk.Cid) + default: + return nil, fmt.Errorf("unsupported shard link type") + } + + } else { + if nextShard.val != nil { + formattedLink := &ipld.Link{ + Name: nextShard.key, + Size: nextShard.val.Size, + Cid: nextShard.val.Cid, + } + if err := processLinkValues(formattedLink); err != nil { + return nil, err + } + } else { + res.shards = append(res.shards, nextShard) + } + } + } + return res, nil +} + +// parallelShardWalk is quite similar to the DAG walking algorithm from https://github.com/ipfs/go-merkledag/blob/594e515f162e764183243b72c2ba84f743424c8c/merkledag.go#L464 +// However, there are a few notable differences: +// 1. Some children are actualized Shard structs and some are in the blockstore, this will leverage walking over the in memory Shards as well as the stored blocks +// 2. Instead of just passing each child into the worker pool by itself we group them so that we can leverage optimizations from GetMany. +// This optimization also makes the walk a little more biased towards depth (as opposed to BFS) in the earlier part of the DAG. +// This is particularly helpful for operations like estimating the directory size which should complete quickly when possible. +// 3. None of the extra options from that package are needed +func parallelShardWalk(ctx context.Context, root *Shard, dserv ipld.DAGService, processShardValues func(formattedLink *ipld.Link) error) error { + const concurrency = 32 + + var visitlk sync.Mutex + visitSet := cid.NewSet() + visit := visitSet.Visit + + // Setup synchronization + grp, errGrpCtx := errgroup.WithContext(ctx) + + // Input and output queues for workers. + feed := make(chan *listCidsAndShards) + out := make(chan *listCidsAndShards) + done := make(chan struct{}) + + for i := 0; i < concurrency; i++ { + grp.Go(func() error { + for feedChildren := range feed { + for _, nextShard := range feedChildren.shards { + nextChildren, err := nextShard.walkChildren(processShardValues) + if err != nil { + return err + } + + select { + case out <- nextChildren: + case <-errGrpCtx.Done(): + return nil + } + } + + var linksToVisit []cid.Cid + for _, nextCid := range feedChildren.cids { + var shouldVisit bool + + visitlk.Lock() + shouldVisit = visit(nextCid) + visitlk.Unlock() + + if shouldVisit { + linksToVisit = append(linksToVisit, nextCid) + } + } + + chNodes := dserv.GetMany(errGrpCtx, linksToVisit) + for optNode := range chNodes { + if optNode.Err != nil { + return optNode.Err + } + + nextShard, err := NewHamtFromDag(dserv, optNode.Node) + if err != nil { + return err + } + + nextChildren, err := nextShard.walkChildren(processShardValues) + if err != nil { + return err + } + + select { + case out <- nextChildren: + case <-errGrpCtx.Done(): + return nil + } + } + + select { + case done <- struct{}{}: + case <-errGrpCtx.Done(): + } + } + return nil + }) + } + + send := feed + var todoQueue []*listCidsAndShards + var inProgress int + + next := &listCidsAndShards{ + shards: []*Shard{root}, + } + +dispatcherLoop: + for { + select { + case send <- next: + inProgress++ + if len(todoQueue) > 0 { + next = todoQueue[0] + todoQueue = todoQueue[1:] + } else { + next = nil + send = nil + } + case <-done: + inProgress-- + if inProgress == 0 && next == nil { + break dispatcherLoop + } + case nextNodes := <-out: + if next == nil { + next = nextNodes + send = feed + } else { + todoQueue = append(todoQueue, nextNodes) } + case <-errGrpCtx.Done(): + break dispatcherLoop } - return childShards, nil } + close(feed) + return grp.Wait() } func emitResult(ctx context.Context, linkResults chan<- format.LinkResult, r format.LinkResult) { diff --git a/io/completehamt_test.go b/io/completehamt_test.go index 1bb3d8720..7995ac1d7 100644 --- a/io/completehamt_test.go +++ b/io/completehamt_test.go @@ -21,7 +21,8 @@ import ( // * all leaf Shard nodes have the same depth (and have only 'value' links). // * all internal Shard nodes point only to other Shards (and hence have zero 'value' links). // * the total number of 'value' links (directory entries) is: -// io.DefaultShardWidth ^ (treeHeight + 1). +// childsPerNode ^ (treeHeight). +// treeHeight: The number of layers of non-value HAMT nodes (e.g. height = 1 is a single shard pointing to some values) // FIXME: HAMTHashFunction needs to be set to idHash by the caller. We depend on // this simplification for the current logic to work. (HAMTHashFunction is a // global setting of the package, it is hard-coded in the serialized Shard node diff --git a/io/directory_test.go b/io/directory_test.go index 909f9b4fd..fd58438ed 100644 --- a/io/directory_test.go +++ b/io/directory_test.go @@ -11,7 +11,13 @@ import ( "testing" "time" + blocks "github.com/ipfs/go-block-format" + bsrv "github.com/ipfs/go-blockservice" cid "github.com/ipfs/go-cid" + ds "github.com/ipfs/go-datastore" + dssync "github.com/ipfs/go-datastore/sync" + blockstore "github.com/ipfs/go-ipfs-blockstore" + offline "github.com/ipfs/go-ipfs-exchange-offline" ipld "github.com/ipfs/go-ipld-format" mdag "github.com/ipfs/go-merkledag" mdtest "github.com/ipfs/go-merkledag/test" @@ -358,10 +364,23 @@ func TestHAMTEnumerationWhenComputingSize(t *testing.T) { // with a regular structure to be able to predict how many Shard nodes we // will need to fetch in order to reach the HAMTShardingSize threshold in // sizeBelowThreshold (assuming a sequential DAG walk function). - ds := mdtest.Mock() - completeHAMTRoot, err := CreateCompleteHAMT(ds, treeHeight, shardWidth) + + bstore := blockstore.NewBlockstore(dssync.MutexWrap(ds.NewMapDatastore())) + countGetsDS := newCountGetsDS(bstore) + dsrv := mdag.NewDAGService(bsrv.New(countGetsDS, offline.Exchange(countGetsDS))) + completeHAMTRoot, err := CreateCompleteHAMT(dsrv, treeHeight, shardWidth) assert.NoError(t, err) + // Calculate the optimal number of nodes to traverse + optimalNodesToFetch := 0 + nodesToProcess := HAMTShardingSize + for i := 0; i < treeHeight-1; i++ { + // divide by the shard width to get the parents and continue up the tree + parentNodes := int(math.Ceil(float64(nodesToProcess) / float64(shardWidth))) + optimalNodesToFetch += parentNodes + nodesToProcess = parentNodes + } + // With this structure and a BFS traversal (from `parallelWalkDepth`) then // we would roughly fetch the following nodes: nodesToFetch := 0 @@ -374,8 +393,7 @@ func TestHAMTEnumerationWhenComputingSize(t *testing.T) { // the HAMTShardingSize threshold. nodesToFetch += thresholdToWidthRatio - countGetsDS := newCountGetsDS(ds) - hamtDir, err := newHAMTDirectoryFromNode(countGetsDS, completeHAMTRoot) + hamtDir, err := newHAMTDirectoryFromNode(dsrv, completeHAMTRoot) assert.NoError(t, err) countGetsDS.resetCounter() @@ -388,12 +406,12 @@ func TestHAMTEnumerationWhenComputingSize(t *testing.T) { assert.NoError(t, err) assert.False(t, below) t.Logf("fetched %d nodes (predicted range: %d-%d)", - countGetsDS.uniqueCidsFetched(), nodesToFetch, nodesToFetch+defaultConcurrentFetch) + countGetsDS.uniqueCidsFetched(), optimalNodesToFetch, nodesToFetch+defaultConcurrentFetch) // Check that the actual number of nodes fetched is within the margin of the // estimated `nodesToFetch` plus an extra of `defaultConcurrentFetch` since // we are fetching in parallel. assert.True(t, countGetsDS.uniqueCidsFetched() <= nodesToFetch+defaultConcurrentFetch) - assert.True(t, countGetsDS.uniqueCidsFetched() >= nodesToFetch) + assert.True(t, countGetsDS.uniqueCidsFetched() >= optimalNodesToFetch) } // Compare entries in the leftDir against the rightDir and possibly @@ -537,21 +555,23 @@ func newEmptyHAMTDirectory(dserv ipld.DAGService, shardWidth int) (*HAMTDirector // countGetsDS is a DAG service that keeps track of the number of // unique CIDs fetched. type countGetsDS struct { - ipld.DAGService + blockstore.Blockstore cidsFetched map[cid.Cid]struct{} mapLock sync.Mutex + started bool getRequestDelay time.Duration } -var _ ipld.DAGService = (*countGetsDS)(nil) +var _ blockstore.Blockstore = (*countGetsDS)(nil) -func newCountGetsDS(ds ipld.DAGService) *countGetsDS { +func newCountGetsDS(bs blockstore.Blockstore) *countGetsDS { return &countGetsDS{ - ds, + bs, make(map[cid.Cid]struct{}), sync.Mutex{}, + false, 0, } } @@ -560,6 +580,7 @@ func (d *countGetsDS) resetCounter() { d.mapLock.Lock() defer d.mapLock.Unlock() d.cidsFetched = make(map[cid.Cid]struct{}) + d.started = true } func (d *countGetsDS) uniqueCidsFetched() int { @@ -572,12 +593,7 @@ func (d *countGetsDS) setRequestDelay(timeout time.Duration) { d.getRequestDelay = timeout } -func (d *countGetsDS) Get(ctx context.Context, c cid.Cid) (ipld.Node, error) { - node, err := d.DAGService.Get(ctx, c) - if err != nil { - return nil, err - } - +func (d *countGetsDS) maybeSleep(c cid.Cid) { d.mapLock.Lock() _, cidRequestedBefore := d.cidsFetched[c] d.cidsFetched[c] = struct{}{} @@ -588,11 +604,35 @@ func (d *countGetsDS) Get(ctx context.Context, c cid.Cid) (ipld.Node, error) { // Subsequent requests get no timeout simulating an in-disk cache. time.Sleep(d.getRequestDelay) } +} - return node, nil +func (d *countGetsDS) Has(c cid.Cid) (bool, error) { + if d.started { + panic("implement me") + } + return d.Blockstore.Has(c) } -// Process sequentially (blocking) calling Get which tracks requests. -func (d *countGetsDS) GetMany(ctx context.Context, cids []cid.Cid) <-chan *ipld.NodeOption { - panic("GetMany not supported") +func (d *countGetsDS) Get(c cid.Cid) (blocks.Block, error) { + blk, err := d.Blockstore.Get(c) + if err != nil { + return nil, err + } + + d.maybeSleep(c) + return blk, nil +} + +func (d *countGetsDS) GetSize(c cid.Cid) (int, error) { + if d.started { + panic("implement me") + } + return d.Blockstore.GetSize(c) +} + +func (d *countGetsDS) AllKeysChan(ctx context.Context) (<-chan cid.Cid, error) { + if d.started { + panic("implement me") + } + return d.Blockstore.AllKeysChan(ctx) } From 5d4e0960eef1ca4ddaf6d5770cb8cf01075a6575 Mon Sep 17 00:00:00 2001 From: Lucas Molas Date: Thu, 11 Nov 2021 21:10:20 -0300 Subject: [PATCH 32/38] rename hamt functions --- hamt/hamt.go | 26 +++++++++++++------------- io/completehamt_test.go | 4 ++-- io/directory.go | 4 ++-- 3 files changed, 17 insertions(+), 17 deletions(-) diff --git a/hamt/hamt.go b/hamt/hamt.go index c3e7ce1a7..ac1c5e458 100644 --- a/hamt/hamt.go +++ b/hamt/hamt.go @@ -225,14 +225,14 @@ func (ds *Shard) makeShardValue(lnk *ipld.Link) (*Shard, error) { // Set sets 'name' = nd in the HAMT func (ds *Shard) Set(ctx context.Context, name string, nd ipld.Node) error { - _, err := ds.SetAndPrevious(ctx, name, nd) + _, err := ds.Swap(ctx, name, nd) return err } -// SetAndPrevious sets a link pointing to the passed node as the value under the +// Swap sets a link pointing to the passed node as the value under the // name key in this Shard or its children. It also returns the previous link // under that name key (if any). -func (ds *Shard) SetAndPrevious(ctx context.Context, name string, node ipld.Node) (*ipld.Link, error) { +func (ds *Shard) Swap(ctx context.Context, name string, node ipld.Node) (*ipld.Link, error) { hv := newHashBits(name) err := ds.dserv.Add(ctx, node) if err != nil { @@ -248,21 +248,21 @@ func (ds *Shard) SetAndPrevious(ctx context.Context, name string, node ipld.Node // This is confusing, confirm and remove this line. lnk.Name = ds.linkNamePrefix(0) + name - return ds.setValue(ctx, hv, name, lnk) + return ds.swapValue(ctx, hv, name, lnk) } // Remove deletes the named entry if it exists. Otherwise, it returns // os.ErrNotExist. func (ds *Shard) Remove(ctx context.Context, name string) error { - _, err := ds.RemoveAndPrevious(ctx, name) + _, err := ds.Take(ctx, name) return err } -// RemoveAndPrevious is similar to the public Remove but also returns the +// Take is similar to the public Remove but also returns the // old removed link (if it exists). -func (ds *Shard) RemoveAndPrevious(ctx context.Context, name string) (*ipld.Link, error) { +func (ds *Shard) Take(ctx context.Context, name string) (*ipld.Link, error) { hv := newHashBits(name) - return ds.setValue(ctx, hv, name, nil) + return ds.swapValue(ctx, hv, name, nil) } // Find searches for a child node by 'name' within this hamt @@ -592,9 +592,9 @@ func (ds *Shard) walkTrie(ctx context.Context, cb func(*Shard) error) error { }) } -// setValue sets the link `value` in the given key, either creating the entry +// swapValue sets the link `value` in the given key, either creating the entry // if it didn't exist or overwriting the old one. It returns the old entry (if any). -func (ds *Shard) setValue(ctx context.Context, hv *hashBits, key string, value *ipld.Link) (*ipld.Link, error) { +func (ds *Shard) swapValue(ctx context.Context, hv *hashBits, key string, value *ipld.Link) (*ipld.Link, error) { idx, err := hv.Next(ds.tableSizeLg2) if err != nil { return nil, err @@ -651,11 +651,11 @@ func (ds *Shard) setValue(ctx context.Context, hv *hashBits, key string, value * // (which will be nil) to highlight there is no overwrite here: they are // done with different keys to a new (empty) shard. (At best this shard // will create new ones until we find different slots for both.) - _, err = child.setValue(ctx, hv, key, value) + _, err = child.swapValue(ctx, hv, key, value) if err != nil { return nil, err } - _, err = child.setValue(ctx, chhv, grandChild.key, grandChild.val) + _, err = child.swapValue(ctx, chhv, grandChild.key, grandChild.val) if err != nil { return nil, err } @@ -666,7 +666,7 @@ func (ds *Shard) setValue(ctx context.Context, hv *hashBits, key string, value * } else { // We are in a Shard (internal node). We will recursively call this // function until finding the leaf (the logic of the `if` case above). - oldValue, err := child.setValue(ctx, hv, key, value) + oldValue, err := child.swapValue(ctx, hv, key, value) if err != nil { return nil, err } diff --git a/io/completehamt_test.go b/io/completehamt_test.go index 7995ac1d7..50be9649e 100644 --- a/io/completehamt_test.go +++ b/io/completehamt_test.go @@ -28,7 +28,7 @@ import ( // global setting of the package, it is hard-coded in the serialized Shard node // and not allowed to be changed on a per HAMT/Shard basis.) // (If we didn't rehash inside setValue then we could just generate -// the fake hash as in io.SetAndPrevious through `newHashBits()` and pass +// the fake hash as in io.Swap through `newHashBits()` and pass // it as an argument making the hash independent of tree manipulation; that // sounds as the correct way to go in general and we wouldn't need this.) func CreateCompleteHAMT(ds ipld.DAGService, treeHeight int, childsPerNode int) (ipld.Node, error) { @@ -59,7 +59,7 @@ func CreateCompleteHAMT(ds ipld.DAGService, treeHeight int, childsPerNode int) ( var hashbuf [8]byte binary.LittleEndian.PutUint64(hashbuf[:], uint64(i)) var oldLink *ipld.Link - oldLink, err = rootShard.SetAndPrevious(context.Background(), string(hashbuf[:bytesInKey]), unixfs.EmptyFileNode()) + oldLink, err = rootShard.Swap(context.Background(), string(hashbuf[:bytesInKey]), unixfs.EmptyFileNode()) if err != nil { return nil, err } diff --git a/io/directory.go b/io/directory.go index 50aaee104..c316d0f8a 100644 --- a/io/directory.go +++ b/io/directory.go @@ -355,7 +355,7 @@ func (d *HAMTDirectory) SetCidBuilder(builder cid.Builder) { // AddChild implements the `Directory` interface. func (d *HAMTDirectory) AddChild(ctx context.Context, name string, nd ipld.Node) error { - oldChild, err := d.shard.SetAndPrevious(ctx, name, nd) + oldChild, err := d.shard.Swap(ctx, name, nd) if err != nil { return err } @@ -395,7 +395,7 @@ func (d *HAMTDirectory) Find(ctx context.Context, name string) (ipld.Node, error // RemoveChild implements the `Directory` interface. func (d *HAMTDirectory) RemoveChild(ctx context.Context, name string) error { - oldChild, err := d.shard.RemoveAndPrevious(ctx, name) + oldChild, err := d.shard.Take(ctx, name) if err != nil { return err } From 06b9d52eb6b8bf75f8e9bfe7fc50150f3497cbff Mon Sep 17 00:00:00 2001 From: Lucas Molas Date: Thu, 11 Nov 2021 21:11:54 -0300 Subject: [PATCH 33/38] unexport SwitchToSharding --- io/directory.go | 6 +++--- io/directory_test.go | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/io/directory.go b/io/directory.go index c316d0f8a..c34ede6ec 100644 --- a/io/directory.go +++ b/io/directory.go @@ -321,8 +321,8 @@ func (d *BasicDirectory) GetCidBuilder() cid.Builder { return d.node.CidBuilder() } -// SwitchToSharding returns a HAMT implementation of this directory. -func (d *BasicDirectory) SwitchToSharding(ctx context.Context) (*HAMTDirectory, error) { +// switchToSharding returns a HAMT implementation of this directory. +func (d *BasicDirectory) switchToSharding(ctx context.Context) (*HAMTDirectory, error) { hamtDir := new(HAMTDirectory) hamtDir.dserv = d.dserv @@ -575,7 +575,7 @@ func (d *UpgradeableDirectory) AddChild(ctx context.Context, name string, nd ipl if !switchToHAMT { return basicDir.AddChild(ctx, name, nd) } - hamtDir, err = basicDir.SwitchToSharding(ctx) + hamtDir, err = basicDir.switchToSharding(ctx) if err != nil { return err } diff --git a/io/directory_test.go b/io/directory_test.go index fd58438ed..0863f87cc 100644 --- a/io/directory_test.go +++ b/io/directory_test.go @@ -308,7 +308,7 @@ func TestIntegrityOfDirectorySwitch(t *testing.T) { } compareDirectoryEntries(t, basicDir, hamtDir) - hamtDirFromSwitch, err := basicDir.SwitchToSharding(ctx) + hamtDirFromSwitch, err := basicDir.switchToSharding(ctx) assert.NoError(t, err) basicDirFromSwitch, err := hamtDir.switchToBasic(ctx) assert.NoError(t, err) From adb8aef4fe502c389383a939751d93c50d77bd24 Mon Sep 17 00:00:00 2001 From: Lucas Molas Date: Thu, 11 Nov 2021 21:22:05 -0300 Subject: [PATCH 34/38] rename UpgradeableDirectory to DyanmicDirectory --- io/directory.go | 23 +++++++++-------------- io/directory_test.go | 8 ++++---- 2 files changed, 13 insertions(+), 18 deletions(-) diff --git a/io/directory.go b/io/directory.go index c34ede6ec..996509aa0 100644 --- a/io/directory.go +++ b/io/directory.go @@ -126,10 +126,10 @@ func newBasicDirectoryFromNode(dserv ipld.DAGService, node *mdag.ProtoNode) *Bas return basicDir } -// NewDirectory returns a Directory implemented by UpgradeableDirectory +// NewDirectory returns a Directory implemented by DyanmicDirectory // containing a BasicDirectory that can be converted to a HAMTDirectory. func NewDirectory(dserv ipld.DAGService) Directory { - return &UpgradeableDirectory{newEmptyBasicDirectory(dserv)} + return &DyanmicDirectory{newEmptyBasicDirectory(dserv)} } // ErrNotADir implies that the given node was not a unixfs directory @@ -150,13 +150,13 @@ func NewDirectoryFromNode(dserv ipld.DAGService, node ipld.Node) (Directory, err switch fsNode.Type() { case format.TDirectory: - return &UpgradeableDirectory{newBasicDirectoryFromNode(dserv, protoBufNode.Copy().(*mdag.ProtoNode))}, nil + return &DyanmicDirectory{newBasicDirectoryFromNode(dserv, protoBufNode.Copy().(*mdag.ProtoNode))}, nil case format.THAMTShard: shard, err := hamt.NewHamtFromDag(dserv, node) if err != nil { return nil, err } - return &UpgradeableDirectory{&HAMTDirectory{shard, dserv, 0}}, nil + return &DyanmicDirectory{&HAMTDirectory{shard, dserv, 0}}, nil } return nil, ErrNotADir @@ -524,22 +524,17 @@ func (d *HAMTDirectory) sizeBelowThreshold(ctx context.Context, sizeChange int) return true, nil } -// UpgradeableDirectory wraps a Directory interface and provides extra logic +// DyanmicDirectory wraps a Directory interface and provides extra logic // to upgrade from its BasicDirectory implementation to HAMTDirectory. -// FIXME: Rename to something that reflects the new bi-directionality. We no -// longer go in the "forward" direction (upgrade) but we also go "backward". -// Possible alternatives: SwitchableDirectory or DynamicDirectory. Also consider -// more generic-sounding names like WrapperDirectory that emphasize that this -// is just the middleman and has no real Directory-implementing logic. -type UpgradeableDirectory struct { +type DyanmicDirectory struct { Directory } -var _ Directory = (*UpgradeableDirectory)(nil) +var _ Directory = (*DyanmicDirectory)(nil) // AddChild implements the `Directory` interface. We check when adding new entries // if we should switch to HAMTDirectory according to global option(s). -func (d *UpgradeableDirectory) AddChild(ctx context.Context, name string, nd ipld.Node) error { +func (d *DyanmicDirectory) AddChild(ctx context.Context, name string, nd ipld.Node) error { hamtDir, ok := d.Directory.(*HAMTDirectory) if ok { // We evaluate a switch in the HAMTDirectory case even for an AddChild @@ -595,7 +590,7 @@ func (d *UpgradeableDirectory) AddChild(ctx context.Context, name string, nd ipl // (in which case we would have the hard comparison in GetNode() to make // sure we make good on the value). Finding the right margin can be tricky // and very dependent on the use case so it might not be worth it. -func (d *UpgradeableDirectory) RemoveChild(ctx context.Context, name string) error { +func (d *DyanmicDirectory) RemoveChild(ctx context.Context, name string) error { hamtDir, ok := d.Directory.(*HAMTDirectory) if !ok { return d.Directory.RemoveChild(ctx, name) diff --git a/io/directory_test.go b/io/directory_test.go index 0863f87cc..bca79fc1d 100644 --- a/io/directory_test.go +++ b/io/directory_test.go @@ -207,13 +207,13 @@ func mockLinkSizeFunc(fixedSize int) func(linkName string, linkCid cid.Cid) int } func checkBasicDirectory(t *testing.T, dir Directory, errorMessage string) { - if _, ok := dir.(*UpgradeableDirectory).Directory.(*BasicDirectory); !ok { + if _, ok := dir.(*DyanmicDirectory).Directory.(*BasicDirectory); !ok { t.Fatal(errorMessage) } } func checkHAMTDirectory(t *testing.T, dir Directory, errorMessage string) { - if _, ok := dir.(*UpgradeableDirectory).Directory.(*HAMTDirectory); !ok { + if _, ok := dir.(*DyanmicDirectory).Directory.(*HAMTDirectory); !ok { t.Fatal(errorMessage) } } @@ -244,8 +244,8 @@ func TestProductionLinkSize(t *testing.T) { } // Test HAMTDirectory <-> BasicDirectory switch based on directory size. The -// switch is managed by the UpgradeableDirectory abstraction. -func TestUpgradeableDirectorySwitch(t *testing.T) { +// switch is managed by the DyanmicDirectory abstraction. +func TestDynamicDirectorySwitch(t *testing.T) { oldHamtOption := HAMTShardingSize defer func() { HAMTShardingSize = oldHamtOption }() HAMTShardingSize = 0 // Disable automatic switch at the start. From 3e16e8eec75e803ac6cd7254455cb8857514adf3 Mon Sep 17 00:00:00 2001 From: Lucas Molas Date: Thu, 11 Nov 2021 21:26:15 -0300 Subject: [PATCH 35/38] fix Dyanmic to Dynamic --- io/directory.go | 18 +++++++++--------- io/directory_test.go | 6 +++--- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/io/directory.go b/io/directory.go index 996509aa0..9335054ad 100644 --- a/io/directory.go +++ b/io/directory.go @@ -126,10 +126,10 @@ func newBasicDirectoryFromNode(dserv ipld.DAGService, node *mdag.ProtoNode) *Bas return basicDir } -// NewDirectory returns a Directory implemented by DyanmicDirectory +// NewDirectory returns a Directory implemented by DynamicDirectory // containing a BasicDirectory that can be converted to a HAMTDirectory. func NewDirectory(dserv ipld.DAGService) Directory { - return &DyanmicDirectory{newEmptyBasicDirectory(dserv)} + return &DynamicDirectory{newEmptyBasicDirectory(dserv)} } // ErrNotADir implies that the given node was not a unixfs directory @@ -150,13 +150,13 @@ func NewDirectoryFromNode(dserv ipld.DAGService, node ipld.Node) (Directory, err switch fsNode.Type() { case format.TDirectory: - return &DyanmicDirectory{newBasicDirectoryFromNode(dserv, protoBufNode.Copy().(*mdag.ProtoNode))}, nil + return &DynamicDirectory{newBasicDirectoryFromNode(dserv, protoBufNode.Copy().(*mdag.ProtoNode))}, nil case format.THAMTShard: shard, err := hamt.NewHamtFromDag(dserv, node) if err != nil { return nil, err } - return &DyanmicDirectory{&HAMTDirectory{shard, dserv, 0}}, nil + return &DynamicDirectory{&HAMTDirectory{shard, dserv, 0}}, nil } return nil, ErrNotADir @@ -524,17 +524,17 @@ func (d *HAMTDirectory) sizeBelowThreshold(ctx context.Context, sizeChange int) return true, nil } -// DyanmicDirectory wraps a Directory interface and provides extra logic +// DynamicDirectory wraps a Directory interface and provides extra logic // to upgrade from its BasicDirectory implementation to HAMTDirectory. -type DyanmicDirectory struct { +type DynamicDirectory struct { Directory } -var _ Directory = (*DyanmicDirectory)(nil) +var _ Directory = (*DynamicDirectory)(nil) // AddChild implements the `Directory` interface. We check when adding new entries // if we should switch to HAMTDirectory according to global option(s). -func (d *DyanmicDirectory) AddChild(ctx context.Context, name string, nd ipld.Node) error { +func (d *DynamicDirectory) AddChild(ctx context.Context, name string, nd ipld.Node) error { hamtDir, ok := d.Directory.(*HAMTDirectory) if ok { // We evaluate a switch in the HAMTDirectory case even for an AddChild @@ -590,7 +590,7 @@ func (d *DyanmicDirectory) AddChild(ctx context.Context, name string, nd ipld.No // (in which case we would have the hard comparison in GetNode() to make // sure we make good on the value). Finding the right margin can be tricky // and very dependent on the use case so it might not be worth it. -func (d *DyanmicDirectory) RemoveChild(ctx context.Context, name string) error { +func (d *DynamicDirectory) RemoveChild(ctx context.Context, name string) error { hamtDir, ok := d.Directory.(*HAMTDirectory) if !ok { return d.Directory.RemoveChild(ctx, name) diff --git a/io/directory_test.go b/io/directory_test.go index bca79fc1d..f5fa2e564 100644 --- a/io/directory_test.go +++ b/io/directory_test.go @@ -207,13 +207,13 @@ func mockLinkSizeFunc(fixedSize int) func(linkName string, linkCid cid.Cid) int } func checkBasicDirectory(t *testing.T, dir Directory, errorMessage string) { - if _, ok := dir.(*DyanmicDirectory).Directory.(*BasicDirectory); !ok { + if _, ok := dir.(*DynamicDirectory).Directory.(*BasicDirectory); !ok { t.Fatal(errorMessage) } } func checkHAMTDirectory(t *testing.T, dir Directory, errorMessage string) { - if _, ok := dir.(*DyanmicDirectory).Directory.(*HAMTDirectory); !ok { + if _, ok := dir.(*DynamicDirectory).Directory.(*HAMTDirectory); !ok { t.Fatal(errorMessage) } } @@ -244,7 +244,7 @@ func TestProductionLinkSize(t *testing.T) { } // Test HAMTDirectory <-> BasicDirectory switch based on directory size. The -// switch is managed by the DyanmicDirectory abstraction. +// switch is managed by the DynamicDirectory abstraction. func TestDynamicDirectorySwitch(t *testing.T) { oldHamtOption := HAMTShardingSize defer func() { HAMTShardingSize = oldHamtOption }() From 3781074471dc37997d050263949248177c7c5e6b Mon Sep 17 00:00:00 2001 From: Lucas Molas Date: Thu, 11 Nov 2021 21:30:28 -0300 Subject: [PATCH 36/38] fix description --- io/directory.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/io/directory.go b/io/directory.go index 9335054ad..53541d2b5 100644 --- a/io/directory.go +++ b/io/directory.go @@ -525,7 +525,8 @@ func (d *HAMTDirectory) sizeBelowThreshold(ctx context.Context, sizeChange int) } // DynamicDirectory wraps a Directory interface and provides extra logic -// to upgrade from its BasicDirectory implementation to HAMTDirectory. +// to switch from BasicDirectory to HAMTDirectory and backwards based on +// size. type DynamicDirectory struct { Directory } From e46a992c45b00794e638980679fb411a4499e0a9 Mon Sep 17 00:00:00 2001 From: Lucas Molas Date: Thu, 11 Nov 2021 21:44:41 -0300 Subject: [PATCH 37/38] fix complete hamt test --- io/completehamt_test.go | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/io/completehamt_test.go b/io/completehamt_test.go index 50be9649e..2af652e32 100644 --- a/io/completehamt_test.go +++ b/io/completehamt_test.go @@ -4,6 +4,7 @@ import ( "context" "encoding/binary" "fmt" + "github.com/ipfs/go-unixfs/internal" "math" "testing" @@ -24,13 +25,7 @@ import ( // childsPerNode ^ (treeHeight). // treeHeight: The number of layers of non-value HAMT nodes (e.g. height = 1 is a single shard pointing to some values) // FIXME: HAMTHashFunction needs to be set to idHash by the caller. We depend on -// this simplification for the current logic to work. (HAMTHashFunction is a -// global setting of the package, it is hard-coded in the serialized Shard node -// and not allowed to be changed on a per HAMT/Shard basis.) -// (If we didn't rehash inside setValue then we could just generate -// the fake hash as in io.Swap through `newHashBits()` and pass -// it as an argument making the hash independent of tree manipulation; that -// sounds as the correct way to go in general and we wouldn't need this.) +// this simplification for the current logic to work. func CreateCompleteHAMT(ds ipld.DAGService, treeHeight int, childsPerNode int) (ipld.Node, error) { if treeHeight < 1 { panic("treeHeight < 1") @@ -79,7 +74,13 @@ func idHash(val []byte) []byte { return val } +// FIXME: This is not checking the exact height of the tree but just making +// sure there are as many children as we would have with a complete HAMT. func TestCreateCompleteShard(t *testing.T) { + oldHashFunc := internal.HAMTHashFunction + defer func() { internal.HAMTHashFunction = oldHashFunc }() + internal.HAMTHashFunction = idHash + ds := mdtest.Mock() childsPerNode := 16 treeHeight := 2 From 0088b7fa5649eba75747978277592565881a9caf Mon Sep 17 00:00:00 2001 From: Lucas Molas Date: Thu, 11 Nov 2021 21:57:49 -0300 Subject: [PATCH 38/38] remove fixme --- io/directory.go | 5 ----- 1 file changed, 5 deletions(-) diff --git a/io/directory.go b/io/directory.go index 53541d2b5..2ec862247 100644 --- a/io/directory.go +++ b/io/directory.go @@ -586,11 +586,6 @@ func (d *DynamicDirectory) AddChild(ctx context.Context, name string, nd ipld.No // RemoveChild implements the `Directory` interface. Used in the case where we wrap // a HAMTDirectory that might need to be downgraded to a BasicDirectory. The // upgrade path is in AddChild. -// FIXME: Consider adding some margin in the comparison against HAMTShardingSize -// to avoid an eager enumeration at the first opportunity we go below it -// (in which case we would have the hard comparison in GetNode() to make -// sure we make good on the value). Finding the right margin can be tricky -// and very dependent on the use case so it might not be worth it. func (d *DynamicDirectory) RemoveChild(ctx context.Context, name string) error { hamtDir, ok := d.Directory.(*HAMTDirectory) if !ok {