Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use a single index Results when querying across blocks #1474

Merged
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 7 additions & 3 deletions src/dbnode/integration/write_tagged_quorum_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -300,9 +300,13 @@ func nodeHasTaggedWrite(t *testing.T, s *testSetup) bool {
require.NoError(t, err)
results := res.Results
require.Equal(t, testNamespaces[0].String(), results.Namespace().String())
tags, ok := results.Map().Get(ident.StringID("quorumTest"))
idxFound := ok && ident.NewTagIterMatcher(ident.MustNewTagStringsIterator(
"foo", "bar", "boo", "baz")).Matches(ident.NewTagsIterator(tags))

var idxFound bool
results.WithMap(func(rMap *index.ResultsMap) {
tags, ok := rMap.Get(ident.StringID("quorumTest"))
idxFound = ok && ident.NewTagIterMatcher(ident.MustNewTagStringsIterator(
"foo", "bar", "boo", "baz")).Matches(ident.NewTagsIterator(tags))
})

if !idxFound {
return false
Expand Down
112 changes: 65 additions & 47 deletions src/dbnode/network/server/tchannelthrift/node/service.go
Original file line number Diff line number Diff line change
Expand Up @@ -289,35 +289,45 @@ func (s *service) Query(tctx thrift.Context, req *rpc.QueryRequest) (*rpc.QueryR
}

result := &rpc.QueryResult_{
Results: make([]*rpc.QueryResultElement, 0, queryResult.Results.Map().Len()),
Results: make([]*rpc.QueryResultElement, 0, queryResult.Results.Size()),
Exhaustive: queryResult.Exhaustive,
}

fetchData := true
if req.NoData != nil && *req.NoData {
fetchData = false
}
for _, entry := range queryResult.Results.Map().Iter() {
elem := &rpc.QueryResultElement{
ID: entry.Key().String(),
Tags: make([]*rpc.Tag, 0, len(entry.Value().Values())),
}
result.Results = append(result.Results, elem)
for _, tag := range entry.Value().Values() {
elem.Tags = append(elem.Tags, &rpc.Tag{
Name: tag.Name.String(),
Value: tag.Value.String(),
})
}
if !fetchData {
continue
}
tsID := entry.Key()
datapoints, err := s.readDatapoints(ctx, nsID, tsID, start, end,
req.ResultTimeType)
if err != nil {
return nil, convert.ToRPCError(err)

var readErr error
queryResult.Results.WithMap(func(rMap *index.ResultsMap) {
for _, entry := range rMap.Iter() {
elem := &rpc.QueryResultElement{
ID: entry.Key().String(),
Tags: make([]*rpc.Tag, 0, len(entry.Value().Values())),
}
result.Results = append(result.Results, elem)
for _, tag := range entry.Value().Values() {
elem.Tags = append(elem.Tags, &rpc.Tag{
Name: tag.Name.String(),
Value: tag.Value.String(),
})
}
if !fetchData {
continue
}
tsID := entry.Key()
datapoints, err := s.readDatapoints(ctx, nsID, tsID, start, end,
req.ResultTimeType)
if err != nil {
// Break for error
readErr = err
return
}
elem.Datapoints = datapoints
}
elem.Datapoints = datapoints
})
if readErr != nil {
return nil, convert.ToRPCError(readErr)
}

return result, nil
Expand Down Expand Up @@ -422,33 +432,41 @@ func (s *service) FetchTagged(tctx thrift.Context, req *rpc.FetchTaggedRequest)
results := queryResult.Results
nsID := results.Namespace()
tagsIter := ident.NewTagsIterator(ident.Tags{})
for _, entry := range results.Map().Iter() {
tsID := entry.Key()
tags := entry.Value()
enc := s.pools.tagEncoder.Get()
ctx.RegisterFinalizer(enc)
tagsIter.Reset(tags)
encodedTags, err := s.encodeTags(enc, tagsIter)
if err != nil { // This is an invariant, should never happen
s.metrics.fetchTagged.ReportError(s.nowFn().Sub(callStart))
return nil, tterrors.NewInternalError(err)
}

elem := &rpc.FetchTaggedIDResult_{
NameSpace: nsID.Bytes(),
ID: tsID.Bytes(),
EncodedTags: encodedTags.Bytes(),
}
response.Elements = append(response.Elements, elem)
if !fetchData {
continue
}
segments, rpcErr := s.readEncoded(ctx, nsID, tsID, opts.StartInclusive, opts.EndExclusive)
if rpcErr != nil {
elem.Err = rpcErr
continue
var encodeErr error
results.WithMap(func(rMap *index.ResultsMap) {
for _, entry := range rMap.Iter() {
tsID := entry.Key()
tags := entry.Value()
enc := s.pools.tagEncoder.Get()
ctx.RegisterFinalizer(enc)
tagsIter.Reset(tags)
encodedTags, err := s.encodeTags(enc, tagsIter)
if err != nil {
encodeErr = err
return
}

elem := &rpc.FetchTaggedIDResult_{
NameSpace: nsID.Bytes(),
ID: tsID.Bytes(),
EncodedTags: encodedTags.Bytes(),
}
response.Elements = append(response.Elements, elem)
if !fetchData {
continue
}
segments, rpcErr := s.readEncoded(ctx, nsID, tsID, opts.StartInclusive, opts.EndExclusive)
if rpcErr != nil {
elem.Err = rpcErr
continue
}
elem.Segments = segments
}
elem.Segments = segments
})
if encodeErr != nil {
s.metrics.fetchTagged.ReportError(s.nowFn().Sub(callStart))
return nil, tterrors.NewInternalError(err)
}

s.metrics.fetchTagged.ReportSuccess(s.nowFn().Sub(callStart))
Expand Down
61 changes: 35 additions & 26 deletions src/dbnode/network/server/tchannelthrift/node/service_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -193,14 +193,16 @@ func TestServiceQuery(t *testing.T) {

resMap := index.NewResults(testIndexOptions)
resMap.Reset(ident.StringID(nsID))
resMap.Map().Set(ident.StringID("foo"), ident.NewTags(
ident.StringTag(tags["foo"][0].name, tags["foo"][0].value),
ident.StringTag(tags["foo"][1].name, tags["foo"][1].value),
))
resMap.Map().Set(ident.StringID("bar"), ident.NewTags(
ident.StringTag(tags["bar"][0].name, tags["bar"][0].value),
ident.StringTag(tags["bar"][1].name, tags["bar"][1].value),
))
resMap.WithMap(func(rMap *index.ResultsMap) {
rMap.Set(ident.StringID("foo"), ident.NewTags(
ident.StringTag(tags["foo"][0].name, tags["foo"][0].value),
ident.StringTag(tags["foo"][1].name, tags["foo"][1].value),
))
rMap.Set(ident.StringID("bar"), ident.NewTags(
ident.StringTag(tags["bar"][0].name, tags["bar"][0].value),
ident.StringTag(tags["bar"][1].name, tags["bar"][1].value),
))
})

mockDB.EXPECT().QueryIDs(
ctx,
Expand Down Expand Up @@ -1066,14 +1068,16 @@ func TestServiceFetchTagged(t *testing.T) {

resMap := index.NewResults(testIndexOptions)
resMap.Reset(ident.StringID(nsID))
resMap.Map().Set(ident.StringID("foo"), ident.NewTags(
ident.StringTag("foo", "bar"),
ident.StringTag("baz", "dxk"),
))
resMap.Map().Set(ident.StringID("bar"), ident.NewTags(
ident.StringTag("foo", "bar"),
ident.StringTag("dzk", "baz"),
))
resMap.WithMap(func(rMap *index.ResultsMap) {
rMap.Set(ident.StringID("foo"), ident.NewTags(
ident.StringTag("foo", "bar"),
ident.StringTag("baz", "dxk"),
))
rMap.Set(ident.StringID("bar"), ident.NewTags(
ident.StringTag("foo", "bar"),
ident.StringTag("dzk", "baz"),
))
})

mockDB.EXPECT().QueryIDs(
ctx,
Expand Down Expand Up @@ -1163,14 +1167,16 @@ func TestServiceFetchTaggedIsOverloaded(t *testing.T) {

resMap := index.NewResults(testIndexOptions)
resMap.Reset(ident.StringID(nsID))
resMap.Map().Set(ident.StringID("foo"), ident.NewTags(
ident.StringTag("foo", "bar"),
ident.StringTag("baz", "dxk"),
))
resMap.Map().Set(ident.StringID("bar"), ident.NewTags(
ident.StringTag("foo", "bar"),
ident.StringTag("dzk", "baz"),
))
resMap.WithMap(func(rMap *index.ResultsMap) {
rMap.Set(ident.StringID("foo"), ident.NewTags(
ident.StringTag("foo", "bar"),
ident.StringTag("baz", "dxk"),
))
rMap.Set(ident.StringID("bar"), ident.NewTags(
ident.StringTag("foo", "bar"),
ident.StringTag("dzk", "baz"),
))
})

startNanos, err := convert.ToValue(start, rpc.TimeType_UNIX_NANOSECONDS)
require.NoError(t, err)
Expand Down Expand Up @@ -1216,8 +1222,11 @@ func TestServiceFetchTaggedNoData(t *testing.T) {

resMap := index.NewResults(testIndexOptions)
resMap.Reset(ident.StringID(nsID))
resMap.Map().Set(ident.StringID("foo"), ident.Tags{})
resMap.Map().Set(ident.StringID("bar"), ident.Tags{})
resMap.WithMap(func(rMap *index.ResultsMap) {
rMap.Set(ident.StringID("foo"), ident.Tags{})
rMap.Set(ident.StringID("bar"), ident.Tags{})
})

mockDB.EXPECT().QueryIDs(
ctx,
ident.NewIDMatcher(nsID),
Expand Down
65 changes: 5 additions & 60 deletions src/dbnode/storage/index.go
Original file line number Diff line number Diff line change
Expand Up @@ -895,24 +895,16 @@ func (i *nsIndex) Query(
exhaustive bool
returned bool
}{
merged: nil,
merged: i.resultsPool.Get(),
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does it make sense to call this merged now that we push it all the way down? Maybe just call it results

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure thing, renamed.

exhaustive: true,
returned: false,
}
)
defer func() {
// Ensure that during early error returns we let any aborted
// goroutines know not to try to modify/edit the result any longer.
results.Lock()
results.returned = true
results.Unlock()
}()
// Set the results namespace ID
results.merged.Reset(i.nsMetadata.ID())

execBlockQuery := func(block index.Block) {
blockResults := i.resultsPool.Get()
blockResults.Reset(i.nsMetadata.ID())

blockExhaustive, err := block.Query(query, opts, blockResults)
blockExhaustive, err := block.Query(query, opts, results.merged)
if err == index.ErrUnableToQueryBlockClosed {
// NB(r): Because we query this block outside of the results lock, it's
// possible this block may get closed if it slides out of retention, in
Expand All @@ -921,53 +913,14 @@ func (i *nsIndex) Query(
err = nil
}

var mergedResult bool
results.Lock()
defer func() {
results.Unlock()
if mergedResult {
// Only finalize this result if we merged it into another.
blockResults.Finalize()
}
}()

if results.returned {
// If already returned then we early cancelled, don't add any
// further results or errors since caller already has a result.
return
}
defer results.Unlock()

if err != nil {
results.multiErr = results.multiErr.Add(err)
return
}

if results.merged == nil {
// Return results to pool at end of request.
ctx.RegisterFinalizer(blockResults)
// No merged results yet, use this as the first to merge into.
results.merged = blockResults
} else {
// Append the block results.
mergedResult = true
size := results.merged.Size()
for _, entry := range blockResults.Map().Iter() {
// Break early if reached limit.
if opts.Limit > 0 && size >= opts.Limit {
blockExhaustive = false
break
}

// Append to merged results.
id, tags := entry.Key(), entry.Value()
_, size, err = results.merged.AddIDAndTags(id, tags)
if err != nil {
results.multiErr = results.multiErr.Add(err)
return
}
}
}

// If block had more data but we stopped early, need to notify caller.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

super nit: This would read a lot better if you moved this comment below the early return. Right now it seems like a bug at first glance, because you say we need to notify the caller, but then the code immediately returns without doing anything.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sounds good, will do.

if blockExhaustive {
return
Expand Down Expand Up @@ -1064,8 +1017,6 @@ func (i *nsIndex) Query(
}

results.Lock()
// Signal not to add any further results since we've returned already.
results.returned = true
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are we losing the ability to do this signaling? It seems like this might make the impact of timed-out queries even worse, since they'll keep updating this map... Actually, isn't it broken, because they'll keep trying to add results to a map that may have been returned to the pool?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually, where do the results get returned to the pool? I don't see it in this method or in the RPC method.

Regardless, if we want to pool this thing you may need to add ref-counting or some type of unique query identifier or something

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So I now use a lifetime to protect against writing to the results once we return from the Query call to the index, this prevents writing to results during cancellation or any other early return code path.

// Take reference to vars to return while locked, need to allow defer
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't understand this comment. What deadlock are you protecting against? The only Lock/defer that I see is in the execBlockQuery func, and I don't see how that would deadlock with this code.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'll remove that statement, there used to be other stuff going on.

// lock/unlock cleanup to not deadlock with this locked code block.
exhaustive := results.exhaustive
Expand All @@ -1077,12 +1028,6 @@ func (i *nsIndex) Query(
return index.QueryResults{}, err
}

// If no blocks queried, return an empty result
if mergedResults == nil {
mergedResults = i.resultsPool.Get()
mergedResults.Reset(i.nsMetadata.ID())
}

return index.QueryResults{
Exhaustive: exhaustive,
Results: mergedResults,
Expand Down
Loading