Skip to content

Commit

Permalink
kvclient, server: implement SpanStats for use with coalesced ranges
Browse files Browse the repository at this point in the history
This commit provides a re-implementation of fetching the MVCC stats
and range count for a given span. The previous implementation relied on
the assumption that 1 range can contain at most 1 table/index. Since
GH-79700, this assumption is no longer true.

This re-implementation allows secondary tenants to access span stats
via the serverpb.TenantStatusServer interface.

If a roachpb.SpanStatsRequest has a node_id value of "0", instead of a specific
node id, the response will yield cluster-wide values. To achieve this,
the server does a fan-out to nodes that are known to have replicas for the
span requested.

The interaction between tenants is illustrated below:

 System Tenant
+----------------------------------------------------+
|                                                    |
|                                                    |
|                                    KV Node fan-out |
|                                     +----------+   |
|                                     |          |   |
|     +-------------------------------v----+     |   |
|     |                                    |     |   |
|     |      server.systemStatusServer     +-----+   |
|     |                                    |         |
|     +----------------+-------------------+         |
|                      |                             |
|                      |                             |
|                      |     via TenantStatusServer  |
|                      +--------------+              |
|                      |              |              |
|                      |              |              |
|                      |              v              |
|                      |        +-----------+        |
|                      |        |           |        |
|                      |        | SQLServer |        |
|                      |        |           |        |
|                      |        +-----------+        |
|                      |                             |
+----------------------v-----------------------------+
                       |
                       |
                       |
                       |   roachpb.SpanStatsResponse
                       |
                       |
 Secondary Tenant      |
+----------------------+------------------------------+
|                      |                              |
|         +------------v-------------+                |
|         |            |             |                |
|         |   kvtenantccl.Connector  |                |
|         |                          |                |
|         +------------+-------------+                |
|                      |                              |
|                      |    via TenantStatusServer    |
|                      +--------------------+         |
|                      |                    |         |
|                      |                    |         |
|                      |                    |         |
|    +-----------------v-------+     +------v------+  |
|    |                         |     |             |  |
|    |    server.statusServer  |     |  SQLServer  |  |
|    |                         |     |             |  |
|    +-------------------------+     +-------------+  |
|                                                     |
|                                                     |
|                                                     |
+-----------------------------------------------------+

Resolves #84105
Part of: https://cockroachlabs.atlassian.net/browse/CRDB-22711
Release note: None
  • Loading branch information
Zach Lite authored and zachlite committed Feb 14, 2023
1 parent 3b9d8cb commit 4fee2a4
Show file tree
Hide file tree
Showing 21 changed files with 595 additions and 297 deletions.
32 changes: 0 additions & 32 deletions docs/generated/http/full.md
Original file line number Diff line number Diff line change
Expand Up @@ -2930,42 +2930,10 @@ Support status: [reserved](#support-status)







| Field | Type | Label | Description | Support status |
| ----- | ---- | ----- | ----------- | -------------- |
| node_id | [string](#cockroach.server.serverpb.SpanStatsRequest-string) | | | [reserved](#support-status) |
| start_key | [bytes](#cockroach.server.serverpb.SpanStatsRequest-bytes) | | | [reserved](#support-status) |
| end_key | [bytes](#cockroach.server.serverpb.SpanStatsRequest-bytes) | | | [reserved](#support-status) |







#### Response Parameters







| Field | Type | Label | Description | Support status |
| ----- | ---- | ----- | ----------- | -------------- |
| range_count | [int32](#cockroach.server.serverpb.SpanStatsResponse-int32) | | | [reserved](#support-status) |
| approximate_disk_bytes | [uint64](#cockroach.server.serverpb.SpanStatsResponse-uint64) | | | [reserved](#support-status) |
| total_stats | [cockroach.storage.enginepb.MVCCStats](#cockroach.server.serverpb.SpanStatsResponse-cockroach.storage.enginepb.MVCCStats) | | | [reserved](#support-status) |







## Stacks

`GET /_status/stacks/{node_id}`
Expand Down
16 changes: 16 additions & 0 deletions pkg/ccl/kvccl/kvtenantccl/connector.go
Original file line number Diff line number Diff line change
Expand Up @@ -648,6 +648,22 @@ func (c *Connector) SpanConfigConformance(
return report, nil
}

// SpanStats implements the serverpb.TenantStatusServer interface.
func (c *Connector) SpanStats(
ctx context.Context, req *roachpb.SpanStatsRequest,
) (*roachpb.SpanStatsResponse, error) {
var response *roachpb.SpanStatsResponse
err := c.withClient(ctx, func(ctx context.Context, c *client) error {
stats, err := c.SpanStats(ctx, req)
if err != nil {
return err
}
response = stats
return nil
})
return response, err
}

// GetAllSystemSpanConfigsThatApply implements the spanconfig.KVAccessor
// interface.
func (c *Connector) GetAllSystemSpanConfigsThatApply(
Expand Down
70 changes: 70 additions & 0 deletions pkg/ccl/serverccl/statusccl/tenant_status_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
package statusccl

import (
"bytes"
"context"
gosql "database/sql"
"encoding/hex"
Expand Down Expand Up @@ -141,6 +142,75 @@ func TestTenantStatusAPI(t *testing.T) {
t.Run("tenant_hot_ranges", func(t *testing.T) {
testTenantHotRanges(ctx, t, testHelper)
})

t.Run("tenant_span_stats", func(t *testing.T) {
testTenantSpanStats(ctx, t, testHelper)
})
}

func testTenantSpanStats(ctx context.Context, t *testing.T, helper serverccl.TenantTestHelper) {
tenantA := helper.TestCluster().Tenant(0)
tenantB := helper.ControlCluster().Tenant(0)

aSpan := tenantA.GetTenant().Codec().TenantSpan()
bSpan := tenantB.GetTenant().Codec().TenantSpan()

t.Run("test tenant isolation", func(t *testing.T) {
_, err := tenantA.TenantStatusSrv().(serverpb.TenantStatusServer).SpanStats(ctx,
&roachpb.SpanStatsRequest{
NodeID: "0", // 0 indicates we want stats from all nodes.
StartKey: roachpb.RKey(bSpan.Key),
EndKey: roachpb.RKey(bSpan.EndKey),
})
require.Error(t, err)
})

t.Run("test KV node fan-out", func(t *testing.T) {
_, tID, err := keys.DecodeTenantPrefix(aSpan.Key)
require.NoError(t, err)
tPrefix := keys.MakeTenantPrefix(tID)

makeKey := func(keys ...[]byte) roachpb.Key {
return bytes.Join(keys, nil)
}

controlStats, err := tenantA.TenantStatusSrv().(serverpb.TenantStatusServer).SpanStats(ctx,
&roachpb.SpanStatsRequest{
NodeID: "0", // 0 indicates we want stats from all nodes.
StartKey: roachpb.RKey(aSpan.Key),
EndKey: roachpb.RKey(aSpan.EndKey),
})
require.NoError(t, err)

// Create a new range in this tenant.
_, _, err = helper.HostCluster().Server(0).SplitRange(makeKey(tPrefix, roachpb.Key("c")))
require.NoError(t, err)

// Wait for the split to finish and propagate.
err = helper.HostCluster().WaitForFullReplication()
require.NoError(t, err)

// Create 6 new keys across the tenant's ranges.
incKeys := []string{"a", "b", "bb", "d", "e", "f"}
for _, incKey := range incKeys {
// Prefix each key appropriately for this tenant.
k := makeKey(keys.MakeTenantPrefix(tID), []byte(incKey))
if _, err := helper.HostCluster().Server(0).DB().Inc(ctx, k, 5); err != nil {
t.Fatal(err)
}
}

stats, err := tenantA.TenantStatusSrv().(serverpb.TenantStatusServer).SpanStats(ctx,
&roachpb.SpanStatsRequest{
NodeID: "0", // 0 indicates we want stats from all nodes.
StartKey: roachpb.RKey(aSpan.Key),
EndKey: roachpb.RKey(aSpan.EndKey),
})

require.NoError(t, err)
require.Equal(t, controlStats.RangeCount+1, stats.RangeCount)
require.Equal(t, controlStats.TotalStats.LiveCount+int64(len(incKeys)), stats.TotalStats.LiveCount)
})
}

func testTenantLogs(ctx context.Context, t *testing.T, helper serverccl.TenantTestHelper) {
Expand Down
1 change: 0 additions & 1 deletion pkg/kv/kvserver/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -249,7 +249,6 @@ go_test(
"client_spanconfigs_test.go",
"client_split_burst_test.go",
"client_split_test.go",
"client_status_test.go",
"client_store_test.go",
"client_tenant_test.go",
"client_test.go",
Expand Down
28 changes: 0 additions & 28 deletions pkg/kv/kvserver/store.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,6 @@ import (
"github.com/cockroachdb/cockroach/pkg/spanconfig"
"github.com/cockroachdb/cockroach/pkg/spanconfig/spanconfigstore"
"github.com/cockroachdb/cockroach/pkg/storage"
"github.com/cockroachdb/cockroach/pkg/storage/enginepb"
"github.com/cockroachdb/cockroach/pkg/util/admission"
"github.com/cockroachdb/cockroach/pkg/util/admission/admissionpb"
"github.com/cockroachdb/cockroach/pkg/util/contextutil"
Expand Down Expand Up @@ -3231,33 +3230,6 @@ func mapToHotReplicasInfo(repls []CandidateReplica) []HotReplicaInfo {
return hotRepls
}

// StoreKeySpanStats carries the result of a stats computation over a key range.
type StoreKeySpanStats struct {
ReplicaCount int
MVCC enginepb.MVCCStats
ApproximateDiskBytes uint64
}

// ComputeStatsForKeySpan computes the aggregated MVCCStats for all replicas on
// this store which contain any keys in the supplied range.
func (s *Store) ComputeStatsForKeySpan(startKey, endKey roachpb.RKey) (StoreKeySpanStats, error) {
var result StoreKeySpanStats

newStoreReplicaVisitor(s).UndefinedOrder().Visit(func(repl *Replica) bool {
desc := repl.Desc()
if bytes.Compare(startKey, desc.EndKey) >= 0 || bytes.Compare(desc.StartKey, endKey) >= 0 {
return true // continue
}
result.MVCC.Add(repl.GetMVCCStats())
result.ReplicaCount++
return true
})

var err error
result.ApproximateDiskBytes, err = s.TODOEngine().ApproximateDiskBytes(startKey.AsRawKey(), endKey.AsRawKey())
return result, err
}

// ReplicateQueueDryRun runs the given replica through the replicate queue
// (using the allocator) without actually carrying out any changes, returning
// all trace messages collected along the way.
Expand Down
3 changes: 3 additions & 0 deletions pkg/roachpb/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,7 @@ proto_library(
"io-formats.proto",
"metadata.proto",
"span_config.proto",
"span_stats.proto",
],
strip_import_prefix = "/pkg",
visibility = ["//visibility:public"],
Expand All @@ -158,6 +159,7 @@ proto_library(
"@com_github_gogo_protobuf//gogoproto:gogo_proto",
"@com_google_protobuf//:duration_proto",
"@com_google_protobuf//:timestamp_proto",
"@go_googleapis//google/api:annotations_proto",
],
)

Expand All @@ -179,6 +181,7 @@ go_proto_library(
"//pkg/util/tracing/tracingpb",
"@com_github_cockroachdb_errors//errorspb",
"@com_github_gogo_protobuf//gogoproto",
"@org_golang_google_genproto//googleapis/api/annotations:go_default_library",
],
)

Expand Down
38 changes: 38 additions & 0 deletions pkg/roachpb/span_stats.proto
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
// Copyright 2023 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

syntax = "proto3";
package cockroach.roachpb;
option go_package = "roachpb";

import "storage/enginepb/mvcc.proto";
import "gogoproto/gogo.proto";
import "google/api/annotations.proto";

// SpanStatsRequest is used to request a SpanStatsResponse for the given key
// span and node id. A node_id value of 0 indicates that the server should
// fan-out to all nodes, and the resulting SpanStatsResponse is a cumulative
// result from across the cluster.
message SpanStatsRequest {
string node_id = 1 [(gogoproto.customname) = "NodeID"];
bytes start_key = 2 [(gogoproto.casttype) = "RKey"];
bytes end_key = 3 [(gogoproto.casttype) = "RKey"];
}

message SpanStatsResponse {
// range_count measures the number of ranges that the request span falls within.
// A SpanStatsResponse for a span that lies within a range, and whose start
// key sorts after the range start, and whose end key sorts before the
// range end, will have a range_count value of 1.
int32 range_count = 2;
uint64 approximate_disk_bytes = 3;
cockroach.storage.enginepb.MVCCStats total_stats = 1
[(gogoproto.nullable) = false];
}
12 changes: 12 additions & 0 deletions pkg/rpc/auth_tenant.go
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,9 @@ func (a tenantAuthorizer) authorize(
case "/cockroach.server.serverpb.Status/TransactionContentionEvents":
return a.authTenant(tenID)

case "/cockroach.server.serverpb.Status/SpanStats":
return a.authSpanStats(tenID, req.(*roachpb.SpanStatsRequest))

case "/cockroach.roachpb.Internal/GetSpanConfigs":
return a.authGetSpanConfigs(tenID, req.(*roachpb.GetSpanConfigsRequest))

Expand Down Expand Up @@ -224,6 +227,15 @@ func (a tenantAuthorizer) authGetRangeDescriptors(
return validateSpan(tenID, args.Span)
}

func (a tenantAuthorizer) authSpanStats(
tenID roachpb.TenantID, args *roachpb.SpanStatsRequest,
) error {
return validateSpan(tenID, roachpb.Span{
Key: args.StartKey.AsRawKey(),
EndKey: args.EndKey.AsRawKey(),
})
}

// authRangeLookup authorizes the provided tenant to invoke the RangeLookup RPC
// with the provided args.
func (a tenantAuthorizer) authRangeLookup(
Expand Down
2 changes: 2 additions & 0 deletions pkg/server/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ go_library(
"init.go",
"init_handshake.go",
"initial_sql.go",
"key_visualizer_server.go",
"listen_and_update_addrs.go",
"load_endpoint.go",
"loss_of_quorum.go",
Expand Down Expand Up @@ -412,6 +413,7 @@ go_test(
"server_test.go",
"settings_cache_test.go",
"settings_test.go",
"span_stats_test.go",
"statements_test.go",
"stats_test.go",
"status_ext_test.go",
Expand Down
6 changes: 3 additions & 3 deletions pkg/server/admin.go
Original file line number Diff line number Diff line change
Expand Up @@ -1335,7 +1335,7 @@ func (s *adminServer) statsForSpan(
}
type nodeResponse struct {
nodeID roachpb.NodeID
resp *serverpb.SpanStatsResponse
resp *roachpb.SpanStatsResponse
err error
}

Expand All @@ -1351,13 +1351,13 @@ func (s *adminServer) statsForSpan(
},
func(ctx context.Context) {
// Set a generous timeout on the context for each individual query.
var spanResponse *serverpb.SpanStatsResponse
var spanResponse *roachpb.SpanStatsResponse
err := contextutil.RunWithTimeout(ctx, "request remote stats", 20*time.Second,
func(ctx context.Context) error {
conn, err := s.serverIterator.dialNode(ctx, serverID(nodeID))
if err == nil {
client := serverpb.NewStatusClient(conn)
req := serverpb.SpanStatsRequest{
req := roachpb.SpanStatsRequest{
StartKey: rSpan.Key,
EndKey: rSpan.EndKey,
NodeID: nodeID.String(),
Expand Down
Loading

0 comments on commit 4fee2a4

Please sign in to comment.