diff --git a/tests/fixture/tmpnet/network.go b/tests/fixture/tmpnet/network.go index 63e2306eb200..c7344c7a02f7 100644 --- a/tests/fixture/tmpnet/network.go +++ b/tests/fixture/tmpnet/network.go @@ -510,23 +510,6 @@ func (n *Network) StartNode(ctx context.Context, log logging.Logger, node *Node) // Restart a single node. func (n *Network) RestartNode(ctx context.Context, log logging.Logger, node *Node) error { - // Ensure the node reuses the same API port across restarts to ensure - // consistent labeling of metrics. Otherwise prometheus's automatic - // addition of the `instance` label (host:port) results in - // segmentation of results for a given node every time the port - // changes on restart. This segmentation causes graphs on the grafana - // dashboards to display multiple series per graph for a given node, - // one for each port that the node used. - // - // There is a non-zero chance of the port being allocatted to a - // different process and the node subsequently being unable to start, - // but the alternative is having to update the grafana dashboards - // query-by-query to ensure that node metrics ignore the instance - // label. - if err := node.SaveAPIPort(); err != nil { - return err - } - if err := node.Stop(ctx); err != nil { return fmt.Errorf("failed to stop node %s: %w", node.NodeID, err) } diff --git a/tests/fixture/tmpnet/node.go b/tests/fixture/tmpnet/node.go index 570d1d616d5d..3be00dae5324 100644 --- a/tests/fixture/tmpnet/node.go +++ b/tests/fixture/tmpnet/node.go @@ -9,7 +9,6 @@ import ( "errors" "fmt" "io" - "net" "net/http" "net/netip" "os" @@ -366,19 +365,10 @@ func (n *Node) EnsureNodeID() error { return nil } -// Saves the currently allocated API port to the node's configuration -// for use across restarts. Reusing the port ensures consistent -// labeling of metrics. -func (n *Node) SaveAPIPort() error { - hostPort := strings.TrimPrefix(n.URI, "http://") - if len(hostPort) == 0 { - // Without an API URI there is nothing to save - return nil - } - _, port, err := net.SplitHostPort(hostPort) - if err != nil { - return err - } - n.Flags[config.HTTPPortKey] = port - return nil +// GetUniqueID returns a globally unique identifier for the node. +func (n *Node) GetUniqueID() string { + nodeIDString := n.NodeID.String() + startIndex := len(ids.NodeIDPrefix) + endIndex := startIndex + 8 // 8 characters should be enough to identify a node in the context of its network + return n.NetworkUUID + "-" + strings.ToLower(nodeIDString[startIndex:endIndex]) } diff --git a/tests/fixture/tmpnet/node_process.go b/tests/fixture/tmpnet/node_process.go index 2f16ddd19a0d..b3bb5c97aadd 100644 --- a/tests/fixture/tmpnet/node_process.go +++ b/tests/fixture/tmpnet/node_process.go @@ -247,6 +247,10 @@ func (p *NodeProcess) getProcess() (*os.Process, error) { func (p *NodeProcess) writeMonitoringConfig() error { // Ensure labeling that uniquely identifies the node and its network commonLabels := FlagsMap{ + // Explicitly setting an instance label avoids the default + // behavior of using the node's URI since the URI isn't + // guaranteed stable (e.g. port may change after restart). + "instance": p.node.GetUniqueID(), "network_uuid": p.node.NetworkUUID, "node_id": p.node.NodeID, "is_ephemeral_node": strconv.FormatBool(p.node.IsEphemeral),