Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

OCPBUGS-25547: PTP metrics still includes metrics for old config after deleting a ptpconfig #281

Merged
merged 1 commit into from
Jan 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
154 changes: 108 additions & 46 deletions plugins/ptp_operator/event/event.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
package event

import (
"strings"
"sync"

"github.com/prometheus/client_golang/prometheus"
Expand All @@ -36,6 +37,8 @@ const (
GNSS ClockSourceType = "gnss"
// DPLL ... ClockSourceType = "dpll"
DPLL ClockSourceType = "dpll"
// GM ... ClockSourceType
GM ClockSourceType = "GM"
)

// DependingClockState ...
Expand All @@ -45,7 +48,7 @@ type DependingClockState []*ClockState
type PTPEventState struct {
sync.Mutex
CurrentPTPStateEvent ptp.SyncState
DependsOn map[string]DependingClockState // [dpll][ens2f0]State
DependsOn map[string]DependingClockState // [dpll][]*ClockState
Type ptp.EventType
}

Expand Down Expand Up @@ -92,79 +95,96 @@ func (dt *DependingClockState) hasMetricHelp() map[string]string {
}

// UpdateCurrentEventState ...
func (p *PTPEventState) UpdateCurrentEventState(c ClockState) ptp.SyncState {
func (p *PTPEventState) UpdateCurrentEventState(c ClockState, metrics map[string]*PMetric, help map[string]string) ptp.SyncState {
p.Lock()
defer func() {
p.Unlock()
}()
// if the current state is not locked and the new state is locked then update the current state
if dep, ok := p.DependsOn[c.Process]; ok {
index, d := dep.hasClockState(*c.IFace)
index, clockState := dep.hasClockState(*c.IFace)
// update the clock state if found or else
if index == -1 {
d = &ClockState{}
dep = append(dep, d)
clockState = &ClockState{}
clockState.Metric = dep.hasMetric() //possible same metrics available outside the nic stats
clockState.HelpText = dep.hasMetricHelp()
dep = append(dep, clockState)
p.DependsOn[c.Process] = dep
}
d.Offset = c.Offset
d.IFace = c.IFace
d.State = c.State
d.Process = c.Process
d.Value = c.Value
d.ClockSource = c.ClockSource
clockState.Offset = c.Offset
clockState.IFace = c.IFace
clockState.State = c.State
clockState.Process = c.Process
clockState.Value = c.Value
clockState.ClockSource = c.ClockSource
clockState.NodeName = c.NodeName
// update the metric
if c.Offset != nil {
d.Offset = pointer.Float64(*c.Offset)
clockState.Offset = pointer.Float64(*c.Offset)
}

for k, v := range c.Value {
iface := ""
if d.IFace != nil {
iface = *d.IFace
}
if d.Metric == nil {
d.Metric = dep.hasMetric()
d.HelpText = dep.hasMetricHelp()
}
if d.Metric[k].metricGauge != nil {
d.Metric[k].metricGauge.With(map[string]string{"from": d.Process, "process": d.Process,
"node": d.NodeName, "iface": iface}).Set(float64(v))
if clockState.IFace != nil {
iface := *clockState.IFace
r := []rune(iface)
alias := string(r[:len(r)-1]) + "x"
for k, v := range c.Value {
if clockState.Metric[k].metricGauge != nil {
clockState.Metric[k].metricGauge.With(map[string]string{"from": clockState.Process, "process": clockState.Process,
"node": clockState.NodeName, "iface": alias}).Set(float64(v))
} else {
log.Infof("metric object was not found for %s=%s", iface, k)
}
}
} else {
log.Error("interface name was empty, skipping creation of metrics")
}
} else {
clockState := &ClockState{
State: c.State,
Process: c.Process,
Value: c.Value,
ClockSource: c.ClockSource,
NodeName: c.NodeName,
}
if c.Offset != nil {
clockState.Offset = pointer.Float64(*c.Offset)
}
if c.IFace != nil {
clockState.IFace = pointer.String(*c.IFace)
}
metrics := map[string]*PMetric{}
if metrics == nil {
metrics = map[string]*PMetric{}
help = map[string]string{}
}

for k, v := range c.Value {
// create metrics
metrics[k] = &PMetric{
isRegistered: true,
metricGauge: prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Namespace: ptpNamespace,
Subsystem: ptpSubsystem,
Name: k,
Help: func() string {
if h, ok2 := c.HelpText[k]; ok2 {
return h
}
return ""
}(),
}, []string{"from", "process", "node", "iface"}),
metricCounter: nil,
if m, hasMetric := metrics[k]; hasMetric {
metrics[k] = m
if h, hasHelpTxt := help[k]; hasHelpTxt {
clockState.HelpText[k] = h // expecets to have help text for value
}
} else {
metrics[k] = &PMetric{
isRegistered: true,
metricGauge: prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Namespace: ptpNamespace,
Subsystem: ptpSubsystem,
Name: k,
Help: func() string {
if h, ok2 := c.HelpText[k]; ok2 {
return h
}
return ""
}(),
}, []string{"from", "process", "node", "iface"}),
metricCounter: nil,
}
}

err := prometheus.Register(metrics[k].metricGauge)
if err != nil {
log.Info("ignore, metric is already registered")
log.Infof("ignore, metric for %s is already for interface %s and process %s", k, *clockState.IFace, clockState.Process)
}
iface := ""
if clockState.IFace != nil {
Expand All @@ -176,9 +196,9 @@ func (p *PTPEventState) UpdateCurrentEventState(c ClockState) ptp.SyncState {
"node": clockState.NodeName, "iface": alias}).Set(float64(v))
}
clockState.Metric = metrics
p.DependsOn[c.Process] = []*ClockState{clockState}
p.DependsOn[clockState.Process] = []*ClockState{clockState}
}

log.Infof("depends on end %s", p.PrintDependsOn())
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

remove temporary logs

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done

// if all locked then its locked
// if anyone HOLDOVER then holdover
// if anyone FREERUN then freerun
Expand Down Expand Up @@ -240,24 +260,66 @@ func (p *PTPEventState) UnRegisterAllMetrics() {
// DeleteAllMetrics ... delete all metrics
//
// write a functions to delete meteric from dependson object
func (p *PTPEventState) DeleteAllMetrics() {
func (p *PTPEventState) DeleteAllMetrics(m []*prometheus.GaugeVec) {
// write loop p.DependsOn
if p.DependsOn == nil {
return
}
for _, d := range p.DependsOn {
// write loop d.Metric
for _, dd := range d {
if dd.IFace == nil {
continue
}
r := []rune(*dd.IFace)
jzding marked this conversation as resolved.
Show resolved Hide resolved
alias := string(r[:len(r)-1]) + "x"
if dd.Metric != nil {
// unregister metric
for _, v := range dd.Metric {
if v.metricGauge != nil && dd.IFace != nil {
v.metricGauge.Delete(prometheus.Labels{"process": dd.Process, "iface": *dd.IFace, "node": dd.NodeName})
v.metricGauge.Delete(prometheus.Labels{"process": dd.Process, "iface": alias, "node": dd.NodeName})
prometheus.Unregister(v.metricGauge)
}
}
for _, mm := range m {
mm.Delete(prometheus.Labels{
"process": dd.Process, "from": dd.Process, "node": dd.NodeName, "iface": alias})
// find metrics without from - click clock state
mm.Delete(prometheus.Labels{
"process": dd.Process, "node": dd.NodeName, "iface": alias})
}
}
delete(p.DependsOn, dd.Process)
}
}
}

// PrintDependsOn ...
func (p *PTPEventState) PrintDependsOn() string {
out := strings.Builder{}
for process, d := range p.DependsOn {
out.WriteString("Depending Process (key): " + process + "\n")
for _, dd := range d {
if dd.IFace != nil {
out.WriteString("IFace " + *dd.IFace + "\n")
}
out.WriteString("ClockSource " + string(dd.ClockSource) + "\n")
out.WriteString("Process " + dd.Process + "\n")
}
out.WriteString("\n")
}
return out.String()
}

// PrintClockState ...
func (c *ClockState) PrintClockState() string {
out := strings.Builder{}
out.WriteString("ClockState Process: " + c.Process + "\n")
if c.IFace != nil {
out.WriteString("Iface: " + *c.IFace + "\n")
}
for k := range c.Value {
out.WriteString("Name: " + k + "\n")
}
return out.String()
}
80 changes: 79 additions & 1 deletion plugins/ptp_operator/event/event_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,10 @@ import (
"testing"
)

var (
depObject = &event.PTPEventState{DependsOn: map[string]event.DependingClockState{}}
)

type inputState struct {
state ptp.SyncState
process string
Expand All @@ -37,6 +41,73 @@ type eventTestCase struct {
input inputState
}

type ClockStateTestCase struct {
expectedCount int
eventStateObject *event.PTPEventState
input event.ClockState
}

var clockStateTestCase = []ClockStateTestCase{{
eventStateObject: depObject,
input: event.ClockState{
State: ptp.FREERUN,
Offset: pointer.Float64(01),
IFace: pointer.String("ens01"),
Process: "dpll",
Value: map[string]int64{"frequency_status": 2, "phase_status": 3, "pps_status": 1},
ClockSource: event.DPLL,
NodeName: "test",
HelpText: map[string]string{
"frequency_status": "-1=UNKNOWN, 0=INVALID, 1=FREERUN, 2=LOCKED, 3=LOCKED_HO_ACQ, 4=HOLDOVER",
"phase_status": "-1=UNKNOWN, 0=INVALID, 1=FREERUN, 2=LOCKED, 3=LOCKED_HO_ACQ, 4=HOLDOVER",
"pps_status": "0=UNAVAILABLE, 1=AVAILABLE",
},
},
expectedCount: 1,
},
{eventStateObject: depObject,
input: event.ClockState{
State: ptp.FREERUN,
Offset: pointer.Float64(01),
IFace: pointer.String("ens02"),
Process: "dpll",
Value: map[string]int64{"frequency_status": 2, "phase_status": 3, "pps_status": 1},
ClockSource: event.DPLL,
NodeName: "test",
HelpText: map[string]string{
"frequency_status": "-1=UNKNOWN, 0=INVALID, 1=FREERUN, 2=LOCKED, 3=LOCKED_HO_ACQ, 4=HOLDOVER",
"phase_status": "-1=UNKNOWN, 0=INVALID, 1=FREERUN, 2=LOCKED, 3=LOCKED_HO_ACQ, 4=HOLDOVER",
"pps_status": "0=UNAVAILABLE, 1=AVAILABLE",
},
},
expectedCount: 2,
},
{eventStateObject: depObject,
input: event.ClockState{
State: ptp.FREERUN,
Offset: pointer.Float64(01),
IFace: pointer.String("ens01"),
Process: "GM",
Value: nil,
Metric: nil,
ClockSource: event.GM,
NodeName: "test",
},
expectedCount: 1,
},
{eventStateObject: depObject,
input: event.ClockState{
State: ptp.FREERUN,
Offset: pointer.Float64(01),
IFace: pointer.String("ens01"),
Process: "gnss",
Value: map[string]int64{"gnss_status": 3},
ClockSource: event.GNSS,
HelpText: map[string]string{"gnss_status": "0=NOFIX, 1=Dead Reckoning Only, 2=2D-FIX, 3=3D-FIX, 4=GPS+dead reckoning fix, 5=Time only fix"},
NodeName: "test",
},
expectedCount: 1,
}}
var testCase = []eventTestCase{{
eventStateObject: &event.PTPEventState{
CurrentPTPStateEvent: ptp.FREERUN,
Expand Down Expand Up @@ -131,7 +202,14 @@ func Test_UpdateEventState(t *testing.T) {
State: tc.input.state,
Offset: tc.input.offset,
Process: tc.input.process,
})
}, nil, nil)
assert.Equal(t, tc.expectedState, cSTate)
}
}

func TestPTPEventState_UpdateCurrentEventState(t *testing.T) {
for _, tc := range clockStateTestCase {
tc.eventStateObject.UpdateCurrentEventState(tc.input, nil, nil)
assert.Equal(t, tc.expectedCount, len(tc.eventStateObject.DependsOn[tc.input.Process]))
}
}
13 changes: 6 additions & 7 deletions plugins/ptp_operator/metrics/logparser.go
Original file line number Diff line number Diff line change
Expand Up @@ -361,7 +361,7 @@ func (p *PTPEventManager) ParseGMLogs(processName, configName, output string, fi
State: GetSyncState(syncState),
IFace: pointer.String(iface),
Process: processName,
ClockSource: gmProcessName,
ClockSource: event.GM,
Value: nil,
Metric: nil,
NodeName: ptpNodeName,
Expand All @@ -370,7 +370,7 @@ func (p *PTPEventManager) ParseGMLogs(processName, configName, output string, fi

SyncState.With(map[string]string{"process": processName, "node": ptpNodeName, "iface": alias}).Set(GetSyncStateID(syncState))
// status metrics
ptpStats[masterType].SetPtpDependentEventState(clockState)
ptpStats[masterType].SetPtpDependentEventState(clockState, ptpStats.HasMetrics(processName), ptpStats.HasMetricHelp(processName))
ptpStats[masterType].SetLastSyncState(clockState.State)
ptpStats[masterType].SetAlias(alias)

Expand Down Expand Up @@ -444,17 +444,16 @@ logStatusLoop:
IFace: iface,
Value: map[string]int64{"frequency_status": frequencyStatus, "phase_status": int64(phaseStatus),
"pps_status": int64(ppsStatus)},
ClockSource: DPLL,
ClockSource: event.DPLL,
NodeName: ptpNodeName,
HelpText: map[string]string{
"frequency_status": "-1=UNKNOWN, 0=INVALID, 1=FREERUN, 2=LOCKED, 3=LOCKED_HO_ACQ, 4=HOLDOVER",
"phase_status": "-1=UNKNOWN, 0=INVALID, 1=FREERUN, 2=LOCKED, 3=LOCKED_HO_ACQ, 4=HOLDOVER",
"pps_status": "0=UNAVAILABLE, 1=AVAILABLE",
},
})
}, ptpStats.HasMetrics(processName), ptpStats.HasMetricHelp(processName))
SyncState.With(map[string]string{"process": processName, "node": ptpNodeName, "iface": alias}).Set(GetSyncStateID(syncState))
UpdatePTPOffsetMetrics(processName, processName, alias, dpllOffset)
UpdatePpsStatusMetrics(processName, alias, ppsStatus)
} else {
log.Errorf("error parsing dpll %s", err.Error())
}
Expand Down Expand Up @@ -507,10 +506,10 @@ func (p *PTPEventManager) ParseGNSSLogs(processName, configName, output string,
Process: processName,
IFace: iface,
Value: map[string]int64{"gnss_status": gnssState},
ClockSource: GNSS,
ClockSource: event.GNSS,
NodeName: ptpNodeName,
HelpText: map[string]string{"gnss_status": "0=NOFIX, 1=Dead Reckoning Only, 2=2D-FIX, 3=3D-FIX, 4=GPS+dead reckoning fix, 5=Time only fix"},
})
}, ptpStats.HasMetrics(processName), ptpStats.HasMetricHelp(processName))
// reduce noise ; if state changed then send events
if lastState != GetSyncState(syncState) || errState != nil {
log.Infof("%s last state %s and current state %s", processName, lastState, GetSyncState(syncState))
Expand Down
Loading
Loading