Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add collector for Linux kTLS stats #2950

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 37 additions & 0 deletions collector/fixtures/e2e-64k-page-output.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1318,6 +1318,42 @@ node_ksmd_run 1
# HELP node_ksmd_sleep_seconds ksmd 'sleep_millisecs' file.
# TYPE node_ksmd_sleep_seconds gauge
node_ksmd_sleep_seconds 0.02
# HELP node_ktls_tls_curr_rx_device number of RX sessions currently installed where NIC handles cryptography
# TYPE node_ktls_tls_curr_rx_device gauge
node_ktls_tls_curr_rx_device 0
# HELP node_ktls_tls_curr_rx_sw number of RX sessions currently installed where host handles cryptography
# TYPE node_ktls_tls_curr_rx_sw gauge
node_ktls_tls_curr_rx_sw 5
# HELP node_ktls_tls_curr_tx_device number of TX sessions currently installed where NIC handles cryptography
# TYPE node_ktls_tls_curr_tx_device gauge
node_ktls_tls_curr_tx_device 0
# HELP node_ktls_tls_curr_tx_sw number of TX sessions currently installed where host handles cryptography
# TYPE node_ktls_tls_curr_tx_sw gauge
node_ktls_tls_curr_tx_sw 5
# HELP node_ktls_tls_decrypt_error_total record decryption failed (e.g. due to incorrect authentication tag)
# TYPE node_ktls_tls_decrypt_error_total counter
node_ktls_tls_decrypt_error_total 0
# HELP node_ktls_tls_decrypt_retry_total number of RX records which had to be re-decrypted due to TLS_RX_EXPECT_NO_PAD mis-prediction
# TYPE node_ktls_tls_decrypt_retry_total counter
node_ktls_tls_decrypt_retry_total 0
# HELP node_ktls_tls_no_pad_violation_total number of data RX records which had to be re-decrypted due to TLS_RX_EXPECT_NO_PAD mis-prediction
# TYPE node_ktls_tls_no_pad_violation_total counter
node_ktls_tls_no_pad_violation_total 0
# HELP node_ktls_tls_rx_device_resync_total number of RX resyncs sent to NICs handling cryptography
# TYPE node_ktls_tls_rx_device_resync_total counter
node_ktls_tls_rx_device_resync_total 0
# HELP node_ktls_tls_rx_device_total number of RX sessions opened with NIC cryptograph
# TYPE node_ktls_tls_rx_device_total counter
node_ktls_tls_rx_device_total 0
# HELP node_ktls_tls_rx_sw_total number of RX sessions opened with host cryptography
# TYPE node_ktls_tls_rx_sw_total counter
node_ktls_tls_rx_sw_total 178
# HELP node_ktls_tls_tx_device_total number of TX sessions opened with NIC cryptograph
# TYPE node_ktls_tls_tx_device_total counter
node_ktls_tls_tx_device_total 0
# HELP node_ktls_tls_tx_sw_total number of TX sessions opened with host cryptography
# TYPE node_ktls_tls_tx_sw_total counter
node_ktls_tls_tx_sw_total 161
# HELP node_lnstat_allocs_total linux network cache stats
# TYPE node_lnstat_allocs_total counter
node_lnstat_allocs_total{cpu="0",subsystem="arp_cache"} 1
Expand Down Expand Up @@ -2914,6 +2950,7 @@ node_scrape_collector_success{collector="infiniband"} 1
node_scrape_collector_success{collector="interrupts"} 1
node_scrape_collector_success{collector="ipvs"} 1
node_scrape_collector_success{collector="ksmd"} 1
node_scrape_collector_success{collector="ktls"} 1
node_scrape_collector_success{collector="lnstat"} 1
node_scrape_collector_success{collector="loadavg"} 1
node_scrape_collector_success{collector="mdadm"} 1
Expand Down
37 changes: 37 additions & 0 deletions collector/fixtures/e2e-output.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1340,6 +1340,42 @@ node_ksmd_run 1
# HELP node_ksmd_sleep_seconds ksmd 'sleep_millisecs' file.
# TYPE node_ksmd_sleep_seconds gauge
node_ksmd_sleep_seconds 0.02
# HELP node_ktls_tls_curr_rx_device number of RX sessions currently installed where NIC handles cryptography
# TYPE node_ktls_tls_curr_rx_device gauge
node_ktls_tls_curr_rx_device 0
# HELP node_ktls_tls_curr_rx_sw number of RX sessions currently installed where host handles cryptography
# TYPE node_ktls_tls_curr_rx_sw gauge
node_ktls_tls_curr_rx_sw 5
# HELP node_ktls_tls_curr_tx_device number of TX sessions currently installed where NIC handles cryptography
# TYPE node_ktls_tls_curr_tx_device gauge
node_ktls_tls_curr_tx_device 0
# HELP node_ktls_tls_curr_tx_sw number of TX sessions currently installed where host handles cryptography
# TYPE node_ktls_tls_curr_tx_sw gauge
node_ktls_tls_curr_tx_sw 5
# HELP node_ktls_tls_decrypt_error_total record decryption failed (e.g. due to incorrect authentication tag)
# TYPE node_ktls_tls_decrypt_error_total counter
node_ktls_tls_decrypt_error_total 0
# HELP node_ktls_tls_decrypt_retry_total number of RX records which had to be re-decrypted due to TLS_RX_EXPECT_NO_PAD mis-prediction
# TYPE node_ktls_tls_decrypt_retry_total counter
node_ktls_tls_decrypt_retry_total 0
# HELP node_ktls_tls_no_pad_violation_total number of data RX records which had to be re-decrypted due to TLS_RX_EXPECT_NO_PAD mis-prediction
# TYPE node_ktls_tls_no_pad_violation_total counter
node_ktls_tls_no_pad_violation_total 0
# HELP node_ktls_tls_rx_device_resync_total number of RX resyncs sent to NICs handling cryptography
# TYPE node_ktls_tls_rx_device_resync_total counter
node_ktls_tls_rx_device_resync_total 0
# HELP node_ktls_tls_rx_device_total number of RX sessions opened with NIC cryptograph
# TYPE node_ktls_tls_rx_device_total counter
node_ktls_tls_rx_device_total 0
# HELP node_ktls_tls_rx_sw_total number of RX sessions opened with host cryptography
# TYPE node_ktls_tls_rx_sw_total counter
node_ktls_tls_rx_sw_total 178
# HELP node_ktls_tls_tx_device_total number of TX sessions opened with NIC cryptograph
# TYPE node_ktls_tls_tx_device_total counter
node_ktls_tls_tx_device_total 0
# HELP node_ktls_tls_tx_sw_total number of TX sessions opened with host cryptography
# TYPE node_ktls_tls_tx_sw_total counter
node_ktls_tls_tx_sw_total 161
# HELP node_lnstat_allocs_total linux network cache stats
# TYPE node_lnstat_allocs_total counter
node_lnstat_allocs_total{cpu="0",subsystem="arp_cache"} 1
Expand Down Expand Up @@ -2936,6 +2972,7 @@ node_scrape_collector_success{collector="infiniband"} 1
node_scrape_collector_success{collector="interrupts"} 1
node_scrape_collector_success{collector="ipvs"} 1
node_scrape_collector_success{collector="ksmd"} 1
node_scrape_collector_success{collector="ktls"} 1
node_scrape_collector_success{collector="lnstat"} 1
node_scrape_collector_success{collector="loadavg"} 1
node_scrape_collector_success{collector="mdadm"} 1
Expand Down
12 changes: 12 additions & 0 deletions collector/fixtures/proc/net/tls_stat
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
TlsCurrTxSw 5
TlsCurrRxSw 5
TlsCurrTxDevice 0
TlsCurrRxDevice 0
TlsTxSw 161
TlsRxSw 178
TlsTxDevice 0
TlsRxDevice 0
TlsDecryptError 0
TlsRxDeviceResync 0
TlsDecryptRetry 0
TlsRxNoPadViolation 0
130 changes: 130 additions & 0 deletions collector/ktls_linux.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
// Copyright 2024 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build !noktls
// +build !noktls

package collector

import (
"fmt"

"github.com/go-kit/log"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/procfs"
)

type ktlsCollector struct {
fs procfs.FS
logger log.Logger
}

func init() {
	// The kTLS collector is opt-in: it is registered as disabled by default.
	const collectorName = "ktls"
	registerCollector(collectorName, defaultDisabled, NewKTLSCollector)
}

// NewKTLSCollector returns a new Collector exposing kTLS stats.
//
// It opens a procfs handle rooted at *procPath and fails if that handle
// cannot be created.
func NewKTLSCollector(logger log.Logger) (Collector, error) {
	procFS, err := procfs.NewFS(*procPath)
	if err != nil {
		return nil, fmt.Errorf("failed to open procfs: %w", err)
	}

	collector := &ktlsCollector{logger: logger, fs: procFS}
	return collector, nil
}

func (c *ktlsCollector) Update(ch chan<- prometheus.Metric) error {
stat, err := c.fs.NewTLSStat()
if err != nil {
return err
}

ktlsCurrTxSwDesc := prometheus.NewDesc(
prometheus.BuildFQName(namespace, "ktls", "tls_curr_tx_sw"),
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please make this a const:

Suggested change
prometheus.BuildFQName(namespace, "ktls", "tls_curr_tx_sw"),
const ktlsSubsystem = "ktls"
...
prometheus.BuildFQName(namespace, ktlsSubsystem, "tls_curr_tx_sw"),

"number of TX sessions currently installed where host handles cryptography",
nil, nil,
)
ktlsCurrRxSwDesc := prometheus.NewDesc(
prometheus.BuildFQName(namespace, "ktls", "tls_curr_rx_sw"),
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These metric names are a bit confusing compared to the help text.

Maybe something like this? We try and make metric names more human friendly.

Suggested change
prometheus.BuildFQName(namespace, "ktls", "tls_curr_rx_sw"),
prometheus.BuildFQName(namespace, "ktls", "receive_sessions"),

"number of RX sessions currently installed where host handles cryptography",
nil, nil,
)
ktlsCurrTxDeviceDesc := prometheus.NewDesc(
prometheus.BuildFQName(namespace, "ktls", "tls_curr_tx_device"),
"number of TX sessions currently installed where NIC handles cryptography",
nil, nil,
)
ktlsCurrRxDeviceDesc := prometheus.NewDesc(
prometheus.BuildFQName(namespace, "ktls", "tls_curr_rx_device"),
"number of RX sessions currently installed where NIC handles cryptography",
nil, nil,
)
ktlsTxDesc := prometheus.NewDesc(
prometheus.BuildFQName(namespace, "ktls", "tls_tx_sw_total"),
"number of TX sessions opened with host cryptography",
nil, nil,
)
ktlsRxDesc := prometheus.NewDesc(
prometheus.BuildFQName(namespace, "ktls", "tls_rx_sw_total"),
"number of RX sessions opened with host cryptography",
nil, nil,
)
ktlsTxDeviceDesc := prometheus.NewDesc(
prometheus.BuildFQName(namespace, "ktls", "tls_tx_device_total"),
"number of TX sessions opened with NIC cryptograph",
nil, nil,
)
ktlsRxDeviceDesc := prometheus.NewDesc(
prometheus.BuildFQName(namespace, "ktls", "tls_rx_device_total"),
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure what "device" is, this appears to be about sessions.

Suggested change
prometheus.BuildFQName(namespace, "ktls", "tls_rx_device_total"),
prometheus.BuildFQName(namespace, "ktls", "tls_rx_sessions_total"),

"number of RX sessions opened with NIC cryptograph",
nil, nil,
)
ktlsDecryptErrorDesc := prometheus.NewDesc(
prometheus.BuildFQName(namespace, "ktls", "tls_decrypt_error_total"),
"record decryption failed (e.g. due to incorrect authentication tag)",
nil, nil,
)
ktlsRxDeviceResyncDesc := prometheus.NewDesc(
prometheus.BuildFQName(namespace, "ktls", "tls_rx_device_resync_total"),
"number of RX resyncs sent to NICs handling cryptography",
nil, nil,
)
ktlsDecryptRetryDesc := prometheus.NewDesc(
prometheus.BuildFQName(namespace, "ktls", "tls_decrypt_retry_total"),
"number of RX records which had to be re-decrypted due to TLS_RX_EXPECT_NO_PAD mis-prediction",
nil, nil,
)
ktlsRxNoPadViolationDesc := prometheus.NewDesc(
prometheus.BuildFQName(namespace, "ktls", "tls_no_pad_violation_total"),
"number of data RX records which had to be re-decrypted due to TLS_RX_EXPECT_NO_PAD mis-prediction",
nil, nil,
)

ch <- prometheus.MustNewConstMetric(ktlsCurrTxSwDesc, prometheus.GaugeValue, float64(stat.TLSCurrTxSw))
ch <- prometheus.MustNewConstMetric(ktlsCurrRxSwDesc, prometheus.GaugeValue, float64(stat.TLSCurrTxSw))
ch <- prometheus.MustNewConstMetric(ktlsCurrTxDeviceDesc, prometheus.GaugeValue, float64(stat.TLSCurrTxDevice))
ch <- prometheus.MustNewConstMetric(ktlsCurrRxDeviceDesc, prometheus.GaugeValue, float64(stat.TLSCurrRxDevice))
ch <- prometheus.MustNewConstMetric(ktlsTxDesc, prometheus.CounterValue, float64(stat.TLSTxSw))
ch <- prometheus.MustNewConstMetric(ktlsRxDesc, prometheus.CounterValue, float64(stat.TLSRxSw))
ch <- prometheus.MustNewConstMetric(ktlsTxDeviceDesc, prometheus.CounterValue, float64(stat.TLSTxDevice))
ch <- prometheus.MustNewConstMetric(ktlsRxDeviceDesc, prometheus.CounterValue, float64(stat.TLSRxDevice))
ch <- prometheus.MustNewConstMetric(ktlsDecryptErrorDesc, prometheus.CounterValue, float64(stat.TLSDecryptError))
ch <- prometheus.MustNewConstMetric(ktlsRxDeviceResyncDesc, prometheus.CounterValue, float64(stat.TLSRxDeviceResync))
ch <- prometheus.MustNewConstMetric(ktlsDecryptRetryDesc, prometheus.CounterValue, float64(stat.TLSDecryptRetry))
ch <- prometheus.MustNewConstMetric(ktlsRxNoPadViolationDesc, prometheus.CounterValue, float64(stat.TLSRxNoPadViolation))

return err
}
1 change: 1 addition & 0 deletions end-to-end-test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ enabled_collectors=$(cat << COLLECTORS
interrupts
ipvs
ksmd
ktls
lnstat
loadavg
mdadm
Expand Down
4 changes: 2 additions & 2 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ require (
github.com/prometheus/client_model v0.6.0
github.com/prometheus/common v0.48.0
github.com/prometheus/exporter-toolkit v0.11.0
github.com/prometheus/procfs v0.12.0
github.com/prometheus/procfs v0.13.0
github.com/safchain/ethtool v0.3.0
golang.org/x/exp v0.0.0-20230522175609-2e198f4a06a1
golang.org/x/sys v0.18.0
Expand All @@ -53,7 +53,7 @@ require (
golang.org/x/crypto v0.18.0 // indirect
golang.org/x/net v0.20.0 // indirect
golang.org/x/oauth2 v0.16.0 // indirect
golang.org/x/sync v0.5.0 // indirect
golang.org/x/sync v0.6.0 // indirect
golang.org/x/text v0.14.0 // indirect
google.golang.org/appengine v1.6.7 // indirect
google.golang.org/protobuf v1.32.0 // indirect
Expand Down
8 changes: 4 additions & 4 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -79,8 +79,8 @@ github.com/prometheus/common v0.48.0 h1:QO8U2CdOzSn1BBsmXJXduaaW+dY/5QLjfB8svtSz
github.com/prometheus/common v0.48.0/go.mod h1:0/KsvlIEfPQCQ5I2iNSAWKPZziNCvRs5EC6ILDTlAPc=
github.com/prometheus/exporter-toolkit v0.11.0 h1:yNTsuZ0aNCNFQ3aFTD2uhPOvr4iD7fdBvKPAEGkNf+g=
github.com/prometheus/exporter-toolkit v0.11.0/go.mod h1:BVnENhnNecpwoTLiABx7mrPB/OLRIgN74qlQbV+FK1Q=
github.com/prometheus/procfs v0.12.0 h1:jluTpSng7V9hY0O2R9DzzJHYb2xULk9VTR1V1R/k6Bo=
github.com/prometheus/procfs v0.12.0/go.mod h1:pcuDEFsWDnvcgNzo4EEweacyhjeA9Zk3cnaOZAZEfOo=
github.com/prometheus/procfs v0.13.0 h1:GqzLlQyfsPbaEHaQkO7tbDlriv/4o5Hudv6OXHGKX7o=
github.com/prometheus/procfs v0.13.0/go.mod h1:cd4PFCR54QLnGKPaKGA6l+cfuNXtht43ZKY6tow0Y1g=
github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ=
github.com/safchain/ethtool v0.3.0 h1:gimQJpsI6sc1yIqP/y8GYgiXn/NjgvpM0RNoWLVVmP0=
github.com/safchain/ethtool v0.3.0/go.mod h1:SA9BwrgyAqNo7M+uaL6IYbxpm5wk3L7Mm6ocLW+CJUs=
Expand All @@ -106,8 +106,8 @@ golang.org/x/net v0.20.0 h1:aCL9BSgETF1k+blQaYUBx9hJ9LOGP3gAVemcZlf1Kpo=
golang.org/x/net v0.20.0/go.mod h1:z8BVo6PvndSri0LbOE3hAn0apkU+1YvI6E70E9jsnvY=
golang.org/x/oauth2 v0.16.0 h1:aDkGMBSYxElaoP81NpoUoz2oo2R2wHdZpGToUxfyQrQ=
golang.org/x/oauth2 v0.16.0/go.mod h1:hqZ+0LWXsiVoZpeld6jVt06P3adbS2Uu911W1SsJv2o=
golang.org/x/sync v0.5.0 h1:60k92dhOjHxJkrqnwsfl8KuaHbn/5dl0lUPUklKo3qE=
golang.org/x/sync v0.5.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
golang.org/x/sync v0.6.0 h1:5BMeUDZ7vkXGfEr1x9B4bRcTH4lpkTkpdh0T/J+qjbQ=
golang.org/x/sync v0.6.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20211031064116-611d5d643895/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
Expand Down
Loading