Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enrich SOS report content #3222

Merged
merged 9 commits into from
Apr 7, 2021
2 changes: 2 additions & 0 deletions charts/prometheus-operator.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,8 @@ prometheus:
matchLabels:
app.kubernetes.io/name: prometheus-operator-prometheus

enableAdminAPI: '__var__(prometheus.spec.config.enable_admin_api)'

grafana:
adminPassword: admin

Expand Down
8 changes: 4 additions & 4 deletions packages/common/metalk8s-sosreport/containerd.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,10 +63,10 @@ def setup(self):

def _get_crio_list(self, cmd):
ret = []
result = self.get_command_output(cmd)
if result['status'] == 0:
for entry in result['output'].splitlines():
if 'deprecated' not in entry[0]:
result = self.exec_cmd(cmd)
if result["status"] == 0:
    for entry in result["output"].splitlines():
        # Prevent the socket deprecation warning from being iterated
        # over. The whole line has to be searched: `entry[0]` is only the
        # first character of the line and can never contain "deprecated",
        # and it raises IndexError on an empty line.
        if entry and "deprecated" not in entry:
            ret.append(entry)
Expand Down
176 changes: 136 additions & 40 deletions packages/common/metalk8s-sosreport/metalk8s.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
#! /bin/env python3

from sos.plugins import Plugin, RedHatPlugin, UbuntuPlugin
from os import path

import requests
from sos.plugins import Plugin, RedHatPlugin, UbuntuPlugin


class metalk8s(Plugin, RedHatPlugin, UbuntuPlugin):

Expand All @@ -14,23 +16,99 @@ class metalk8s(Plugin, RedHatPlugin, UbuntuPlugin):
files = ('/etc/kubernetes/admin.conf',)

option_list = [
('all', 'also collect all namespaces output separately',
'slow', False),
('describe', 'capture descriptions of all kube resources',
'fast', False),
('podlogs', 'capture logs for pods', 'slow', False),
("all", "also collect all namespaces output separately", "slow", False),
("describe", "capture descriptions of all kube resources", "fast", False),
("podlogs", "capture logs for pods", "slow", False),
("prometheus-snapshot", "generate a Prometheus snapshot", "slow", False),
]

def check_is_master(self):
    """Return True when the Kubernetes admin kubeconfig exists on this host.

    The check is a plain existence test on ``/etc/kubernetes/admin.conf``.
    """
    # A single path is tested, so no `any([...])` wrapper is needed.
    return path.exists("/etc/kubernetes/admin.conf")

def prometheus_snapshot(self):
    """Generate a Prometheus snapshot and copy it into the report archive.

    Steps:
      1. Query the ``prometheus-operator-prometheus`` endpoints with
         kubectl to get the backing pod name and its ``ip:port``.
      2. POST to the Prometheus admin API to create a TSDB snapshot.
      3. Copy the snapshot directory from the pod into
         ``<archive path>/prometheus-snapshot`` with ``kubectl cp``.
      4. Remove the snapshot from the pod.

    Any failure is reported through ``self._log_error`` and aborts the
    snapshot; no exception is propagated to the caller.
    """
    kube_cmd = (
        "kubectl "
        "--kubeconfig=/etc/kubernetes/admin.conf "
        "--namespace metalk8s-monitoring"
    )

    # Retrieve Prometheus endpoint
    prom_endpoint_cmd = (
        "{0} get endpoints "
        "prometheus-operator-prometheus --output "
        "jsonpath='{{ .subsets[0].addresses[0].targetRef.name }} "
        "{{ .subsets[0].addresses[0].ip }}:"
        "{{ .subsets[0].ports[0].port }}'".format(kube_cmd)
    )
    prom_endpoint_res = self.exec_cmd(prom_endpoint_cmd)
    if prom_endpoint_res["status"] != 0:
        self._log_error(
            "Unable to retrieve Prometheus endpoint: {0}".format(
                prom_endpoint_res["output"]
            )
        )
        return

    # Expected output is exactly "<pod name> <ip>:<port>"; anything else
    # (empty output, no ready address, ...) cannot be used.
    try:
        prom_instance, prom_endpoint = prom_endpoint_res["output"].split()
    except ValueError:
        self._log_error(
            "Unexpected Prometheus endpoint description: {0!r}".format(
                prom_endpoint_res["output"]
            )
        )
        return

    # Generate snapshot
    # return a JSON object as follows:
    # {"status":"success","data":{"name":"20210322T164646Z-7d0b9ca8be8e9981"}}
    # or in case of error:
    # {"status":"error","errorType":"unavailable","error":"admin APIs disabled"}
    prom_snapshot_url = "http://{0}/api/v1/admin/tsdb/snapshot".format(
        prom_endpoint
    )
    try:
        # Only the connection phase is bounded: an unreachable pod IP
        # must not hang the report, but snapshot creation itself may
        # legitimately take a while.
        res = requests.post(prom_snapshot_url, timeout=(10, None))
        res.raise_for_status()
    except requests.exceptions.RequestException as exc:
        # RequestException also covers connection failures and timeouts,
        # not just the HTTPError raised by raise_for_status().
        self._log_error(
            "An error occurred while querying Prometheus API: {0}".format(str(exc))
        )
        return

    try:
        res_json = res.json()
    except ValueError:
        self._log_error(
            "Invalid JSON returned by Prometheus API: {0}".format(res.text)
        )
        return

    try:
        snapshot_name = res_json["data"]["name"]
    except (KeyError, TypeError):
        # Do not assume the error payload has an "error" key either.
        if isinstance(res_json, dict):
            reason = res_json.get("error", res_json)
        else:
            reason = res_json
        self._log_error(
            "Unable to generate Prometheus snapshot: {0}".format(reason)
        )
        return

    if not snapshot_name:
        # An empty name would turn the cleanup below into
        # `rm -rf /prometheus/snapshots/`, wiping every snapshot.
        self._log_error(
            "Unable to generate Prometheus snapshot: {0}".format(
                "empty snapshot name returned"
            )
        )
        return

    # Copy snapshot locally
    snapshot_archive_dir = "{0}/prometheus-snapshot".format(
        self.archive.get_archive_path()
    )

    copy_snapshot_cmd = (
        "{0} cp -c prometheus {1}:/prometheus/snapshots/{2} {3}".format(
            kube_cmd, prom_instance, snapshot_name, snapshot_archive_dir
        )
    )
    copy_res = self.exec_cmd(copy_snapshot_cmd)
    if copy_res["status"] != 0:
        self._log_error(
            "Unable to copy Prometheus snapshot {0}: {1}".format(
                snapshot_name, copy_res["output"]
            )
        )

    # Remove snapshot from Prometheus pod (even when the copy failed, so
    # that no snapshot is left behind on the Prometheus volume)
    delete_snapshot_cmd = (
        "{0} exec -c prometheus {1} -- "
        "rm -rf /prometheus/snapshots/{2}".format(
            kube_cmd, prom_instance, snapshot_name
        )
    )
    self.exec_cmd(delete_snapshot_cmd)

def setup(self):
self.add_copy_spec('/etc/kubernetes/manifests')
self.add_copy_spec('/var/log/pods')
self.add_copy_spec('/var/log/metalk8s')
self.add_copy_spec("/etc/kubernetes/manifests")
self.add_copy_spec("/etc/metalk8s/bootstrap.yaml")
self.add_copy_spec("/etc/metalk8s/solutions.yaml")
self.add_copy_spec("/etc/salt")
self.add_forbidden_path("/etc/salt/pki")
self.add_copy_spec("/var/log/pods")
self.add_copy_spec("/var/log/metalk8s")

services = [
'kubelet',
"kubelet",
"salt-minion",
]

for service in services:
Expand All @@ -42,13 +120,16 @@ def setup(self):
if path.exists('/etc/kubernetes/admin.conf'):
kube_cmd += '--kubeconfig=/etc/kubernetes/admin.conf'

kube_get_cmd = 'get -o json '
for subcmd in ['version', 'config view']:
self.add_cmd_output('{0} {1}'.format(kube_cmd, subcmd))
kube_get_cmd = "get -o json "
for subcmd in ["version", "config view", "top nodes"]:
self.add_cmd_output("{0} {1}".format(kube_cmd, subcmd))

# get all namespaces in use
namespaces_result = self.get_command_output('{0} get namespaces'.format(kube_cmd))
kube_namespaces = [n.split()[0] for n in namespaces_result['output'].splitlines()[1:] if n]
# The trailing space after "--no-headers" matters: adjacent string literals
# are concatenated as-is, and "--no-headers--output" is not a valid flag.
namespaces_result = self.exec_cmd(
    "{0} get namespaces --no-headers "
    "--output custom-columns=':metadata.name'".format(kube_cmd)
)
# Only trust the output when the command succeeded; otherwise it holds an
# error message rather than namespace names.
kube_namespaces = (
    namespaces_result["output"].splitlines()
    if namespaces_result["status"] == 0
    else []
)

resources = [
'pods',
Expand All @@ -72,39 +153,54 @@ def setup(self):
if self.get_option('all'):
kube_namespaced_cmd = '{0} {1} {2}'.format(kube_cmd, kube_get_cmd, kube_namespace)

self.add_cmd_output('{} events'.format(kube_namespaced_cmd))
for subcmd in ["events", "top pods"] + resources:
self.add_cmd_output(
TeddyAndrieux marked this conversation as resolved.
Show resolved Hide resolved
"{0} {1}".format(kube_namespaced_cmd, subcmd)
)

if self.get_option('describe'):
# need to drop json formatting for this
kube_namespaced_cmd = '{0} get {1}'.format(kube_cmd, kube_namespace)
for res in resources:
self.add_cmd_output('{0} {1}'.format(kube_namespaced_cmd, res))

if self.get_option('describe'):
# need to drop json formatting for this
kube_namespaced_cmd = '{0} get {1}'.format(kube_cmd, kube_namespace)
for res in resources:
r = self.get_command_output(
'{0} {1}'.format(kube_namespaced_cmd, res))
if r['status'] == 0:
kube_cmd_result = [k.split()[0] for k in
r['output'].splitlines()[1:]]
for k in kube_cmd_result:
kube_namespaced_cmd = '{0} {1}'.format(kube_cmd, kube_namespace)
self.add_cmd_output(
'{0} describe {1} {2}'.format(kube_namespaced_cmd, res, k))

if self.get_option('podlogs'):
kube_namespaced_cmd = '{0} {1}'.format(kube_cmd, kube_namespace)
r = self.get_command_output('{} get pods'.format(kube_namespaced_cmd))
if r['status'] == 0:
pods = [p.split()[0] for p in
r['output'].splitlines()[1:]]
for pod in pods:
self.add_cmd_output('{0} logs {1} --all-containers'.format(kube_namespaced_cmd, pod))
# List only the resource names, one per line. The output format must be
# spelled "custom-columns"; a misspelled format makes kubectl fail and
# no resource would ever be described.
r = self.exec_cmd(
    "{0} {1} --no-headers "
    "--output custom-columns=':metadata.name'".format(
        kube_namespaced_cmd, res
    )
)
if r["status"] == 0:
for k in r["output"].splitlines():
kube_namespaced_cmd = "{0} {1}".format(
kube_cmd, kube_namespace
)
self.add_cmd_output(
"{0} describe {1} {2}".format(
kube_namespaced_cmd, res, k
)
)

if self.get_option("podlogs"):
kube_namespaced_cmd = "{0} {1}".format(kube_cmd, kube_namespace)
r = self.exec_cmd(
"{} get pods --no-headers --output "
"custom-columns=':metadata.name'".format(kube_namespaced_cmd)
)
if r["status"] == 0:
for pod in r["output"].splitlines():
self.add_cmd_output(
"{0} logs {1} --all-containers".format(
kube_namespaced_cmd, pod
)
)

if not self.get_option('all'):
kube_namespaced_cmd = '{} get --all-namespaces=true'.format(kube_cmd)
for res in resources:
self.add_cmd_output('{0} {1}'.format(kube_namespaced_cmd, res))

if self.get_option("prometheus-snapshot"):
self.prometheus_snapshot()

def postproc(self):
# First, clear sensitive data from the json output collected.
# This will mask values when the 'name' looks susceptible of
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,5 @@ spec:
# Configure the Prometheus Deployment
deployment:
replicas: 1
config:
enable_admin_api: false
Original file line number Diff line number Diff line change
Expand Up @@ -51620,7 +51620,7 @@ spec:
pathPrefix: /
port: web
baseImage: {% endraw -%}{{ build_image_name("prometheus", False) }}{%- raw %}
enableAdminAPI: false
enableAdminAPI: {% endraw -%}{{ prometheus.spec.config.enable_admin_api }}{%- raw %}
externalUrl: http://prometheus-operator-prometheus.metalk8s-monitoring:9090
listenLocal: false
logFormat: logfmt
Expand Down