Skip to content

Commit

Permalink
Merge pull request #68 from Akrog/fix-container-logs
Browse files Browse the repository at this point in the history
Fix container logs
  • Loading branch information
openshift-merge-bot[bot] authored Jun 26, 2024
2 parents d16c38c + 1a3412c commit 3d15255
Show file tree
Hide file tree
Showing 9 changed files with 87 additions and 34 deletions.
4 changes: 4 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,10 @@ oc adm must-gather --image=quay.io/openstack-k8s-operators/openstack-must-gather

This is the list of available environmental variables:

- `OSP_NS`: Namespace where the OSP services are running. Defaults to
`openstack`.
- `OSP_OPERATORS_NS`: Namespace where the OSP operators are running. Defaults
to `openstack-operators`.
- `CONCURRENCY`: Must-gather runs many operations, so to speed things up we run
them in parallel with a concurrency of 5 by default. Users can change this
environmental variable to adjust it to their needs.
Expand Down
13 changes: 11 additions & 2 deletions collection-scripts/common.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,19 @@

source "${DIR_NAME}/bg.sh"

export OSP_NS="${OSP_NS-openstack}"
export OSP_OPERATORS_NS="${OSP_OPERATORS_NS-openstack-operators}"

# This option is used for CI purposes and
# is enabled by default
export SOS_DECOMPRESS=${SOS_DECOMPRESS:-1}

export BASE_COLLECTION_PATH="${BASE_COLLECTION_PATH:-/must-gather}"
export SOS_PATH="${BASE_COLLECTION_PATH}/sos-reports"
export SOS_PATH_NODES="${SOS_PATH}/_all_nodes"
declare -a DEFAULT_NAMESPACES=(
"openstack"
"openstack-operators"
"${OSP_NS}"
"${OSP_OPERATORS_NS}"
"baremetal-operator-system"
"openshift-machine-api"
"cert-manager"
Expand Down
62 changes: 54 additions & 8 deletions collection-scripts/gather_ctlplane_resources
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,45 @@ if [[ -z "$DIR_NAME" ]]; then
fi


#######################################
# Create, inside each gathered pod's "logs" directory, symlinks pointing to the
# rotated container logs found under <node>/var/log/pods in SOS_PATH_NODES.
# Globals:
#   SOS_DECOMPRESS  (read) - nothing is done unless it is 1
#   NAMESPACE_PATH  (read) - root of <namespace>/pods/<pod> directories
#   SOS_PATH_NODES  (read) - root of the per-node directories
# Arguments: none
# Outputs:   one "Creating symlink ..." line on stdout per link
#######################################
function rotated_logs_symlinks {
    # Keep every working variable local so nothing leaks into the scripts
    # that source this file.
    local pod_dir namespace podname log_name nodename rel_name logs_dir linkfile

    [[ ${SOS_DECOMPRESS} -ne 1 ]] && return

    for pod_dir in "${NAMESPACE_PATH}"/*/pods/*; do
        # If file not found we get the glob string itself, so ignore it
        [[ -e "${pod_dir}" ]] || continue

        # The path prefix is quoted so it is matched literally: regex
        # metacharacters in NAMESPACE_PATH must not alter the match.
        [[ ${pod_dir} =~ ^"${NAMESPACE_PATH}"/([^/]+)/pods/(.*) ]] || continue
        namespace="${BASH_REMATCH[1]}"
        podname="${BASH_REMATCH[2]}"
        logs_dir="${pod_dir}/logs"

        # Some pods have no directory (eg: metallb-system_speaker-c8bxh_7c886529-fa9e-4357-afb2-2e712049b317/130c1942bd055c5b0b16ec19a7c03dca74fe7a48604091284b5868ce53eb1840.log)
        # Most have a directory with the container name within the pod
        for log_name in "${SOS_PATH_NODES}"/*/var/log/pods/"${namespace}_${podname}"_*/{*.log.*,*/*.log.*}; do
            # If file not found we get the glob string itself, so ignore it
            [[ -e "${log_name}" ]] || continue

            # Same literal-prefix matching as above, for SOS_PATH_NODES
            [[ ${log_name} =~ ^"${SOS_PATH_NODES}"/([^/]+)/var/log/pods/[^/]+/(.*) ]] || continue
            nodename="${BASH_REMATCH[1]}"
            # Flatten "<container>/<file>" into "<container>.<file>"
            rel_name="${BASH_REMATCH[2]//\//.}"

            # To avoid collisions on rotated log names among sos reports construct a new name:
            #   [<container_name>.]<filename>.<nodename>.<logfile_extension>
            # For visual comfort when it's not a gz file we'll use a different pattern
            #   [<container_name>.]<filename>.<logfile_extension>.<nodename>
            # That way we'll end up with:
            #   manager.1.log.20240624-185849.sosreport-crc-vlf7c-master-0.gz
            #   manager.1.log.20240624-191028.sosreport-crc-vlf7c-master-0
            if [[ "${rel_name##*.}" == 'gz' ]]; then
                linkfile="${logs_dir}/${rel_name%.*}.${nodename}.${rel_name##*.}"
            else
                linkfile="${logs_dir}/${rel_name}.${nodename}"
            fi

            # The logs directory may be missing for this pod; without it
            # every ln below would fail.
            if [[ ! -d "${logs_dir}" ]]; then
                mkdir -p "${logs_dir}" || continue
            fi

            echo "Creating symlink ${linkfile} to ${log_name}"
            ln -s "${log_name}" "${linkfile}"
        done
    done
}


function gather_ctlplane_resources {
local NS="$1"
# Only get resources if the namespace exists
Expand All @@ -22,14 +61,21 @@ function gather_ctlplane_resources {
run_bg /usr/bin/oc -n "${NS}" get pvc '>' "${NAMESPACE_PATH}/${NS}/pvc.log"
run_bg /usr/bin/oc -n "${NS}" get network-attachment-definitions -o yaml '>' "${NAMESPACE_PATH}/${NS}/nad.log"

# Don't gather the logs here, they are all gathered from /var/log/pods in gather_sos
pods_dir="${NAMESPACE_PATH}/${NS}/pods/"
mkdir -p "${pods_dir}"
data=$(oc -n "$NS" get pod --no-headers -o custom-columns=":metadata.name")
while read -r pod; do
echo "Describe pod ${pod}";
# describe pod
run_bg oc -n "$NS" describe pod "$pod" '>' "${pods_dir}/${pod}-describe"
# We make a single request to get lines in the form <pod> <container> <crash_status>
data=$(oc -n "$NS" get pod -o go-template='{{range $indexp,$pod := .items}}{{range $index,$element := $pod.status.containerStatuses}}{{printf "%s %s" $pod.metadata.name $element.name}} {{ if ne $element.lastState.terminated nil }}{{ printf "%s" $element.lastState.terminated }}{{ end }}{{ printf "\n"}}{{end}}{{end}}')
while read -r pod container crash_status; do
echo "Dump logs for ${container} from ${pod} pod";
pod_dir="${NAMESPACE_PATH}/${NS}/pods/${pod}"
log_dir="${pod_dir}/logs"
if [ ! -d "$log_dir" ]; then
mkdir -p "$log_dir"
# describe pod
run_bg oc -n "$NS" describe pod "$pod" '>' "${pod_dir}/${pod}-describe"
fi
run_bg oc -n "$NS" logs "$pod" -c "$container" '>' "${log_dir}/${container}.log"
if [[ -n "$crash_status" ]]; then
run_bg oc -n "$NS" logs "$pod" -c "$container" --previous '>' "${log_dir}/${container}-previous.log";
fi
done <<< "$data"

# get the required resources
Expand Down
2 changes: 1 addition & 1 deletion collection-scripts/gather_db
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ if [ "${OPENSTACK_DATABASES-unset}" = "unset" ] || [[ -z "${OPENSTACK_DATABASES}
# If no databases options are passed, skip the database dump
echo "Skip Database dump: an empty list is provided"
[[ $CALLED -eq 1 ]] && exit 0
exit 0
return
fi

# Create the db_dump directory in the BASE_COLLECTION_PATH
Expand Down
6 changes: 0 additions & 6 deletions collection-scripts/gather_edpm_sos
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,6 @@ if [[ -z "$DIR_NAME" ]]; then
source "${DIR_NAME}/common.sh"
fi

# This option is used for CI purposes and
# is enabled by default
SOS_DECOMPRESS=${SOS_DECOMPRESS:-1}

SOS_PATH="${BASE_COLLECTION_PATH}/sos-reports"
SOS_PATH_NODES="${BASE_COLLECTION_PATH}/sos-reports/_all_nodes"
TMPDIR=/var/tmp/sos-osp

if [[ -z "$SOS_EDPM" ]]; then
Expand Down
5 changes: 4 additions & 1 deletion collection-scripts/gather_run
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,9 @@ source "${DIR_NAME}/gather_services_status"

# dump the openstack database
source "${DIR_NAME}/gather_db"
#

# Wait for background tasks to complete
wait_bg

# Create rotated log symlinks after everything else has finished
rotated_logs_symlinks
9 changes: 5 additions & 4 deletions collection-scripts/gather_services_status
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@ if [[ -z "$DIR_NAME" ]]; then
source "${DIR_NAME}/common.sh"
fi

alias os="/usr/bin/oc -n openstack rsh openstackclient openstack "
# shellcheck disable=SC2139 # We want it expanded when defined
alias os="/usr/bin/oc -n ${OSP_NS} rsh openstackclient openstack "

# For each service passed an input, if the associated entry exists,
# we can call the related function that processes specific service
Expand Down Expand Up @@ -110,8 +111,8 @@ get_nova_status() {
mkdir -p "$NOVA_PATH"
run_bg ${BASH_ALIASES[os]} compute service list '>' "$NOVA_PATH"/service_list
run_bg ${BASH_ALIASES[os]} hypervisor list '>' "$NOVA_PATH"/hypervisor_list
run_bg /usr/bin/oc -n openstack exec -t nova-cell0-conductor-0 -- nova-manage cell_v2 list_cells '>' "$NOVA_PATH"/cell_list
run_bg /usr/bin/oc -n openstack exec -t nova-cell0-conductor-0 -- nova-manage cell_v2 list_hosts '>' "$NOVA_PATH"/host_list
run_bg /usr/bin/oc -n ${OSP_NS} exec -t nova-cell0-conductor-0 -- nova-manage cell_v2 list_cells '>' "$NOVA_PATH"/cell_list
run_bg /usr/bin/oc -n ${OSP_NS} exec -t nova-cell0-conductor-0 -- nova-manage cell_v2 list_hosts '>' "$NOVA_PATH"/host_list
run_bg ${BASH_ALIASES[os]} aggregate list --long '>' "$NOVA_PATH"/aggregate_list
}

Expand Down Expand Up @@ -156,7 +157,7 @@ get_aodh_status() {
# Ceilometer, sg-core, prometheus service gathering - metrics
get_ceilometer_status() {
local CEILOMETER_PATH="$BASE_COLLECTION_PATH/ctlplane/ceilometer"
if /usr/bin/oc -n openstack get metricstorage metric-storage &> /dev/null; then
if /usr/bin/oc -n ${OSP_NS} get metricstorage metric-storage &> /dev/null; then
# For `openstack metric list` command to work we need ceilometer
# in the openstack as well as metricstorage deployed
# on openshift.
Expand Down
18 changes: 7 additions & 11 deletions collection-scripts/gather_sos
Original file line number Diff line number Diff line change
Expand Up @@ -45,13 +45,6 @@ if [[ -n "$SOS_ONLY_PLUGINS" ]]; then
SOS_LIMIT="--only-plugins $SOS_ONLY_PLUGINS"
fi

# This option is used for CI purposes and
# is enabled by default
SOS_DECOMPRESS=${SOS_DECOMPRESS:-1}

SOS_PATH="${BASE_COLLECTION_PATH}/sos-reports"
SOS_PATH_NODES="${BASE_COLLECTION_PATH}/sos-reports/_all_nodes"

TMPDIR=/var/tmp/sos-osp

###############################################################################
Expand All @@ -69,19 +62,22 @@ gather_node_sos () {
# [1]: https://github.com/coreos/toolbox/blob/9a7c840fb4881f406287bf29e5f35b6625c7b358/rhcos-toolbox#L37
# [2]: https://github.com/coreos/toolbox/issues/60
# - Use 2 tar files instead of 1 since tar's "-n --concatenate" and "--append" don't support compressed files
# - Use tar's transform when adding /var/log/pods so "var" is not removed when untaring with --strip-components
# - Use tar's transform when adding rotated logs from /var/log/pods so "var" is not removed when untaring with
# --strip-components
# To avoid performance penalty we don't look for the real directory name using:
# $(tar --exclude='*/*' -tf "${FILENAME}" | head -n1)
# Instead we use a fake podlogs top directory
# - Ignore warning exit code (1) from tar, and only consider it a failure on error code (2)
# - Ignore errors on tar since it fails if logs are added while doing the tar as well as if a file doesn't exist
# (because the glob has no data or because the file was removed by the log rotation mechanism)
# - Build LOGS env var to pass tar based on existing files, as tar fails when run with glob that produces no files
oc debug "node/$node" -- chroot /host bash \
-c "echo 'TOOLBOX_NAME=toolbox-osp' > /root/.toolboxrc ; \
rm -rf \"${TMPDIR}\" && \
mkdir -p \"${TMPDIR}\" && \
sudo podman rm --force toolbox-osp; \
sudo --preserve-env podman pull --authfile /var/lib/kubelet/config.json registry.redhat.io/rhel9/support-tools && \
toolbox sos report --batch --all-logs $SOS_LIMIT --tmp-dir=\"${TMPDIR}\" && \
tar --warning=no-file-changed -cJf \"${TMPDIR}/podlogs.tar.xz\" --transform 's,^,podlogs/,' /var/log/pods; [ \$? -lt 2 ]"
if [[ \"\$(ls /var/log/pods/*/{*.log.*,*/*.log.*} 2>/dev/null)\" != '' ]]; then tar --ignore-failed-read --warning=no-file-changed -cJf \"${TMPDIR}/podlogs.tar.xz\" --transform 's,^,podlogs/,' /var/log/pods/*/{*.log.*,*/*.log.*} || true; fi"

# shellcheck disable=SC2181
if [ $? -ne 0 ]; then
Expand Down Expand Up @@ -155,7 +151,7 @@ mkdir -p "${SOS_PATH_NODES}"

# Get list of nodes and service label for each of the OpenStack service pods
# Not using -o jsonpath='{.spec.nodeName}' because it uses space separator
svc_nodes=$(/usr/bin/oc -n openstack get pod -l service --no-headers -o=custom-columns=NODE:.spec.nodeName,SVC:.metadata.labels.service,NAME:.metadata.name)
svc_nodes=$(/usr/bin/oc -n ${OSP_NS} get pod -l service --no-headers -o=custom-columns=NODE:.spec.nodeName,SVC:.metadata.labels.service,NAME:.metadata.name)
nodes=''
while read -r node svc name; do
svc_path=$(dest_svc_path "$svc")
Expand Down
2 changes: 1 addition & 1 deletion collection-scripts/gather_trigger_gmr
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ if [[ -z "$DIR_NAME" ]]; then
source "${DIR_NAME}/common.sh"
fi

oc="/usr/bin/oc -n openstack "
oc="/usr/bin/oc -n ${OSP_NS} "
oce="$oc exec "


Expand Down

0 comments on commit 3d15255

Please sign in to comment.