Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Disable component based on logs profile #1141

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions .trivyignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ CVE-2023-39325 #same as CVE-2023-44487
CVE-2023-3978
CVE-2023-44487 #false positive according to Mariner team
GHSA-jq35-85cj-fj4p
CVE-2019-3826

#telegraf HIGH
GHSA-m425-mq94-257g
Expand All @@ -11,3 +12,9 @@ CVE-2023-47090

# ruby HIGH
CVE-2017-10784

# mariner MEDIUM
CVE-2023-5678

# go
CVE-2023-48795
1 change: 1 addition & 0 deletions build/linux/installer/datafiles/base_container.data
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ MAINTAINER: 'Microsoft Corporation'
/opt/ConfigParseErrorLogger.rb; build/common/installer/scripts/ConfigParseErrorLogger.rb; 755; root; root
/opt/tomlparser-npm-config.rb; build/linux/installer/scripts/tomlparser-npm-config.rb; 755; root; root
/opt/tomlparser-osm-config.rb; build/linux/installer/scripts/tomlparser-osm-config.rb; 755; root; root
/opt/dcr-config-parser.rb; build/linux/installer/scripts/dcr-config-parser.rb; 755; root; root
/opt/test.json; build/linux/installer/conf/test.json; 644; root; root

/etc/fluent/plugin/lib/application_insights/version.rb; source/plugins/ruby/lib/application_insights/version.rb; 644; root; root
Expand Down
52 changes: 52 additions & 0 deletions build/linux/installer/scripts/dcr-config-parser.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
#!/usr/local/bin/ruby

require 'fileutils'
require 'json'
require_relative 'ConfigParseErrorLogger'

# Inspects the cached Data Collection Rule (DCR) config chunk and decides
# whether it requests only the container-log / Kubernetes-event streams listed
# in @logs_and_events_streams. When it does, LOGS_AND_EVENTS_ONLY=true is
# written to ./dcr_env_var so the calling shell script can source it.
#
# The check only runs when OS_TYPE is not "windows", USING_AAD_MSI_AUTH is
# "true", CONTROLLER_TYPE is "daemonset" and CONTAINER_TYPE is unset
# (all comparisons ignore case and surrounding whitespace).

@os_type = ENV['OS_TYPE']
@controllerType = ENV['CONTROLLER_TYPE']
@containerType = ENV['CONTAINER_TYPE']
@dcrConfigFilePattern = '/etc/mdsd.d/config-cache/configchunks/*.json'
@logs_and_events_streams = %w[
  CONTAINER_LOG_BLOB
  CONTAINERINSIGHTS_CONTAINERLOGV2
  KUBE_EVENTS_BLOB
  KUBE_POD_INVENTORY_BLOB
].freeze
@logs_and_events_only = false

# Returns true when +value+ is set and equals +expected+, ignoring case and
# surrounding whitespace. An unset (nil) or blank value never matches.
def env_flag_matches?(value, expected)
  return false if value.nil?

  value.strip.casecmp(expected).zero?
end

# Returns the stream names declared by the ContainerInsightsExtension data
# sources of a parsed DCR document.
#
# Raises RuntimeError when +data+ is not a Hash carrying a 'dataSources' Array.
def container_insights_streams(data)
  unless data.is_a?(Hash) && data['dataSources'].is_a?(Array)
    raise 'Invalid JSON structure: Missing required keys'
  end

  extension_sources = data['dataSources'].select { |ds| ds['id'] == 'ContainerInsightsExtension' }
  stream_entries = extension_sources.flat_map { |ds| ds['streams'] if ds.key?('streams') }.compact
  stream_entries.map { |entry| entry['stream'] if entry.key?('stream') }.compact
end

# Entry point: applies the environment guards, reads the first DCR config
# chunk and writes dcr_env_var when every configured stream belongs to the
# logs-and-events set. Parsing failures are reported through
# ConfigParseErrorLogger instead of aborting the caller.
def apply_dcr_logs_profile
  return if env_flag_matches?(@os_type, 'windows')
  # An unset USING_AAD_MSI_AUTH is treated as "not true" rather than crashing.
  return unless env_flag_matches?(ENV['USING_AAD_MSI_AUTH'], 'true')
  return unless env_flag_matches?(@controllerType, 'daemonset') && @containerType.nil?

  file_path = Dir.glob(@dcrConfigFilePattern).first
  # Raise an error if no JSON file is found
  raise 'No JSON file found in the specified directory' unless file_path

  data = JSON.parse(File.read(file_path))
  remaining_streams = container_insights_streams(data) - @logs_and_events_streams
  # NOTE(review): a DCR with no ContainerInsightsExtension streams at all also
  # ends up here with an empty list and is treated as logs-and-events-only,
  # exactly as before - confirm that this is intended.
  return unless remaining_streams.empty?

  # Write the settings to file, so that they can be set as environment variables
  puts 'DCR config matches Log and Events only profile. Setting LOGS_AND_EVENTS_ONLY to true'
  @logs_and_events_only = true
  # File.write closes the handle even if the write raises.
  File.write('dcr_env_var', "LOGS_AND_EVENTS_ONLY=#{@logs_and_events_only}\n")
rescue StandardError => e
  ConfigParseErrorLogger.logError("Exception while parsing dcr : #{e}. DCR Json data: #{data}")
end

apply_dcr_logs_profile
29 changes: 21 additions & 8 deletions build/linux/installer/scripts/livenessprobe.sh
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,14 @@ if [[ "${CONTROLLER_TYPE}" == "DaemonSet" && "${CONTAINER_TYPE}" != "PrometheusS
[ -f "/var/run/mdsd-ci/70-rsyslog-forward-mdsd-ci.conf" ] && rm /var/run/mdsd-ci/70-rsyslog-forward-mdsd-ci.conf && echo "remove" > /var/run/mdsd-ci/update.status
fi
fi

# Re-run the DCR parser and fail the probe when the LOGS_AND_EVENTS_ONLY value
# it produces differs from the one currently in the environment.
CURRENT_LOGS_AND_EVENTS_ONLY=${LOGS_AND_EVENTS_ONLY}
ruby /opt/dcr-config-parser.rb
# The parser only writes dcr_env_var when the DCR matches the logs-and-events-only
# profile, so the file may be absent; sourcing a missing file would fail.
if [ -f dcr_env_var ]; then
  source dcr_env_var
fi
if [ "${LOGS_AND_EVENTS_ONLY}" != "${CURRENT_LOGS_AND_EVENTS_ONLY}" ]; then
  echo "dcr_env_var has been updated - dcr config changed" > /dev/termination-log
  exit 1
fi
fi

if [ -s "inotifyoutput.txt" ]
Expand Down Expand Up @@ -47,6 +55,19 @@ then
exit 1
fi

# Fail the probe (non-zero exit) when no fluent-bit process shows up in the process list.
if ! (ps -ef | grep fluent-bit | grep -v "grep"); then
  echo "Fluentbit is not running" > /dev/termination-log
  exit 1
fi

# A daemonset container in LOGS_AND_EVENTS_ONLY mode needs only mdsd and fluent-bit,
# so the probe succeeds here without running the remaining checks.
if [ "${CONTROLLER_TYPE}" == "DaemonSet" ] \
  && [ "${CONTAINER_TYPE}" != "PrometheusSidecar" ] \
  && [ "${LOGS_AND_EVENTS_ONLY}" == "true" ]; then
  echo "Logs and events only mode enabled" > /dev/write-to-traces
  exit 0
fi

#optionally test to exit non zero value if fluentd is not running
#fluentd not used in sidecar container
Expand Down Expand Up @@ -75,14 +96,6 @@ if [ "${CONTAINER_TYPE}" != "PrometheusSidecar" ] && [ "${GENEVA_LOGS_INTEGRATI
fi
fi

# Fail the probe (non-zero exit) when no fluent-bit process shows up in the process list.
if ! (ps -ef | grep fluent-bit | grep -v "grep"); then
  echo "Fluentbit is not running" > /dev/termination-log
  exit 1
fi

#test to exit non zero value if telegraf is not running
if [ "${GENEVA_LOGS_INTEGRATION_SERVICE_MODE}" == "true" ]; then
exit 0
Expand Down
52 changes: 36 additions & 16 deletions kubernetes/linux/main.sh
Original file line number Diff line number Diff line change
Expand Up @@ -931,11 +931,35 @@ if [ ! -f /etc/cron.d/ci-agent ]; then
echo "*/5 * * * * root /usr/sbin/logrotate -s /var/lib/logrotate/ci-agent-status /etc/logrotate.d/ci-agent >/dev/null 2>&1" >/etc/cron.d/ci-agent
fi

# Write messages from the liveness probe to stdout (so telemetry picks it up)
touch /dev/write-to-traces

if [ "${GENEVA_LOGS_INTEGRATION}" == "true" ] || [ "${GENEVA_LOGS_INTEGRATION_SERVICE_MODE}" == "true" ]; then
checkAgentOnboardingStatus $AAD_MSI_AUTH_MODE 30
Copy link
Contributor

@wanlonghenry wanlonghenry Dec 19, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Lines 938 and 940 are the same; the `if` conditions can be combined.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This part of the code is old; I just moved it up.

elif [ "${MUTE_PROM_SIDECAR}" != "true" ]; then
checkAgentOnboardingStatus $AAD_MSI_AUTH_MODE 30
else
echo "not checking onboarding status (no metrics to scrape since MUTE_PROM_SIDECAR is true)"
fi

# Evaluate the DCR config. When the parser produced dcr_env_var, persist its
# settings to ~/.bashrc, load them into this shell and publish
# LOGS_AND_EVENTS_ONLY through setGlobalEnvVar.
ruby dcr-config-parser.rb
if [ -e "dcr_env_var" ]; then
  # Read the file directly (no cat pipeline); -r and the quotes keep each line
  # verbatim instead of letting the shell split or glob-expand it.
  while IFS= read -r line; do
    echo "$line" >>~/.bashrc
  done <dcr_env_var
  source dcr_env_var
  setGlobalEnvVar LOGS_AND_EVENTS_ONLY "${LOGS_AND_EVENTS_ONLY}"
fi

# no dependency on fluentd for prometheus side car container
if [ "${CONTAINER_TYPE}" != "PrometheusSidecar" ] && [ "${GENEVA_LOGS_INTEGRATION_SERVICE_MODE}" != "true" ]; then
if [ ! -e "/etc/config/kube.conf" ]; then
echo "*** starting fluentd v1 in daemonset"
fluentd -c /etc/fluent/container.conf -o /var/opt/microsoft/docker-cimprov/log/fluentd.log --log-rotate-age 5 --log-rotate-size 20971520 &
if [ "$LOGS_AND_EVENTS_ONLY" != "true" ]; then
echo "*** starting fluentd v1 in daemonset"
fluentd -c /etc/fluent/container.conf -o /var/opt/microsoft/docker-cimprov/log/fluentd.log --log-rotate-age 5 --log-rotate-size 20971520 &
Copy link
Contributor

@wanlonghenry wanlonghenry Dec 19, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Although this is old code, where does the number 20971520 come from? Is it adjustable from the configmap?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I also don't know how this was picked. This part of the code is quite old, and it's not configurable.

else
echo "Skipping fluentd since LOGS_AND_EVENTS_ONLY is set to true"
fi
else
echo "*** starting fluentd v1 in replicaset"
fluentd -c /etc/fluent/kube.conf -o /var/opt/microsoft/docker-cimprov/log/fluentd.log --log-rotate-age 5 --log-rotate-size 20971520 &
Expand Down Expand Up @@ -1089,10 +1113,14 @@ if [ ! -e "/etc/config/kube.conf" ] && [ "${GENEVA_LOGS_INTEGRATION_SERVICE_MODE
echo "no metrics to scrape since MUTE_PROM_SIDECAR is true, not checking for listener on tcp #25229"
fi
else
echo "checking for listener on tcp #25226 and waiting for $WAITTIME_PORT_25226 secs if not.."
waitforlisteneronTCPport 25226 $WAITTIME_PORT_25226
echo "checking for listener on tcp #25228 and waiting for $WAITTIME_PORT_25228 secs if not.."
waitforlisteneronTCPport 25228 $WAITTIME_PORT_25228
if [ "${LOGS_AND_EVENTS_ONLY}" == "true" ]; then
echo "LOGS_AND_EVENTS_ONLY is true, not checking for listener on tcp #25226 and tcp #25228"
else
echo "checking for listener on tcp #25226 and waiting for $WAITTIME_PORT_25226 secs if not.."
waitforlisteneronTCPport 25226 $WAITTIME_PORT_25226
echo "checking for listener on tcp #25228 and waiting for $WAITTIME_PORT_25228 secs if not.."
waitforlisteneronTCPport 25228 $WAITTIME_PORT_25228
fi
fi
elif [ "${GENEVA_LOGS_INTEGRATION_SERVICE_MODE}" != "true" ]; then
echo "checking for listener on tcp #25226 and waiting for $WAITTIME_PORT_25226 secs if not.."
Expand All @@ -1106,23 +1134,15 @@ if [ "${GENEVA_LOGS_INTEGRATION_SERVICE_MODE}" == "true" ]; then
elif [ "${MUTE_PROM_SIDECAR}" != "true" ]; then
if [ "${CONTROLLER_TYPE}" == "ReplicaSet" ] && [ "${TELEMETRY_RS_TELEGRAF_DISABLED}" == "true" ]; then
echo "not starting telegraf since prom scraping is disabled for replicaset"
elif [ "${CONTROLLER_TYPE}" != "ReplicaSet" ] && [ "${CONTAINER_TYPE}" != "PrometheusSidecar" ] && [ "${LOGS_AND_EVENTS_ONLY}" == "true" ]; then
echo "not starting telegraf for LOGS_AND_EVENTS_ONLY daemonset"
else
/opt/telegraf --config $telegrafConfFile &
fi
else
echo "not starting telegraf (no metrics to scrape since MUTE_PROM_SIDECAR is true)"
fi

# Write messages from the liveness probe to stdout (so telemetry picks it up)
touch /dev/write-to-traces

# Onboarding status is checked for Geneva logs integration (either mode) and
# whenever the Prometheus sidecar is not muted; otherwise the check is skipped.
if [ "${GENEVA_LOGS_INTEGRATION}" == "true" ] \
  || [ "${GENEVA_LOGS_INTEGRATION_SERVICE_MODE}" == "true" ] \
  || [ "${MUTE_PROM_SIDECAR}" != "true" ]; then
  checkAgentOnboardingStatus $AAD_MSI_AUTH_MODE 30
else
  echo "not checking onboarding status (no metrics to scrape since MUTE_PROM_SIDECAR is true)"
fi

# Get the end time of the setup in seconds
endTime=$(date +%s)
Expand Down