Skip to content

Commit

Permalink
Gangams/sept agent release tasks (#445)
Browse files Browse the repository at this point in the history
* turn off mdm for non-supported cluster types

* enable validation of server cert for ai ruby http client

* add kubelet operations total and total error metrics

* node selector label change

* label update

* wip

* wip

* wip

* revert quotes
  • Loading branch information
ganga1980 authored Sep 25, 2020
1 parent 2d8c03f commit da06d76
Show file tree
Hide file tree
Showing 7 changed files with 50 additions and 15 deletions.
9 changes: 4 additions & 5 deletions build/linux/installer/conf/telegraf.conf
Original file line number Diff line number Diff line change
Expand Up @@ -632,8 +632,7 @@
name_prefix="container.azm.ms/"
## An array of urls to scrape metrics from.
urls = ["$CADVISOR_METRICS_URL"]
## Include "$KUBELET_RUNTIME_OPERATIONS_TOTAL_METRIC", "$KUBELET_RUNTIME_OPERATIONS_ERRORS_TOTAL_METRIC" when we add for support for 1.18
fieldpass = ["$KUBELET_RUNTIME_OPERATIONS_METRIC", "$KUBELET_RUNTIME_OPERATIONS_ERRORS_METRIC"]
fieldpass = ["$KUBELET_RUNTIME_OPERATIONS_METRIC", "$KUBELET_RUNTIME_OPERATIONS_ERRORS_METRIC", "$KUBELET_RUNTIME_OPERATIONS_TOTAL_METRIC", "$KUBELET_RUNTIME_OPERATIONS_ERRORS_TOTAL_METRIC"]

metric_version = 2
url_tag = "scrapeUrl"
Expand Down Expand Up @@ -675,7 +674,7 @@
name_prefix="container.azm.ms/"
## An array of urls to scrape metrics from.
urls = ["$CADVISOR_METRICS_URL"]

fieldpass = ["kubelet_running_pod_count","volume_manager_total_volumes", "kubelet_node_config_error", "process_resident_memory_bytes", "process_cpu_seconds_total"]

metric_version = 2
Expand All @@ -690,7 +689,7 @@
## Optional TLS Config
tls_ca = "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt"
insecure_skip_verify = true


## prometheus custom metrics
[[inputs.prometheus]]
Expand Down Expand Up @@ -731,7 +730,7 @@
#name_prefix="container.azm.ms/"
## An array of urls to scrape metrics from.
urls = $AZMON_INTEGRATION_NPM_METRICS_URL_LIST_NODE

metric_version = 2
url_tag = "scrapeUrl"

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,13 @@ spec:
dockerProviderVersion: {{ .Values.omsagent.image.dockerProviderVersion }}
schema-versions: "v1"
spec:
{{- if semverCompare ">=1.14-0" .Capabilities.KubeVersion.GitVersion }}
nodeSelector:
kubernetes.io/os: windows
{{- else }}
nodeSelector:
beta.kubernetes.io/os: windows
{{- end }}
{{- if .Values.omsagent.rbac }}
serviceAccountName: omsagent
{{- end }}
Expand Down
26 changes: 26 additions & 0 deletions charts/azuremonitor-containers/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,17 @@ omsagent:
affinity:
nodeAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
nodeSelectorTerms:
- labelSelector:
matchExpressions:
- key: kubernetes.io/os
operator: In
values:
- linux
- key: type
operator: NotIn
values:
- virtual-kubelet
nodeSelectorTerms:
- labelSelector:
matchExpressions:
Expand All @@ -71,6 +82,21 @@ omsagent:
affinity:
nodeAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
nodeSelectorTerms:
- labelSelector:
matchExpressions:
- key: kubernetes.io/os
operator: In
values:
- linux
- key: type
operator: NotIn
values:
- virtual-kubelet
- key: kubernetes.io/role
operator: NotIn
values:
- master
nodeSelectorTerms:
- labelSelector:
matchExpressions:
Expand Down
9 changes: 4 additions & 5 deletions kubernetes/linux/main.sh
Original file line number Diff line number Diff line change
Expand Up @@ -300,11 +300,10 @@ fi
echo "configured container runtime on kubelet is : "$CONTAINER_RUNTIME
echo "export CONTAINER_RUNTIME="$CONTAINER_RUNTIME >> ~/.bashrc

# enable these metrics in next agent release
# export KUBELET_RUNTIME_OPERATIONS_TOTAL_METRIC="kubelet_runtime_operations_total"
# echo "export KUBELET_RUNTIME_OPERATIONS_TOTAL_METRIC="$KUBELET_RUNTIME_OPERATIONS_TOTAL_METRIC >> ~/.bashrc
# export KUBELET_RUNTIME_OPERATIONS_ERRORS_TOTAL_METRIC="kubelet_runtime_operations_errors_total"
# echo "export KUBELET_RUNTIME_OPERATIONS_ERRORS_TOTAL_METRIC="$KUBELET_RUNTIME_OPERATIONS_ERRORS_TOTAL_METRIC >> ~/.bashrc
export KUBELET_RUNTIME_OPERATIONS_TOTAL_METRIC="kubelet_runtime_operations_total"
echo "export KUBELET_RUNTIME_OPERATIONS_TOTAL_METRIC="$KUBELET_RUNTIME_OPERATIONS_TOTAL_METRIC >> ~/.bashrc
export KUBELET_RUNTIME_OPERATIONS_ERRORS_TOTAL_METRIC="kubelet_runtime_operations_errors_total"
echo "export KUBELET_RUNTIME_OPERATIONS_ERRORS_TOTAL_METRIC="$KUBELET_RUNTIME_OPERATIONS_ERRORS_TOTAL_METRIC >> ~/.bashrc

# default to docker metrics
export KUBELET_RUNTIME_OPERATIONS_METRIC="kubelet_docker_operations"
Expand Down
3 changes: 2 additions & 1 deletion kubernetes/omsagent.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -419,7 +419,8 @@ spec:
nodeSelectorTerms:
- labelSelector:
matchExpressions:
- key: beta.kubernetes.io/os
# The kubernetes.io/os label doesn't exist in k8s versions < 1.14, so make sure to choose the label based on the k8s version in the AKS yaml
- key: kubernetes.io/os
operator: In
values:
- linux
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,12 +66,12 @@ def send(data_to_send)
request.body = compressed_data
if @proxy.nil? || @proxy.empty?
http = Net::HTTP.new uri.hostname, uri.port
else
else
http = Net::HTTP.new(uri.hostname, uri.port, @proxy[:addr], @proxy[:port], @proxy[:user], @proxy[:pass])
end
if uri.scheme.downcase == 'https'
http.use_ssl = true
http.verify_mode = OpenSSL::SSL::VERIFY_NONE
http.verify_mode = OpenSSL::SSL::VERIFY_PEER
end

response = http.request(request)
Expand Down
9 changes: 7 additions & 2 deletions source/plugins/ruby/out_mdm.rb
Original file line number Diff line number Diff line change
Expand Up @@ -61,15 +61,17 @@ def configure(conf)
def start
super
begin
file = File.read(@@azure_json_path)
@data_hash = JSON.parse(file)
aks_resource_id = ENV["AKS_RESOURCE_ID"]
aks_region = ENV["AKS_REGION"]

if aks_resource_id.to_s.empty?
@log.info "Environment Variable AKS_RESOURCE_ID is not set.. "
@can_send_data_to_mdm = false
elsif !aks_resource_id.downcase.include?("/microsoft.containerservice/managedclusters/") && !aks_resource_id.downcase.include?("/microsoft.kubernetes/connectedclusters/")
@log.info "MDM Metris not supported for this cluster type resource: #{aks_resource_id}"
@can_send_data_to_mdm = false
end

if aks_region.to_s.empty?
@log.info "Environment Variable AKS_REGION is not set.. "
@can_send_data_to_mdm = false
Expand Down Expand Up @@ -106,6 +108,9 @@ def start
@cluster_identity = ArcK8sClusterIdentity.new
@cached_access_token = @cluster_identity.get_cluster_identity_token
else
# The azure json file is only used for AKS and doesn't exist in non-Azure environments
file = File.read(@@azure_json_path)
@data_hash = JSON.parse(file)
# Check to see if SP exists, if it does use SP. Else, use msi
sp_client_id = @data_hash["aadClientId"]
sp_client_secret = @data_hash["aadClientSecret"]
Expand Down

0 comments on commit da06d76

Please sign in to comment.