cluster-manifests/kube-system/container-azm-ms-agentconfig.yaml

kind: ConfigMap
apiVersion: v1
metadata:
  name: container-azm-ms-agentconfig
  namespace: kube-system
data:
  # https://raw.githubusercontent.com/microsoft/Docker-Provider/ci_prod/kubernetes/container-azm-ms-agentconfig.yaml
  # https://learn.microsoft.com/azure/azure-monitor/containers/container-insights-agent-config
  schema-version:
    #string.used by agent to parse config. supported versions are {v1}. Configs with other schema versions will be rejected by the agent.
    v1
  config-version:
    #string.used by customer to keep track of this config file's version in their source control/repository (max allowed 10 chars, other chars will be truncated)
    ver1
  # log-data-collection-settings: |-
    # Log data collection settings
    # Any errors related to config map settings can be found in the KubeMonAgentEvents table in the Log Analytics workspace that the cluster is sending data to.

    # [log_collection_settings]
       # [log_collection_settings.stdout]
          # In the absense of this configmap, default value for enabled is true
          # enabled = true
          # exclude_namespaces setting holds good only if enabled is set to true
          # kube-system,gatekeeper-system log collection are disabled by default in the absence of 'log_collection_settings.stdout' setting. If you want to enable kube-system,gatekeeper-system, remove them from the following setting.
          # If you want to continue to disable kube-system,gatekeeper-system log collection keep the namespaces in the following setting and add any other namespace you want to disable log collection to the array.
          # In the absense of this configmap, default value for exclude_namespaces = ["kube-system","gatekeeper-system"]
          # exclude_namespaces = ["kube-system","gatekeeper-system"]

       # [log_collection_settings.stderr]
          # Default value for enabled is true
          # enabled = true
          # exclude_namespaces setting holds good only if enabled is set to true
          # kube-system,gatekeeper-system log collection are disabled by default in the absence of 'log_collection_settings.stderr' setting. If you want to enable kube-system,gatekeeper-system, remove them from the following setting.
          # If you want to continue to disable kube-system,gatekeeper-system log collection keep the namespaces in the following setting and add any other namespace you want to disable log collection to the array.
          # In the absense of this configmap, default value for exclude_namespaces = ["kube-system","gatekeeper-system"]
          # exclude_namespaces = ["kube-system","gatekeeper-system"]

       # [log_collection_settings.env_var]
          # In the absense of this configmap, default value for enabled is true
          # enabled = true
       # [log_collection_settings.enrich_container_logs]
          # In the absense of this configmap, default value for enrich_container_logs is false
          # enabled = false
          # When this is enabled (enabled = true), every container log entry (both stdout & stderr) will be enriched with container Name & container Image
       # [log_collection_settings.collect_all_kube_events]
          # In the absense of this configmap, default value for collect_all_kube_events is false
          # When the setting is set to false, only the kube events with !normal event type will be collected
          # enabled = false
          # When this is enabled (enabled = true), all kube events including normal events will be collected
       # [log_collection_settings.schema]
          # In the absence of this configmap, default value for containerlog_schema_version is "v1"
          # Supported values for this setting are "v1","v2"
          # See documentation at https://aka.ms/ContainerLogv2 for benefits of v2 schema over v1 schema before opting for "v2" schema
          # containerlog_schema_version = "v2"
       #[log_collection_settings.enable_multiline_logs]
          # fluent-bit based multiline log collection for go (stacktrace), dotnet (stacktrace)
          # if enabled will also stitch together container logs split by docker/cri due to size limits(16KB per log line)
          # enabled = "false"

  prometheus-data-collection-settings: |-
    # Custom Prometheus metrics data collection settings
    [prometheus_data_collection_settings.cluster]
        # Cluster level scrape endpoint(s). These metrics will be scraped from agent's Replicaset (singleton)
        # Any errors related to prometheus scraping can be found in the KubeMonAgentEvents table in the Log Analytics workspace that the cluster is sending data to.

        #Interval specifying how often to scrape for metrics. This is duration of time and can be specified for supporting settings by combining an integer value and time unit as a string value. Valid time units are ns, us (or µs), ms, s, m, h.
        interval = "1m"

        ## Uncomment the following settings with valid string arrays for prometheus scraping
        #fieldpass = ["metric_to_pass1", "metric_to_pass12"]

        #fielddrop = ["metric_to_drop"]

        # An array of urls to scrape metrics from.
        # urls = ["http://myurl:9101/metrics"]

        # An array of Kubernetes services to scrape metrics from.
        # kubernetes_services = ["http://my-service-dns.my-namespace:9102/metrics"]

        # When monitor_kubernetes_pods = true, replicaset will scrape Kubernetes pods for the following prometheus annotations:
        # - prometheus.io/scrape: Enable scraping for this pod
        # - prometheus.io/scheme: Default is http
        #     set this to `https` & most likely set the tls config.
        # - prometheus.io/path: If the metrics path is not /metrics, define it with this annotation.
        # - prometheus.io/port: If port is not 9102 use this annotation
        monitor_kubernetes_pods = true

        ## Restricts Kubernetes monitoring to namespaces for pods that have annotations set and are scraped using the monitor_kubernetes_pods setting.
        ## This will take effect when monitor_kubernetes_pods is set to true
        ##   ex: monitor_kubernetes_pods_namespaces = ["default1", "default2", "default3"]
        monitor_kubernetes_pods_namespaces = ["a0008","cluster-baseline-settings"]

        ## Label selector to target pods which have the specified label
        ## This will take effect when monitor_kubernetes_pods is set to true
        ## Reference the docs at https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/#label-selectors
        # kubernetes_label_selector = "env=dev,app=nginx"

        ## Field selector to target pods which have the specified field
        ## This will take effect when monitor_kubernetes_pods is set to true
        ## Reference the docs at https://kubernetes.io/docs/concepts/overview/working-with-objects/field-selectors/
        ## eg. To scrape pods on a specific node
        # kubernetes_field_selector = "spec.nodeName=$HOSTNAME"

    [prometheus_data_collection_settings.node]
        # Node level scrape endpoint(s). These metrics will be scraped from agent's DaemonSet running in every node in the cluster
        # Any errors related to prometheus scraping can be found in the KubeMonAgentEvents table in the Log Analytics workspace that the cluster is sending data to.

        #Interval specifying how often to scrape for metrics. This is duration of time and can be specified for supporting settings by combining an integer value and time unit as a string value. Valid time units are ns, us (or µs), ms, s, m, h.
        interval = "1m"

        ## Uncomment the following settings with valid string arrays for prometheus scraping

        # An array of urls to scrape metrics from. $NODE_IP (all upper case) will substitute of running Node's IP address
        urls = ["http://$NODE_IP:9103/metrics"]
        #fieldpass = ["metric_to_pass1", "metric_to_pass12"]

        #fielddrop = ["metric_to_drop"]

  # metric_collection_settings: |-
    # Metrics collection settings for metrics sent to Log Analytics and MDM
    # [metric_collection_settings.collect_kube_system_pv_metrics]
      # In the absense of this configmap, default value for collect_kube_system_pv_metrics is false
      # When the setting is set to false, only the persistent volume metrics outside the kube-system namespace will be collected
      # When this is enabled (enabled = true), persistent volume metrics including those in the kube-system namespace will be collected
      # enabled = true

  integrations: |-
    [integrations.azure_network_policy_manager]
        collect_basic_metrics = false
        collect_advanced_metrics = false
    [integrations.azure_subnet_ip_usage]
        enabled = true

# Doc - https://github.com/microsoft/Docker-Provider/blob/ci_prod/Documentation/AgentSettings/ReadMe.md
  agent-settings: |-
    # prometheus scrape fluent bit settings for high scale
    # buffer size should be greater than or equal to chunk size else we set it to chunk size.
    # settings scoped to prometheus sidecar container. all values in mb
    [agent_settings.prometheus_fbit_settings]
      tcp_listener_chunk_size = 10
      tcp_listener_buffer_size = 10
      tcp_listener_mem_buf_limit = 200

    # prometheus scrape fluent bit settings for high scale
    # buffer size should be greater than or equal to chunk size else we set it to chunk size.
    # settings scoped to daemonset container. all values in mb
    # [agent_settings.node_prometheus_fbit_settings]
      # tcp_listener_chunk_size = 1
      # tcp_listener_buffer_size = 1
      # tcp_listener_mem_buf_limit = 10

    # prometheus scrape fluent bit settings for high scale
    # buffer size should be greater than or equal to chunk size else we set it to chunk size.
    # settings scoped to replicaset container. all values in mb
    # [agent_settings.cluster_prometheus_fbit_settings]
      # tcp_listener_chunk_size = 1
      # tcp_listener_buffer_size = 1
      # tcp_listener_mem_buf_limit = 10

    # The following settings are "undocumented", we don't recommend uncommenting them unless directed by Microsoft.
    # They increase the maximum stdout/stderr log collection rate but will also cause higher cpu/memory usage.
    ## Ref for more details about Ignore_Older -  https://docs.fluentbit.io/manual/v/1.7/pipeline/inputs/tail
    # [agent_settings.fbit_config]
    #   log_flush_interval_secs = "1"                 # default value is 15
    #   tail_mem_buf_limit_megabytes = "10"           # default value is 10
    #   tail_buf_chunksize_megabytes = "1"            # default value is 32kb (comment out this line for default)
    #   tail_buf_maxsize_megabytes = "1"              # default value is 32kb (comment out this line for default)
    #   tail_ignore_older = "5m"                      # default value same as fluent-bit default i.e.0m

    # On both AKS & Arc K8s enviornments, if Cluster has configured with Forward Proxy then Proxy settings automatically applied and used for the agent
    # Certain configurations, proxy config should be ignored for example Cluster with AMPLS + Proxy
    # in such scenarios, use the following config to ignore proxy settings
    # [agent_settings.proxy_config]
    #    ignore_proxy_settings = "true"  # if this is not applied, default value is false

    # The following settings are "undocumented", we don't recommend uncommenting them unless directed by Microsoft.
    # Configuration settings for the waittime for the network listeners to be available
    # [agent_settings.network_listener_waittime]
    #   tcp_port_25226 = 45                           # Port 25226 is used for telegraf to fluent-bit data in ReplicaSet
    #   tcp_port_25228 = 60                           # Port 25228 is used for telegraf to fluentd data
    #   tcp_port_25229 = 45                           # Port 25229 is used for telegraf to fluent-bit data in DaemonSet

    # The following settings are "undocumented", we don't recommend uncommenting them unless directed by Microsoft.
    # [agent_settings.mdsd_config]
    #   monitoring_max_event_rate = "50000" # default 20K eps
    #   backpressure_memory_threshold_in_mb = "1500" # default 3500MB
    #   upload_max_size_in_mb = "20"  # default 2MB
    #   upload_frequency_seconds = "1" # default 60 upload_frequency_seconds
    #   compression_level = "0"  # supported levels 0 to 9 and 0 means no compression