Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Envoy Distributed Tracing does not work as expected #15515

Closed
suikast42 opened this issue Nov 21, 2022 · 1 comment
Closed

Envoy Distributed Tracing does not work as expected #15515

suikast42 opened this issue Nov 21, 2022 · 1 comment
Labels
theme/telemetry Anything related to telemetry or observability

Comments

@suikast42
Copy link

Nomad version

1.4.2

Consul Version

1.13.3

I followed this Distributed Tracing setup guide.

The consul server config is

# Core agent settings: node "master-01" in datacenter "dc1", running in server mode.
datacenter = "dc1"
data_dir =  "/opt/services/core/consul/data"
log_level = "INFO"
node_name = "master-01"
server = true
# Bind and client listeners are set to all interfaces; the advertised address is pinned to 10.21.21.41.
bind_addr = "0.0.0.0"
advertise_addr = "10.21.21.41"
client_addr = "0.0.0.0"
# NOTE(review): "key" looks like a placeholder — presumably the real encryption key was redacted before posting.
encrypt = "key"
# UI settings: a per-service Grafana dashboard link and a metrics proxy pointing at a Prometheus-style endpoint.
# https://developer.hashicorp.com/consul/docs/connect/observability/ui-visualization
ui_config{
   enabled = true
   # Dashboard link template; the {{...}} placeholders carry service name, namespace, partition and datacenter
   # (presumably substituted by the Consul UI — confirm).
   dashboard_url_templates {
       service = "https://grafana.cloud.private/d/lDlaj-NGz/service-overview?orgId=1&var-service={{Service.Name}}&var-namespace={{Service.Namespace}}&var-partition={{Service.Partition}}&var-dc={{Datacenter}}"
   }
   metrics_provider = "prometheus"
   metrics_proxy {
     base_url = "https://mimir.cloud.private/prometheus"

     # Extra request headers for the metrics backend; the Authorization entry is left commented out,
     # so only X-Scope-OrgID = "1" is configured.
     add_headers = [
 #      {
 #         name = "Authorization"
 #         value = "Bearer <token>"
 #      }
       {
          name = "X-Scope-OrgID"
          value = "1"
       }
     ]
     # Only these two query paths are listed as allowed through the proxy.
     path_allowlist = ["/prometheus/api/v1/query_range", "/prometheus/api/v1/query"]
   }
}
# Overrides the gRPC listener address to loopback, while client_addr elsewhere in this config is 0.0.0.0.
addresses {
    grpc = "127.0.0.1"
}
# Listener ports: HTTPS on 8501 and gRPC on 8502.
# NOTE(review): http = -1 presumably disables the plain-HTTP listener — confirm against the Consul ports reference.
ports {
    http = -1
    https = 8501
    grpc = 8502
}
# Turns on Connect (service mesh) for this agent.
connect {
     enabled = true
}
# The only join target is 10.21.21.41, which is the same address this agent advertises.
retry_join =  ["10.21.21.41"]

# Bootstrap expects exactly one server.
bootstrap_expect = 1

# auto_encrypt with allow_tls enabled.
# NOTE(review): presumably lets client agents obtain TLS certificates from this server — confirm.
auto_encrypt{
    allow_tls = true
}
# Raft timing multiplier set to 1.
# NOTE(review): presumably the tightest (production-style) timing — confirm against the Consul performance docs.
performance{
    raft_multiplier = 1
}

# Attaches the metadata pair node_type = "server" to this node.
node_meta{
  node_type = "server"
}
# TLS settings: a shared CA bundle plus this agent's certificate and key, with verification
# enabled for both incoming and outgoing connections.
tls{
    defaults {
        ca_file = "/usr/local/share/ca-certificates/cloudlocal/cluster-ca-bundle.pem"
        cert_file = "/etc/opt/certs/consul/consul.pem"
        key_file = "/etc/opt/certs/consul/consul-key.pem"
        verify_incoming = true
        verify_outgoing = true
    }
    # Server hostname verification is additionally enabled for the internal RPC interface.
    internal_rpc {
        verify_server_hostname = true
    }
}
#watches = [
#  {
#    type = "checks"
#    handler = "/usr/bin/health-check-handler.sh"
#  }
#]

# Telemetry: hostname handling is disabled and Prometheus retention is set to 72h.
# NOTE(review): disable_hostname presumably stops metric names being prefixed with the hostname — confirm.
telemetry {
  disable_hostname = true
  prometheus_retention_time = "72h"
}

# Global configuration
# NOTE(review): presumably required so sidecars pick up the proxy-defaults entry bootstrapped
# elsewhere in this config — confirm.
enable_central_service_config = true

# Consul service mesh configuration
# Bootstraps a single "proxy-defaults" config entry named "global" that carries Envoy settings:
# a Prometheus bind address, a Zipkin tracer definition, and the static cluster the tracer sends to.
config_entries {
  bootstrap = [
    {
      kind = "proxy-defaults"
      name = "global"
      #https://developer.hashicorp.com/consul/docs/connect/distributed-tracing
      #https://www.envoyproxy.io/docs/envoy/latest/api-v3/config/trace/v3/zipkin.proto
      # NOTE(review): no `protocol` key is set in this config block. Envoy HTTP tracing presumably
      # needs the service protocol to be http (here or via service-defaults) — confirm against the
      # Consul distributed-tracing docs.
      config {
        # configures all sidecar proxies to expose Prometheus metrics.
        envoy_prometheus_bind_addr = "0.0.0.0:9102"
          # Zipkin tracer definition (raw Envoy JSON). Its collector_cluster value,
          # "collector_cluster_name", must match the static cluster declared in
          # envoy_extra_static_clusters_json; spans are posted to /api/v2/spans as HTTP_JSON.
          envoy_tracing_json = <<EOF
        {
          "http":{
            "name":"envoy.tracers.zipkin",
            "typedConfig":{
              "@type":"type.googleapis.com/envoy.config.trace.v3.ZipkinConfig",
              "collector_cluster":"collector_cluster_name",
              "collector_endpoint_version":"HTTP_JSON",
              "collector_endpoint":"/api/v2/spans",
              "shared_span_context":true,
               "trace_id_128bit": true
            }
          }
        }
        EOF

          # Static cluster (raw Envoy JSON) named "collector_cluster_name" with one endpoint,
          # 10.21.21.42:9411 — the same host and port as the Traefik Zipkin httpEndpoint
          # shown later in this issue.
          envoy_extra_static_clusters_json = <<EOF
        {
          "connect_timeout":"3.000s",
          "dns_lookup_family":"V4_ONLY",
          "lb_policy":"ROUND_ROBIN",
          "load_assignment":{
            "cluster_name":"collector_cluster_name",
            "endpoints":[
              {
                "lb_endpoints":[
                  {
                    "endpoint":{
                      "address":{
                        "socket_address":{
                           "address": "10.21.21.42",
                           "port_value": 9411,
                           "protocol":"TCP"
                        }
                      }
                    }
                  }
                ]
              }
            ]
          },
          "name":"collector_cluster_name",
          "type":"STRICT_DNS"
        }
        EOF
      }
    }
  ]
}

I deployed the countdash (counter dashboard) app:

# Nomad job with two groups, "api" and "dashboard". Each group uses bridge networking and
# registers a Connect service with a sidecar; the dashboard reaches count-api through a
# sidecar upstream bound to local port 8080.
job "countdash_app_mesh" {
  datacenters = ["nomadder1"]
  group "api" {
    # No port labels are declared in this group's network block.
    network {
      mode = "bridge"
    }

    # Registers "count-api" on port 9001 with a default sidecar.
    service {
      name = "count-api"
      port = "9001"
      connect {
        sidecar_service {}
      }
    }

    task "count-api" {
      driver = "docker"

      config {
        image = "hashicorpnomad/counter-api:v3"
        # NOTE(review): "http" is not declared as a port label in this group's network block
        # (only the dashboard group declares one) — confirm this reference is intended.
        ports = ["http"]
      }

      resources {
        cpu    = 100
        memory = 128
      }
    }
  }

  group "dashboard" {
    network {
      mode = "bridge"

      # Port label "http" maps to port 9002.
      port "http" {
        to = 9002
      }
    }

    # Registers "count-dashboard" on port 9002. The tags enable Traefik with Consul Connect,
    # turn on TLS for the router, and route Host(`count.cloud.private`) to this service.
    service {
      name = "count-dashboard"
      port = "9002"
            tags = [
              "traefik.enable=true",
              "traefik.consulcatalog.connect=true",
              "traefik.http.routers.count-dashboard.tls=true",
              "traefik.http.routers.count-dashboard.rule=Host(`count.cloud.private`)"
            ]

      connect {
        sidecar_service {
          proxy {
            # Upstream to count-api, bound locally on port 8080.
            upstreams {
              destination_name = "count-api"
              local_bind_port  = 8080
            }
          }
        }
      }
    }

    task "dashboard" {
      driver = "docker"

      env {
        CONSUL_TLS_SERVER_NAME = "localhost"
        # URL of the counting service, built from the upstream address variable for count_api.
        COUNTING_SERVICE_URL   = "http://${NOMAD_UPSTREAM_ADDR_count_api}"
      }

      config {
        image = "hashicorpnomad/counter-dashboard:v3"
      }

      resources {
        cpu    = 100
        memory = 128
      }
    }
  }
}

And I configured Traefik to start the trace:

# Zipkin tracing for Traefik. Spans are posted to 10.21.21.42:9411, the same collector address
# used by the Envoy static cluster in the Consul config above; the service-DNS variant is commented out.
[tracing]
  [tracing.zipkin]
    #httpEndpoint = "http://tempo-zipkin.service.consul/api/v2/spans"
    httpEndpoint = "http://10.21.21.42:9411/api/v2/spans"
    sameSpan = true

# Prometheus metrics with custom histogram buckets and labels for entry points, routers and services.
[metrics]
  [metrics.prometheus]
    buckets = [0.1,0.3,1.2,5.0,7.5,9.5,9.9]
    addEntryPointsLabels = true
    addRoutersLabels = true
    addServicesLabels = true
# Access log written to /logs/access.log in JSON format.
[accessLog]
  filePath = "/logs/access.log"
  format = "json"

# Declares the traefik-add-trace-id plugin, pinned to version v0.1.5.
[experimental.plugins.traefik-add-trace-id]
  moduleName = "github.com/trinnylondon/traefik-add-trace-id"
  version = "v0.1.5"

# Defines a middleware named "my-traefik-add-trace-id" backed by the traefik-add-trace-id plugin;
# it uses the header name "X-Trace-Id" with an empty prefix.
# NOTE(review): none of the Traefik tags on the count-dashboard service in the Nomad job above
# attach this middleware to the router — it may be attached elsewhere; confirm.
[http]
  [http.middlewares]
    [http.middlewares.my-traefik-add-trace-id]
      [http.middlewares.my-traefik-add-trace-id.plugin]
        [http.middlewares.my-traefik-add-trace-id.plugin.traefik-add-trace-id]
          headerName = "X-Trace-Id"
          headerPrefix = ""
          # NOTE(review): verbose is given as the string "true", not a boolean — confirm the plugin expects a string.
          verbose = "true"

I expect to see the full trace (up to the Envoy proxy of the dashboard), but I see only the Traefik trace.

image

@jkirschner-hashicorp jkirschner-hashicorp added the theme/telemetry Anything related to telemetry or observability label Nov 21, 2022
@suikast42
Copy link
Author

I am closing this.
The cause is described here
hashicorp/nomad#8647

It's a Nomad issue, not a Consul one.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
theme/telemetry Anything related to telemetry or observability
Projects
None yet
Development

No branches or pull requests

2 participants