From d6e463f8cdb622363e092b5972cb7d01e83b6c64 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 25 Oct 2023 09:53:29 +0200 Subject: [PATCH 01/57] fix: wrong log level --- lifemonitor/tasks/scheduler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lifemonitor/tasks/scheduler.py b/lifemonitor/tasks/scheduler.py index d59d35971..e986b538a 100644 --- a/lifemonitor/tasks/scheduler.py +++ b/lifemonitor/tasks/scheduler.py @@ -29,7 +29,7 @@ def _on_event(self, event: events.JobEvent): logger.debug("List of current jobs: %r", self.get_jobs()) logger.debug("List of deferred jobs: %r", self._not_scheduled_jobs) if event.code in [events.EVENT_JOB_EXECUTED, events.EVENT_JOB_ERROR]: - logger.warning("List of current jobs: %r", self.get_jobs()) + logger.debug("List of current jobs: %r", self.get_jobs()) @staticmethod def __enqueue_dramatiq_job__(**message): From 960965f71e525f2fbff76c9ca82463f85253a002 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 25 Oct 2023 09:57:59 +0200 Subject: [PATCH 02/57] fix: wrong log level --- lifemonitor/metrics/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lifemonitor/metrics/__init__.py b/lifemonitor/metrics/__init__.py index 238bb8022..828f1a8c2 100644 --- a/lifemonitor/metrics/__init__.py +++ b/lifemonitor/metrics/__init__.py @@ -60,7 +60,7 @@ def init_metrics(app, prom_registry=None): else: logger.warning("Unable to start multiprocess prometheus exporter: 'PROMETHEUS_MULTIPROC_DIR' not set." 
f"Metrics will be exposed through the `{__METRICS_ENDPOINT__}` endpoint.") - logger.warning("Configured class for metrics: %r", metrics_class) + logger.info("Configured class for metrics: %r", metrics_class) # init metrics metrics = metrics_class(app, defaults_prefix=model.PREFIX, registry=prom_registry) From d7378d97bbf05da4b1624c1edd096475f4d4f40f Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Thu, 26 Oct 2023 10:17:19 +0200 Subject: [PATCH 03/57] feat: enable rotation of log files --- lifemonitor/config.py | 34 +++++++++++++++++++++++++--------- settings.conf | 3 +++ 2 files changed, 28 insertions(+), 9 deletions(-) diff --git a/lifemonitor/config.py b/lifemonitor/config.py index e2469f9f9..26d9dbf32 100644 --- a/lifemonitor/config.py +++ b/lifemonitor/config.py @@ -293,6 +293,12 @@ def configure_logging(app): if level_value == logging.DEBUG: log_format = f'[{COLOR_SEQ % (90)}%(asctime)s{RESET_SEQ}] %(levelname)s in %(module)s::%(funcName)s @ line: %(lineno)s: {COLOR_SEQ % (90)}%(message)s{RESET_SEQ}' + # configure and initialize log_path + log_path = app.config.get('LOG_PATH', '/var/log/lm') + if not os.path.exists(log_path): + os.makedirs(log_path, exist_ok=True) + + # configure logging dictConfig({ 'version': 1, 'formatters': {'default': { @@ -305,24 +311,34 @@ def configure_logging(app): # 'param': '', } }, - 'handlers': {'wsgi': { - 'class': 'logging.StreamHandler', - 'stream': 'ext://flask.logging.wsgi_errors_stream', - 'formatter': 'default', - 'filters': ['myfilter'] - }}, + 'handlers': { + 'wsgi': { + 'class': 'logging.StreamHandler', + 'stream': 'ext://flask.logging.wsgi_errors_stream', + 'formatter': 'default', + 'filters': ['myfilter'] + }, + 'rotating_to_file': { + 'level': logging.INFO, + 'class': "logging.handlers.RotatingFileHandler", + 'formatter': 'default', + "filename": "/var/log/lm/app.log", + "maxBytes": 10485760, + "backupCount": 10, + }, + }, 'response': { 'level': logging.INFO, - 'handlers': ['wsgi'], + 'handlers': ['wsgi', 
'rotating_to_file'], }, 'root': { 'level': level_value, - 'handlers': ['wsgi'] + 'handlers': ['wsgi', 'rotating_to_file'] }, # Lower the log level for the github.Requester object -- else it'll flood us with messages 'Requester': { 'level': logging.ERROR, - 'handlers': ['wsgi'] + 'handlers': ['wsgi', 'rotating_to_file'] }, 'disable_existing_loggers': False, }) diff --git a/settings.conf b/settings.conf index 5407e8a07..603b764f7 100644 --- a/settings.conf +++ b/settings.conf @@ -4,6 +4,9 @@ FLASK_ENV=development # Set the LOG_LEVEL LOG_LEVEL=INFO # default: 'INFO' on production, 'DEBUG' on development +# Set the path for the log file +# LOG_PATH=/var/log/lm # default: /var/log/lm + # The name and port number of the back-end server (e.g., 'localhost:8000'). # If the back-end is served through a reverse proxy, # then you have to set SERVER_NAME to the appropriate proxy entry From b4f2e46110c7b0ba77fb7f97f002a286f7217d4a Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Thu, 26 Oct 2023 10:28:37 +0200 Subject: [PATCH 04/57] feat(ctrl): add custom error page for 405 errors --- lifemonitor/errors.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/lifemonitor/errors.py b/lifemonitor/errors.py index f01e2aae8..4e74e3329 100644 --- a/lifemonitor/errors.py +++ b/lifemonitor/errors.py @@ -82,6 +82,21 @@ def handle_404(e: Exception = None): } ) +@blueprint.route("/405") +def handle_405(e: Exception = None): + resource = request.args.get("resource", None, type=str) + logger.debug(f"Method not allowed for resource {resource}") + return handle_error( + { + "title": "LifeMonitor: Method not allowed", + "code": "404", + "description": str(e) + if e and logger.isEnabledFor(logging.DEBUG) + else "Method not allowed for this resource", + "resource": resource, + } + ) + @blueprint.route("/429") def handle_429(e: Exception = None): From a55c41c633ea1116b885221bd07b1fe4bf21e863 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Thu, 26 Oct 2023 10:31:46 
+0200 Subject: [PATCH 05/57] feat(k8s): add resource constraints for init containers --- k8s/templates/backend.deployment.yaml | 4 ++++ k8s/templates/worker.deployment.yaml | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/k8s/templates/backend.deployment.yaml b/k8s/templates/backend.deployment.yaml index 3e94f0d5b..30716c9f6 100644 --- a/k8s/templates/backend.deployment.yaml +++ b/k8s/templates/backend.deployment.yaml @@ -54,6 +54,10 @@ spec: {{- include "lifemonitor.common-env" . | nindent 12 }} volumeMounts: {{- include "lifemonitor.common-volume-mounts" . | nindent 12 }} + resources: + limits: + memory: 256Mi + cpu: 200m containers: - name: backend securityContext: diff --git a/k8s/templates/worker.deployment.yaml b/k8s/templates/worker.deployment.yaml index f5ff41e8c..eae050c15 100644 --- a/k8s/templates/worker.deployment.yaml +++ b/k8s/templates/worker.deployment.yaml @@ -50,6 +50,10 @@ spec: {{- include "lifemonitor.common-env" $ | nindent 12 }} volumeMounts: {{- include "lifemonitor.common-volume-mounts" $ | nindent 12 }} + resources: + limits: + memory: 256Mi + cpu: 200m containers: - name: worker securityContext: From 3f57e2421a41fa33203ff1132ad0e0481a037722 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Thu, 26 Oct 2023 10:32:51 +0200 Subject: [PATCH 06/57] feat(k8s): mount empty volume for logs --- k8s/templates/_helpers.tpl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/k8s/templates/_helpers.tpl b/k8s/templates/_helpers.tpl index 2ab056c4c..620afc27e 100644 --- a/k8s/templates/_helpers.tpl +++ b/k8s/templates/_helpers.tpl @@ -167,6 +167,8 @@ Define mount points shared by some pods. 
- mountPath: "/lm/certs/" name: lifemonitor-tls readOnly: true +- name: lifemonitor-logs + mountPath: "/var/log" - name: lifemonitor-settings mountPath: "/lm/settings.conf" subPath: settings.conf From f3b60d74e5a05722cc652991bf0ab14e2798200d Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Thu, 26 Oct 2023 10:36:51 +0200 Subject: [PATCH 07/57] feat(k8s): allow logs export for backend pods --- k8s/templates/backend.deployment.yaml | 23 ++++++ k8s/templates/promtail.configmap.yaml | 113 ++++++++++++++++++++++++++ k8s/templates/worker.deployment.yaml | 23 ++++++ k8s/values.yaml | 4 + 4 files changed, 163 insertions(+) create mode 100644 k8s/templates/promtail.configmap.yaml diff --git a/k8s/templates/backend.deployment.yaml b/k8s/templates/backend.deployment.yaml index 30716c9f6..a307f4a0e 100644 --- a/k8s/templates/backend.deployment.yaml +++ b/k8s/templates/backend.deployment.yaml @@ -89,8 +89,31 @@ spec: periodSeconds: 3 resources: {{- toYaml .Values.lifemonitor.resources | nindent 12 }} + {{- if .Values.monitoring.loki.enabled }} + - name: log-exporter + image: {{ .Values.monitoring.loki.logExporterImage }} + args: + - "-config.file=/etc/promtail/promtail.yaml" # Found in the ConfigMap + resources: + requests: + memory: 128Mi + cpu: 0.1 + limits: + memory: 256Mi + cpu: 0.2 + volumeMounts: + - name: promtail-config + mountPath: /etc/promtail + - name: lifemonitor-logs + mountPath: /var/log + {{- end }} volumes: {{- include "lifemonitor.common-volume" . | nindent 8 }} + {{- if .Values.monitoring.loki.enabled }} + - name: promtail-config + configMap: + name: "{{.Release.Name}}-promtail-backend-configmap" + {{- end }} {{- with .Values.lifemonitor.nodeSelector }} nodeSelector: {{- toYaml . 
| nindent 8 }} diff --git a/k8s/templates/promtail.configmap.yaml b/k8s/templates/promtail.configmap.yaml new file mode 100644 index 000000000..53e34239c --- /dev/null +++ b/k8s/templates/promtail.configmap.yaml @@ -0,0 +1,113 @@ +{{- if .Values.monitoring.loki.enabled }} +{{- $releaseName := .Release.Name }} +{{- $releaseNamespace := .Release.Namespace }} +{{- $lokiUrl := .Values.monitoring.loki.url }} +apiVersion: v1 +kind: ConfigMap +metadata: + name: "{{.Release.Name}}-promtail-proxy-configmap" +data: + promtail.yaml: | + server: + http_listen_port: 9080 + grpc_listen_port: 0 + log_level: "debug" + positions: + filename: /tmp/positions.yaml + clients: # Specify target + - url: {{.Values.monitoring.loki.url}}/loki/api/v1/push + scrape_configs: + - job_name: "lifemonitor-api-proxy-logger" + static_configs: + - targets: + - localhost + labels: + app: "lifemonitor-backend" + component: "{{.Release.Name}}-proxy" + environment: "{{.Release.Namespace}}" + format: "extended" + level: "INFO" + __path__: /var/log/nginx/access.log + - targets: + - localhost + labels: + app: "lifemonitor-backend" + component: "{{.Release.Name}}-proxy" + environment: "{{.Release.Namespace}}" + format: "extended" + level: "ERROR" + __path__: /var/log/nginx/*error.log + pipeline_stages: + - drop: + expression: ".*(DEBUG|health|heartbeat).*" +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: "{{.Release.Name}}-promtail-backend-configmap" +data: + promtail.yaml: | + server: + http_listen_port: 9080 + grpc_listen_port: 0 + log_level: "debug" + positions: + filename: /tmp/positions.yaml + clients: # Specify target + - url: {{ $lokiUrl }}/loki/api/v1/push + scrape_configs: + - job_name: "lifemonitor-api-backend-logger" + static_configs: + - targets: + - localhost + labels: + app: "lifemonitor-backend" + component: "api-backend" + environment: "{{.releaseNamespace}}" + format: "backend" + __path__: /var/log/lm/*.log + pipeline_stages: + - drop: + expression: ".*(DEBUG|health|heartbeat).*" + - 
regex: + expression: '(.*)(?P<log_level>ERROR|INFO|DEBUG|WARNING)(.*)' + - labels: + level: log_level + + +{{- range $i, $queue := .Values.worker.queues }} +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: "{{ $releaseName }}-promtail-worker-{{ $queue.name }}-configmap" +data: + promtail.yaml: | + server: + http_listen_port: 9080 + grpc_listen_port: 0 + log_level: "debug" + positions: + filename: /tmp/positions.yaml + clients: # Specify target + - url: {{ $lokiUrl }}/loki/api/v1/push + scrape_configs: + - job_name: "lifemonitor-api-backend-logger" + static_configs: + - targets: + - localhost + labels: + app: "lifemonitor-backend" + component: "api-worker-{{ $queue.name }}" + environment: "{{.releaseNamespace}}" + format: "backend" + __path__: /var/log/lm/*.log + pipeline_stages: + - drop: + expression: ".*(DEBUG|health|heartbeat).*" + - regex: + expression: '(.*)(?P<log_level>ERROR|INFO|DEBUG|WARNING)(.*)' + - labels: + level: log_level +{{- end }} +{{- end }} \ No newline at end of file diff --git a/k8s/templates/worker.deployment.yaml b/k8s/templates/worker.deployment.yaml index eae050c15..83c428a94 100644 --- a/k8s/templates/worker.deployment.yaml +++ b/k8s/templates/worker.deployment.yaml @@ -92,8 +92,31 @@ spec: # periodSeconds: 3 resources: {{- toYaml $.Values.worker.resources | nindent 12 }} + {{- if $.Values.monitoring.loki.enabled }} + - name: log-exporter + image: {{ $.Values.monitoring.loki.logExporterImage }} + args: + - "-config.file=/etc/promtail/promtail.yaml" # Found in the ConfigMap + resources: + requests: + memory: 128Mi + cpu: 0.1 + limits: + memory: 256Mi + cpu: 0.2 + volumeMounts: + - name: promtail-config + mountPath: /etc/promtail + - name: lifemonitor-logs + mountPath: /var/log + {{- end }} volumes: {{- include "lifemonitor.common-volume" $ | nindent 8 }} + {{- if $.Values.monitoring.loki.enabled }} + - name: promtail-config + configMap: + name: "{{$.Release.Name}}-promtail-worker-{{ $queue.name }}-configmap" + {{- end }} {{- with
$.Values.worker.nodeSelector }} nodeSelector: {{- toYaml . | nindent 8 }} diff --git a/k8s/values.yaml b/k8s/values.yaml index f02c2dee6..93744f759 100644 --- a/k8s/values.yaml +++ b/k8s/values.yaml @@ -136,6 +136,10 @@ monitoring: enabled: false prometheus: namespace: kube-prometheus-stack + loki: + enabled: false + url: http://loki:3100 + logExporterImage: grafana/promtail:main-60ea954 rateLimiting: zone: From e78e50ae09a405b88ca49d76aae70e2655a5a564 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Thu, 26 Oct 2023 10:43:54 +0200 Subject: [PATCH 08/57] style(pep8): fix blanks --- lifemonitor/errors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/lifemonitor/errors.py b/lifemonitor/errors.py index 4e74e3329..234915289 100644 --- a/lifemonitor/errors.py +++ b/lifemonitor/errors.py @@ -82,6 +82,7 @@ def handle_404(e: Exception = None): } ) + @blueprint.route("/405") def handle_405(e: Exception = None): resource = request.args.get("resource", None, type=str) From 588d44bc1dbbd176a27399e7077b3825832193f4 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Thu, 26 Oct 2023 10:54:30 +0200 Subject: [PATCH 09/57] style(pep8): fix blanks --- k8s/values.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/k8s/values.yaml b/k8s/values.yaml index 93744f759..7aa7bf62e 100644 --- a/k8s/values.yaml +++ b/k8s/values.yaml @@ -144,7 +144,7 @@ monitoring: rateLimiting: zone: accounts: - enabled: false + enabled: false size: 60m rate: 2r/s burst: 20 From 60685af3a157e5e64bc8a09949bb389eb0ec6db4 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Thu, 26 Oct 2023 10:55:43 +0200 Subject: [PATCH 10/57] feat(k8s): set default resource constraints --- k8s/values.yaml | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/k8s/values.yaml b/k8s/values.yaml index 7aa7bf62e..d3cbc00fc 100644 --- a/k8s/values.yaml +++ b/k8s/values.yaml @@ -197,7 +197,6 @@ lifemonitor: enableTestConnection: false resources: - {} # We usually 
recommend not to specify default resources and to leave this as a conscious # choice for the user. This also increases chances charts run on environments with little # resources, such as Minikube. If you do want to specify resources, uncomment the following @@ -208,6 +207,12 @@ lifemonitor: # requests: # cpu: 100m # memory: 128Mi + limits: + cpu: 1.8 + memory: 7936Mi + requests: + cpu: 0.5 + memory: 1024Mi autoscaling: enabled: false @@ -264,7 +269,6 @@ worker: replicaCount: 1 resources: - {} # We usually recommend not to specify default resources and to leave this as a conscious # choice for the user. This also increases chances charts run on environments with little # resources, such as Minikube. If you do want to specify resources, uncomment the following @@ -275,6 +279,12 @@ worker: # requests: # cpu: 100m # memory: 128Mi + limits: + cpu: 1.8 + memory: 7936Mi + requests: + cpu: 0.5 + memory: 1024Mi autoscaling: enabled: false From 8a2601a258dea43517a953462c8de69b363fe3a5 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Fri, 27 Oct 2023 17:46:49 +0200 Subject: [PATCH 11/57] refactor(k8s): reconfigure exporter image --- k8s/templates/backend.deployment.yaml | 3 ++- k8s/templates/worker.deployment.yaml | 3 ++- k8s/values.yaml | 4 +++- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/k8s/templates/backend.deployment.yaml b/k8s/templates/backend.deployment.yaml index a307f4a0e..a674033b4 100644 --- a/k8s/templates/backend.deployment.yaml +++ b/k8s/templates/backend.deployment.yaml @@ -91,7 +91,8 @@ spec: {{- toYaml .Values.lifemonitor.resources | nindent 12 }} {{- if .Values.monitoring.loki.enabled }} - name: log-exporter - image: {{ .Values.monitoring.loki.logExporterImage }} + image: {{ .Values.monitoring.loki.exporter.image }} + imagePullPolicy: {{ $.Values.monitoring.loki.exporter.imagePullPolicy }} args: - "-config.file=/etc/promtail/promtail.yaml" # Found in the ConfigMap resources: diff --git a/k8s/templates/worker.deployment.yaml 
b/k8s/templates/worker.deployment.yaml index 83c428a94..81f0264e2 100644 --- a/k8s/templates/worker.deployment.yaml +++ b/k8s/templates/worker.deployment.yaml @@ -94,7 +94,8 @@ spec: {{- toYaml $.Values.worker.resources | nindent 12 }} {{- if $.Values.monitoring.loki.enabled }} - name: log-exporter - image: {{ $.Values.monitoring.loki.logExporterImage }} + image: {{ $.Values.monitoring.loki.exporter.image }} + imagePullPolicy: {{ $.Values.monitoring.loki.exporter.imagePullPolicy }} args: - "-config.file=/etc/promtail/promtail.yaml" # Found in the ConfigMap resources: diff --git a/k8s/values.yaml b/k8s/values.yaml index d3cbc00fc..0eed07a61 100644 --- a/k8s/values.yaml +++ b/k8s/values.yaml @@ -139,7 +139,9 @@ monitoring: loki: enabled: false url: http://loki:3100 - logExporterImage: grafana/promtail:main-60ea954 + exporter: + image: grafana/promtail:main-60ea954 + imagePullPolicy: IfNotPresent rateLimiting: zone: From 747d8fa211c468466e13c545a2ce317b60ee779f Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Fri, 27 Oct 2023 17:48:53 +0200 Subject: [PATCH 12/57] feat(k8s): allow to configure resources of log exporters --- k8s/templates/backend.deployment.yaml | 7 +------ k8s/templates/worker.deployment.yaml | 7 +------ k8s/values.yaml | 7 +++++++ 3 files changed, 9 insertions(+), 12 deletions(-) diff --git a/k8s/templates/backend.deployment.yaml b/k8s/templates/backend.deployment.yaml index a674033b4..4b2c231d4 100644 --- a/k8s/templates/backend.deployment.yaml +++ b/k8s/templates/backend.deployment.yaml @@ -96,12 +96,7 @@ spec: args: - "-config.file=/etc/promtail/promtail.yaml" # Found in the ConfigMap resources: - requests: - memory: 128Mi - cpu: 0.1 - limits: - memory: 256Mi - cpu: 0.2 + {{- toYaml $.Values.monitoring.loki.exporter.resources | nindent 12 }} volumeMounts: - name: promtail-config mountPath: /etc/promtail diff --git a/k8s/templates/worker.deployment.yaml b/k8s/templates/worker.deployment.yaml index 81f0264e2..4df21cc4e 100644 --- 
a/k8s/templates/worker.deployment.yaml +++ b/k8s/templates/worker.deployment.yaml @@ -99,12 +99,7 @@ spec: args: - "-config.file=/etc/promtail/promtail.yaml" # Found in the ConfigMap resources: - requests: - memory: 128Mi - cpu: 0.1 - limits: - memory: 256Mi - cpu: 0.2 + {{- toYaml $.Values.monitoring.loki.exporter.resources | nindent 12 }} volumeMounts: - name: promtail-config mountPath: /etc/promtail diff --git a/k8s/values.yaml b/k8s/values.yaml index 0eed07a61..9ada44564 100644 --- a/k8s/values.yaml +++ b/k8s/values.yaml @@ -142,6 +142,13 @@ monitoring: exporter: image: grafana/promtail:main-60ea954 imagePullPolicy: IfNotPresent + resources: + requests: + memory: 128Mi + cpu: 0.1 + limits: + memory: 256Mi + cpu: 0.2 rateLimiting: zone: From 319dd73e98fb78c9927303ad8c5465a26dc93c68 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Fri, 27 Oct 2023 18:05:03 +0200 Subject: [PATCH 13/57] feat(k8s): allow to configure resources of initContainers --- k8s/templates/backend.deployment.yaml | 4 +--- k8s/templates/worker.deployment.yaml | 4 +--- k8s/values.yaml | 16 ++++++++++++++++ 3 files changed, 18 insertions(+), 6 deletions(-) diff --git a/k8s/templates/backend.deployment.yaml b/k8s/templates/backend.deployment.yaml index 4b2c231d4..aaedf7f69 100644 --- a/k8s/templates/backend.deployment.yaml +++ b/k8s/templates/backend.deployment.yaml @@ -55,9 +55,7 @@ spec: volumeMounts: {{- include "lifemonitor.common-volume-mounts" . 
| nindent 12 }} resources: - limits: - memory: 256Mi - cpu: 200m + {{- toYaml .Values.lifemonitor.initContainers.initBackend.resources | nindent 12 }} containers: - name: backend securityContext: diff --git a/k8s/templates/worker.deployment.yaml b/k8s/templates/worker.deployment.yaml index 4df21cc4e..7d06c3cab 100644 --- a/k8s/templates/worker.deployment.yaml +++ b/k8s/templates/worker.deployment.yaml @@ -51,9 +51,7 @@ spec: volumeMounts: {{- include "lifemonitor.common-volume-mounts" $ | nindent 12 }} resources: - limits: - memory: 256Mi - cpu: 200m + {{- toYaml $.Values.worker.initContainers.initWorker.resources | nindent 12 }} containers: - name: worker securityContext: diff --git a/k8s/values.yaml b/k8s/values.yaml index 9ada44564..6dbc2303d 100644 --- a/k8s/values.yaml +++ b/k8s/values.yaml @@ -223,6 +223,14 @@ lifemonitor: cpu: 0.5 memory: 1024Mi + # configure resources for the init containers + initContainers: + initBackend: + resources: + limits: + memory: 256Mi + cpu: 200m + autoscaling: enabled: false minReplicas: 1 @@ -295,6 +303,14 @@ worker: cpu: 0.5 memory: 1024Mi + # configure resources for the init containers + initContainers: + initWorker: + resources: + limits: + memory: 256Mi + cpu: 200m + autoscaling: enabled: false minReplicas: 1 From a39bbc2311db7b342e5b756c057ed706e69c266d Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Fri, 27 Oct 2023 18:32:35 +0200 Subject: [PATCH 14/57] fix(docker): init logs folder --- docker/lifemonitor.Dockerfile | 1 + 1 file changed, 1 insertion(+) diff --git a/docker/lifemonitor.Dockerfile b/docker/lifemonitor.Dockerfile index 3f8d67f3a..9969706a2 100644 --- a/docker/lifemonitor.Dockerfile +++ b/docker/lifemonitor.Dockerfile @@ -67,6 +67,7 @@ RUN mkdir -p /var/data/lm \ && chown -R lm:lm /var/data/lm \ && ln -s /var/data/lm /lm/data \ && chown -R lm:lm /lm/data \ + && mkdir -p /var/log/lm && chown -R lm:lm /var/log/lm \ && mkdir /lm/.nextflow && chmod -R 777 /lm/.nextflow # Set the default user From 
cc399f97517635aaa924a34b388f93195011e4c6 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Sat, 28 Oct 2023 12:25:46 +0200 Subject: [PATCH 15/57] feat(docker): configure lm user parametrically --- Makefile | 4 +++- docker/lifemonitor.Dockerfile | 12 +++++++++++- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index dddd13a72..e1092ea42 100644 --- a/Makefile +++ b/Makefile @@ -139,7 +139,9 @@ lifemonitor: docker/lifemonitor.Dockerfile certs app.py gunicorn.conf.py ## Buil printf "\n$(yellow)WARNING: $(bold)Skip build of LifeMonitor Docker image !!! $(reset)\n" ; \ else \ printf "\n$(bold)Building LifeMonitor Docker image...$(reset)\n" ; \ - $(build_kit) docker $(build_cmd) $(cache_from_opt) $(cache_to_opt) \ + $(build_kit) docker $(build_cmd) \ + --build-arg USER_ID=$(id -u) --build-arg GROUP_ID=$(id -g) \ + $(cache_from_opt) $(cache_to_opt) \ ${sw_version_arg} ${build_number_arg} ${tags_opt} ${labels_opt} ${platforms_opt} \ -f docker/lifemonitor.Dockerfile -t crs4/lifemonitor . 
;\ printf "$(done)\n" ; \ diff --git a/docker/lifemonitor.Dockerfile b/docker/lifemonitor.Dockerfile index 9969706a2..ed6bfe018 100644 --- a/docker/lifemonitor.Dockerfile +++ b/docker/lifemonitor.Dockerfile @@ -8,8 +8,18 @@ RUN apt-get update -q \ postgresql-client-11 default-jre \ && apt-get clean -y && rm -rf /var/lib/apt/lists +# Set the parametric USER ID +ARG USER_ID +ENV USER_ID=${USER_ID:-1000} + +# Set the parametric GROUP ID +ARG GROUP_ID +ENV GROUP_ID=${GROUP_ID:-1000} + # Create a user 'lm' with HOME at /lm and set 'lm' as default git user -RUN useradd -d /lm -m lm +RUN groupadd -g ${GROUP_ID} lm && \ + useradd -u ${USER_ID} -g lm -d /lm -m lm + # Set the default user ENV USER=lm From 7dd72c48a66793d8c3870bef2e6b465c6ffea258 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Sat, 28 Oct 2023 13:32:12 +0200 Subject: [PATCH 16/57] fix(docker-compose): don't overrride user on testing environment --- Makefile | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/Makefile b/Makefile index e1092ea42..140290f09 100644 --- a/Makefile +++ b/Makefile @@ -223,14 +223,13 @@ start-dev: images compose-files dev reset_compose permissions ## Start LifeMonit start-testing: compose-files aux_images ro_crates images reset_compose permissions ## Start LifeMonitor in a Testing environment @printf "\n$(bold)Starting testing services...$(reset)\n" ; \ base=$$(if [[ -f "docker-compose.yml" ]]; then echo "-f docker-compose.yml"; fi) ; \ - echo "$$(USER_UID=$$(id -u) USER_GID=$$(id -g) \ - $(docker_compose) $${base} \ - -f docker-compose.extra.yml \ - -f docker-compose.base.yml \ - -f docker-compose.monitoring.yml \ - -f docker-compose.dev.yml \ - -f docker-compose.test.yml \ - config)" > docker-compose.yml \ + echo "$$($(docker_compose) $${base} \ + -f docker-compose.extra.yml \ + -f docker-compose.base.yml \ + -f docker-compose.monitoring.yml \ + -f docker-compose.dev.yml \ + -f docker-compose.test.yml \ + config)" > docker-compose.yml \ && cp 
{,.test.}docker-compose.yml \ && $(docker_compose) -f docker-compose.yml up -d db lmtests seek jenkins webserver worker ws_server ;\ $(docker_compose) -f ./docker-compose.yml \ @@ -264,7 +263,7 @@ start-aux-services: aux_images ro_crates docker-compose.extra.yml permissions ## run-tests: start-testing ## Run all tests in the Testing Environment @printf "\n$(bold)Running tests...$(reset)\n" ; \ - USER_UID=$$(id -u) USER_GID=$$(id -g) \ + docker compose logs ; \ $(docker_compose) exec -T lmtests /bin/bash -c "pytest --durations=10 --color=yes tests" From 07b78cf43362d2caf3ee11712b3f68cf4b1555fc Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Sat, 28 Oct 2023 14:10:55 +0200 Subject: [PATCH 17/57] chore: remove verbose logs before tests --- Makefile | 1 - 1 file changed, 1 deletion(-) diff --git a/Makefile b/Makefile index 140290f09..6701bcb92 100644 --- a/Makefile +++ b/Makefile @@ -263,7 +263,6 @@ start-aux-services: aux_images ro_crates docker-compose.extra.yml permissions ## run-tests: start-testing ## Run all tests in the Testing Environment @printf "\n$(bold)Running tests...$(reset)\n" ; \ - docker compose logs ; \ $(docker_compose) exec -T lmtests /bin/bash -c "pytest --durations=10 --color=yes tests" From 0207822071fd627525cc33477c60057c25320556 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Mon, 30 Oct 2023 08:11:15 +0100 Subject: [PATCH 18/57] fix(k8s): missing env label --- k8s/templates/promtail.configmap.yaml | 29 ++++++++++++--------------- 1 file changed, 13 insertions(+), 16 deletions(-) diff --git a/k8s/templates/promtail.configmap.yaml b/k8s/templates/promtail.configmap.yaml index 53e34239c..b997c876c 100644 --- a/k8s/templates/promtail.configmap.yaml +++ b/k8s/templates/promtail.configmap.yaml @@ -1,11 +1,8 @@ {{- if .Values.monitoring.loki.enabled }} -{{- $releaseName := .Release.Name }} -{{- $releaseNamespace := .Release.Namespace }} -{{- $lokiUrl := .Values.monitoring.loki.url }} apiVersion: v1 kind: ConfigMap metadata: - name: 
"{{.Release.Name}}-promtail-proxy-configmap" + name: "{{$.Release.Name}}-promtail-proxy-configmap" data: promtail.yaml: | server: @@ -15,16 +12,16 @@ data: positions: filename: /tmp/positions.yaml clients: # Specify target - - url: {{.Values.monitoring.loki.url}}/loki/api/v1/push + - url: {{ $.Values.monitoring.loki.url }}/loki/api/v1/push scrape_configs: - job_name: "lifemonitor-api-proxy-logger" static_configs: - - targets: + - targets: - localhost labels: app: "lifemonitor-backend" - component: "{{.Release.Name}}-proxy" - environment: "{{.Release.Namespace}}" + component: "{{$.Release.Name}}-proxy" + environment: "{{$.Release.Namespace}}" format: "extended" level: "INFO" __path__: /var/log/nginx/access.log @@ -32,8 +29,8 @@ data: - localhost labels: app: "lifemonitor-backend" - component: "{{.Release.Name}}-proxy" - environment: "{{.Release.Namespace}}" + component: "{{ $.Release.Name }}-proxy" + environment: "{{ $.Release.Namespace }}" format: "extended" level: "ERROR" __path__: /var/log/nginx/*error.log @@ -44,7 +41,7 @@ data: apiVersion: v1 kind: ConfigMap metadata: - name: "{{.Release.Name}}-promtail-backend-configmap" + name: "{{ $.Release.Name }}-promtail-backend-configmap" data: promtail.yaml: | server: @@ -54,7 +51,7 @@ data: positions: filename: /tmp/positions.yaml clients: # Specify target - - url: {{ $lokiUrl }}/loki/api/v1/push + - url: {{ $.Values.monitoring.loki.url }}/loki/api/v1/push scrape_configs: - job_name: "lifemonitor-api-backend-logger" static_configs: @@ -63,7 +60,7 @@ data: labels: app: "lifemonitor-backend" component: "api-backend" - environment: "{{.releaseNamespace}}" + environment: "{{ $.Release.Namespace }}" format: "backend" __path__: /var/log/lm/*.log pipeline_stages: @@ -80,7 +77,7 @@ data: apiVersion: v1 kind: ConfigMap metadata: - name: "{{ $releaseName }}-promtail-worker-{{ $queue.name }}-configmap" + name: "{{ $.Release.Name }}-promtail-worker-{{ $queue.name }}-configmap" data: promtail.yaml: | server: @@ -90,7 +87,7 @@ data: 
positions: filename: /tmp/positions.yaml clients: # Specify target - - url: {{ $lokiUrl }}/loki/api/v1/push + - url: {{ $.Values.monitoring.loki.url }}/loki/api/v1/push scrape_configs: - job_name: "lifemonitor-api-backend-logger" static_configs: @@ -99,7 +96,7 @@ data: labels: app: "lifemonitor-backend" component: "api-worker-{{ $queue.name }}" - environment: "{{.releaseNamespace}}" + environment: "{{ $.Release.Namespace }}" format: "backend" __path__: /var/log/lm/*.log pipeline_stages: From 41eed0a3fc38b43f0dbc006ffc145565d65d9d28 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Thu, 2 Nov 2023 09:57:12 +0100 Subject: [PATCH 19/57] build: bump version number --- k8s/Chart.yaml | 4 ++-- lifemonitor/static/src/package.json | 2 +- specs/api.yaml | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/k8s/Chart.yaml b/k8s/Chart.yaml index 591bec1f5..1f5ad15c4 100644 --- a/k8s/Chart.yaml +++ b/k8s/Chart.yaml @@ -7,12 +7,12 @@ type: application # This is the chart version. This version number should be incremented each time you make changes # to the chart and its templates, including the app version. # Versions are expected to follow Semantic Versioning (https://semver.org/) -version: 0.10.0 +version: 0.11.0 # This is the version number of the application being deployed. This version number should be # incremented each time you make changes to the application. Versions are not expected to # follow Semantic Versioning. They should reflect the version the application is using. 
-appVersion: 0.11.7 +appVersion: 0.11.8 # Chart dependencies dependencies: diff --git a/lifemonitor/static/src/package.json b/lifemonitor/static/src/package.json index 856f2e8a4..d7702bce1 100644 --- a/lifemonitor/static/src/package.json +++ b/lifemonitor/static/src/package.json @@ -1,7 +1,7 @@ { "name": "lifemonitor", "description": "Workflow Testing Service", - "version": "0.11.7", + "version": "0.11.8", "license": "MIT", "author": "CRS4", "main": "../dist/js/lifemonitor.min.js", diff --git a/specs/api.yaml b/specs/api.yaml index 78dc09749..00719c62a 100644 --- a/specs/api.yaml +++ b/specs/api.yaml @@ -3,7 +3,7 @@ openapi: "3.0.0" info: - version: "0.11.7" + version: "0.11.8" title: "Life Monitor API" description: | *Workflow sustainability service* @@ -18,7 +18,7 @@ info: servers: - url: / description: > - Version 0.11.7 of API. + Version 0.11.8 of API. tags: - name: GitHub Integration From e6e8c10d990d9f15e5f576c5e95a0766fc84abb7 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Thu, 2 Nov 2023 15:42:05 +0100 Subject: [PATCH 20/57] refactor: update metrics on Grafana dashboard --- ...hboard.json => LifeMonitor Dashboard.json} | 326 +++++++++++------- 1 file changed, 204 insertions(+), 122 deletions(-) rename utils/grafana/{LifeMonitorDashboard.json => LifeMonitor Dashboard.json} (86%) diff --git a/utils/grafana/LifeMonitorDashboard.json b/utils/grafana/LifeMonitor Dashboard.json similarity index 86% rename from utils/grafana/LifeMonitorDashboard.json rename to utils/grafana/LifeMonitor Dashboard.json index 976656905..1c7be4929 100644 --- a/utils/grafana/LifeMonitorDashboard.json +++ b/utils/grafana/LifeMonitor Dashboard.json @@ -107,9 +107,6 @@ "color": { "mode": "thresholds" }, - "custom": { - "neutral": 38 - }, "mappings": [], "thresholds": { "mode": "absolute", @@ -131,6 +128,9 @@ }, "id": 18, "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", "orientation": "auto", "reduceOptions": { "calcs": [ @@ -139,8 +139,7 @@ 
"fields": "", "values": false }, - "showThresholdLabels": false, - "showThresholdMarkers": true + "textMode": "auto" }, "pluginVersion": "10.1.1", "targets": [ @@ -149,15 +148,19 @@ "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "disableTextWrap": false, "editorMode": "builder", - "expr": "sum(lifemonitor_api_users)", + "expr": "max(lifemonitor_api_users{environment=\"$environment\"})", + "fullMetaSearch": false, + "includeNullMetadata": true, "legendFormat": "__auto", "range": true, - "refId": "A" + "refId": "A", + "useBackend": false } ], "title": "Users", - "type": "gauge" + "type": "stat" }, { "datasource": { @@ -191,6 +194,9 @@ }, "id": 20, "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", "orientation": "auto", "reduceOptions": { "calcs": [ @@ -199,8 +205,7 @@ "fields": "", "values": false }, - "showThresholdLabels": false, - "showThresholdMarkers": true + "textMode": "auto" }, "pluginVersion": "10.1.1", "targets": [ @@ -209,15 +214,19 @@ "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "disableTextWrap": false, "editorMode": "builder", - "expr": "sum(lifemonitor_api_workflows)", + "expr": "max(lifemonitor_api_workflows{environment=\"$environment\"})", + "fullMetaSearch": false, + "includeNullMetadata": true, "legendFormat": "__auto", "range": true, - "refId": "A" + "refId": "A", + "useBackend": false } ], "title": "Workflows", - "type": "gauge" + "type": "stat" }, { "datasource": { @@ -251,6 +260,9 @@ }, "id": 24, "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", "orientation": "auto", "reduceOptions": { "calcs": [ @@ -259,8 +271,7 @@ "fields": "", "values": false }, - "showThresholdLabels": false, - "showThresholdMarkers": true + "textMode": "auto" }, "pluginVersion": "10.1.1", "targets": [ @@ -269,15 +280,19 @@ "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "disableTextWrap": false, "editorMode": "builder", - "expr": "sum(lifemonitor_api_workflow_versions)", + "expr": 
"max(lifemonitor_api_workflow_versions{environment=\"$environment\"})", + "fullMetaSearch": false, + "includeNullMetadata": true, "legendFormat": "__auto", "range": true, - "refId": "A" + "refId": "A", + "useBackend": false } ], "title": "Workflow Versions", - "type": "gauge" + "type": "stat" }, { "datasource": { @@ -315,6 +330,9 @@ }, "id": 26, "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", "orientation": "auto", "reduceOptions": { "calcs": [ @@ -323,8 +341,7 @@ "fields": "", "values": false }, - "showThresholdLabels": false, - "showThresholdMarkers": true + "textMode": "auto" }, "pluginVersion": "10.1.1", "targets": [ @@ -333,15 +350,19 @@ "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "disableTextWrap": false, "editorMode": "builder", - "expr": "sum(lifemonitor_api_workflow_registries)", + "expr": "max(lifemonitor_api_workflow_registries{environment=\"$environment\"})", + "fullMetaSearch": false, + "includeNullMetadata": true, "legendFormat": "__auto", "range": true, - "refId": "A" + "refId": "A", + "useBackend": false } ], "title": "Workflow Registries", - "type": "gauge" + "type": "stat" }, { "datasource": { @@ -375,6 +396,9 @@ }, "id": 28, "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", "orientation": "auto", "reduceOptions": { "calcs": [ @@ -383,8 +407,7 @@ "fields": "", "values": false }, - "showThresholdLabels": false, - "showThresholdMarkers": true + "textMode": "auto" }, "pluginVersion": "10.1.1", "targets": [ @@ -393,15 +416,19 @@ "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "disableTextWrap": false, "editorMode": "builder", - "expr": "sum(lifemonitor_api_workflow_suites)", + "expr": "max(lifemonitor_api_workflow_suites{environment=\"$environment\"})", + "fullMetaSearch": false, + "includeNullMetadata": true, "legendFormat": "__auto", "range": true, - "refId": "A" + "refId": "A", + "useBackend": false } ], "title": "Workflow Suites", - "type": "gauge" + "type": 
"stat" }, { "datasource": { @@ -435,6 +462,9 @@ }, "id": 30, "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", "orientation": "auto", "reduceOptions": { "calcs": [ @@ -443,8 +473,7 @@ "fields": "", "values": false }, - "showThresholdLabels": false, - "showThresholdMarkers": true + "textMode": "auto" }, "pluginVersion": "10.1.1", "targets": [ @@ -453,15 +482,19 @@ "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "disableTextWrap": false, "editorMode": "builder", - "expr": "sum(lifemonitor_api_workflow_test_instances)", + "expr": "max(lifemonitor_api_workflow_test_instances{environment=\"$environment\"})", + "fullMetaSearch": false, + "includeNullMetadata": true, "legendFormat": "__auto", "range": true, - "refId": "A" + "refId": "A", + "useBackend": false } ], "title": "Workflow Test Instances", - "type": "gauge" + "type": "stat" }, { "collapsed": false, @@ -532,7 +565,7 @@ }, "disableTextWrap": false, "editorMode": "builder", - "expr": "sum(lifemonitor_webapp_http_websocket_connections_total)", + "expr": "sum(lifemonitor_webapp_http_websocket_connections_total{environment=\"$environment\"})", "fullMetaSearch": false, "includeNullMetadata": true, "legendFormat": "__auto", @@ -670,7 +703,7 @@ }, "disableTextWrap": false, "editorMode": "builder", - "expr": "lifemonitor_webapp_http_websocket_connections_total", + "expr": "lifemonitor_webapp_http_websocket_connections_total{environment=\"$environment\"}", "fullMetaSearch": false, "includeNullMetadata": true, "instant": false, @@ -765,7 +798,7 @@ }, "disableTextWrap": false, "editorMode": "builder", - "expr": "sum by(client_country_long) (rate(lifemonitor_webapp_http_requests_total[30s]))", + "expr": "sum by(client_country_long) (rate(lifemonitor_webapp_http_requests_total{environment=\"$environment\"}[30s]))", "fullMetaSearch": false, "includeNullMetadata": true, "legendFormat": "__auto", @@ -841,7 +874,7 @@ }, "disableTextWrap": false, "editorMode": "builder", - "expr": 
"rate(lifemonitor_webapp_http_connections{status=\"200\"}[30s])", + "expr": "rate(lifemonitor_webapp_http_connections{status=\"200\", environment=\"$environment\"}[30s])", "fullMetaSearch": false, "includeNullMetadata": true, "instant": false, @@ -907,7 +940,7 @@ }, "disableTextWrap": false, "editorMode": "builder", - "expr": "sum(rate(lifemonitor_webapp_http_connections{status=~\"3[0-9][0-9]\"}[30s]))", + "expr": "sum(rate(lifemonitor_webapp_http_connections{status=~\"3[0-9][0-9]\", environment=\"$environment\"}[30s]))", "fullMetaSearch": false, "includeNullMetadata": true, "instant": false, @@ -974,7 +1007,7 @@ }, "disableTextWrap": false, "editorMode": "builder", - "expr": "sum(rate(lifemonitor_webapp_http_connections{status=~\"4[0-9][0-9]\"}[30s]))", + "expr": "sum(rate(lifemonitor_webapp_http_connections{status=~\"4[0-9][0-9]\", environment=\"$environment\"}[30s]))", "fullMetaSearch": false, "includeNullMetadata": true, "instant": false, @@ -1041,7 +1074,7 @@ }, "disableTextWrap": false, "editorMode": "builder", - "expr": "sum(rate(lifemonitor_webapp_http_connections{status=~\"5[0-9][0-9]\"}[30s]))", + "expr": "sum(rate(lifemonitor_webapp_http_connections{status=~\"5[0-9][0-9]\", environment=\"$environment\"}[30s]))", "fullMetaSearch": false, "includeNullMetadata": true, "instant": false, @@ -1121,7 +1154,7 @@ }, "disableTextWrap": false, "editorMode": "builder", - "expr": "sum by(status) (lifemonitor_webapp_http_connections)", + "expr": "sum by(status) (lifemonitor_webapp_http_connections{environment=\"$environment\"})", "fullMetaSearch": false, "includeNullMetadata": true, "instant": false, @@ -1178,6 +1211,9 @@ }, "id": 49, "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", "orientation": "auto", "reduceOptions": { "calcs": [ @@ -1186,8 +1222,7 @@ "fields": "", "values": false }, - "showThresholdLabels": false, - "showThresholdMarkers": true + "textMode": "auto" }, "pluginVersion": 
"10.1.1", "targets": [ @@ -1198,7 +1233,7 @@ }, "disableTextWrap": false, "editorMode": "builder", - "expr": "increase(lifemonitor_api_proxy_http_active_connections_number[30s])", + "expr": "increase(lifemonitor_api_proxy_http_connection_status{state=\"active\", environment=\"$environment\"}[30s])", "fullMetaSearch": false, "includeNullMetadata": true, "instant": false, @@ -1209,7 +1244,7 @@ } ], "title": "Active HTTP Connections", - "type": "gauge" + "type": "stat" }, { "datasource": { @@ -1242,16 +1277,18 @@ }, "id": 50, "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", "orientation": "auto", "reduceOptions": { "calcs": [ - "lastNotNull" + "distinctCount" ], - "fields": "", + "fields": "/^Distinct Count$/", "values": false }, - "showThresholdLabels": false, - "showThresholdMarkers": true + "textMode": "auto" }, "pluginVersion": "10.1.1", "targets": [ @@ -1262,7 +1299,7 @@ }, "disableTextWrap": false, "editorMode": "builder", - "expr": "increase(lifemonitor_api_proxy_http_upstream_connections_total[30s])", + "expr": "increase(lifemonitor_api_proxy_http_connection_requests{upstream_address!=\"-\", request!=\"/metrics\", environment=\"$environment\"}[30s])", "fullMetaSearch": false, "includeNullMetadata": true, "instant": false, @@ -1273,7 +1310,18 @@ } ], "title": "Active Upstream Connections", - "type": "gauge" + "transformations": [ + { + "id": "calculateField", + "options": { + "mode": "reduceRow", + "reduce": { + "reducer": "distinctCount" + } + } + } + ], + "type": "stat" }, { "datasource": { @@ -1293,7 +1341,8 @@ "viz": false } }, - "mappings": [] + "mappings": [], + "unit": "none" }, "overrides": [] }, @@ -1305,10 +1354,14 @@ }, "id": 47, "options": { + "displayLabels": [], "legend": { "displayMode": "list", "placement": "bottom", - "showLegend": true + "showLegend": true, + "values": [ + "percent" + ] }, "pieType": "pie", "reduceOptions": { @@ -1323,7 +1376,7 @@ "sort": "none" } }, - "pluginVersion": "10.1.0", + 
"pluginVersion": "10.1.1", "targets": [ { "datasource": { @@ -1332,16 +1385,18 @@ }, "disableTextWrap": false, "editorMode": "builder", - "expr": "sum by(client_country_long) (increase(lifemonitor_api_proxy_http_requests_total[30s]))", + "expr": "sum by(country_long) (increase(lifemonitor_api_proxy_http_connection_requests{request!=\"/metrics\", environment=\"$environment\"}[30s]))", + "format": "time_series", "fullMetaSearch": false, "includeNullMetadata": true, - "legendFormat": "__auto", + "legendFormat": "{{country_long}}", "range": true, "refId": "A", "useBackend": false } ], "title": "Active Requests by Country", + "transformations": [], "type": "piechart" }, { @@ -1349,7 +1404,7 @@ "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, - "description": "Number of Active Clients By Country", + "description": "Percentage of request errors by type", "fieldConfig": { "defaults": { "color": { @@ -1374,15 +1429,19 @@ }, "id": 51, "options": { + "displayLabels": [], "legend": { "displayMode": "list", "placement": "bottom", - "showLegend": true + "showLegend": true, + "values": [ + "percent" + ] }, "pieType": "pie", "reduceOptions": { "calcs": [ - "lastNotNull" + "range" ], "fields": "", "values": false @@ -1392,7 +1451,7 @@ "sort": "none" } }, - "pluginVersion": "10.1.0", + "pluginVersion": "10.1.1", "targets": [ { "datasource": { @@ -1401,7 +1460,7 @@ }, "disableTextWrap": false, "editorMode": "builder", - "expr": "sum by(client_country_long) (lifemonitor_api_proxy_http_requests_total)", + "expr": "sum by(status) (increase(lifemonitor_api_proxy_http_requests_error_total{environment=\"$environment\"}[30s]))", "fullMetaSearch": false, "includeNullMetadata": true, "legendFormat": "__auto", @@ -1410,7 +1469,8 @@ "useBackend": false } ], - "title": "# Requests by Country", + "title": "% Request Errors", + "transformations": [], "type": "piechart" }, { @@ -1533,15 +1593,19 @@ "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "disableTextWrap": false, "editorMode": 
"builder", "exemplar": true, - "expr": "increase(lifemonitor_api_http_request_total[1m])", + "expr": "increase(lifemonitor_api_http_request_total{environment=\"$environment\"}[1m])", "format": "time_series", + "fullMetaSearch": false, + "includeNullMetadata": true, "interval": "", "intervalFactor": 1, "legendFormat": "HTTP {{ status }}", "range": true, - "refId": "A" + "refId": "A", + "useBackend": false } ], "title": "Total requests per minute", @@ -1604,32 +1668,7 @@ }, "unit": "short" }, - "overrides": [ - { - "__systemRef": "hideSeriesFrom", - "matcher": { - "id": "byNames", - "options": { - "mode": "exclude", - "names": [ - "/workflows" - ], - "prefix": "All except:", - "readOnly": true - } - }, - "properties": [ - { - "id": "custom.hideFrom", - "value": { - "legend": false, - "tooltip": false, - "viz": true - } - } - ] - } - ] + "overrides": [] }, "gridPos": { "h": 14, @@ -1662,15 +1701,19 @@ "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, - "editorMode": "code", + "disableTextWrap": false, + "editorMode": "builder", "exemplar": true, - "expr": "rate(lifemonitor_api_http_request_duration_seconds_count{status=\"200\"}[30s])", + "expr": "rate(lifemonitor_api_http_request_duration_seconds_count{status=\"200\", environment=\"$environment\"}[30s])", "format": "time_series", + "fullMetaSearch": false, + "includeNullMetadata": true, "interval": "", "intervalFactor": 1, "legendFormat": "{{ path }}", "range": true, - "refId": "A" + "refId": "A", + "useBackend": false } ], "title": "Requests per second", @@ -1722,8 +1765,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -1783,15 +1825,19 @@ "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, - "editorMode": "code", + "disableTextWrap": false, + "editorMode": "builder", "exemplar": true, - "expr": "sum(rate(lifemonitor_api_http_request_duration_seconds_count{status!=\"200\"}[30s]))", + "expr": 
"sum(rate(lifemonitor_api_http_request_duration_seconds_count{status!=\"200\", environment=\"$environment\"}[30s]))", "format": "time_series", + "fullMetaSearch": false, + "includeNullMetadata": true, "interval": "", "intervalFactor": 1, "legendFormat": "errors", "range": true, - "refId": "A" + "refId": "A", + "useBackend": false } ], "title": "Errors per second", @@ -1843,8 +1889,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -1886,15 +1931,19 @@ "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, - "editorMode": "code", + "disableTextWrap": false, + "editorMode": "builder", "exemplar": true, - "expr": "rate(lifemonitor_api_http_request_duration_seconds_sum{status=\"200\"}[30s])\n/\nrate(lifemonitor_api_http_request_duration_seconds_count{status=\"200\"}[30s])", + "expr": "rate(lifemonitor_api_http_request_duration_seconds_sum{status=\"200\", environment=\"$environment\"}[30s]) / rate(lifemonitor_api_http_request_duration_seconds_count{status=\"200\", environment=\"$environment\"}[30s])", "format": "time_series", + "fullMetaSearch": false, + "includeNullMetadata": true, "interval": "", "intervalFactor": 1, "legendFormat": "{{ path }}", "range": true, - "refId": "A" + "refId": "A", + "useBackend": false } ], "title": "Average response time [30s]", @@ -1948,8 +1997,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -1991,15 +2039,19 @@ "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, - "editorMode": "code", + "disableTextWrap": false, + "editorMode": "builder", "exemplar": true, - "expr": "increase(lifemonitor_api_http_request_duration_seconds_bucket{status=\"200\",le=\"0.25\"}[30s]) \n/ ignoring (le) increase(lifemonitor_api_http_request_duration_seconds_count{status=\"200\"}[30s])", + "expr": "increase(lifemonitor_api_http_request_duration_seconds_bucket{status=\"200\", le=\"0.25\", environment=\"$environment\"}[30s]) / 
ignoring(le) increase(lifemonitor_api_http_request_duration_seconds_count{status=\"200\", environment=\"$environment\"}[30s])", "format": "time_series", + "fullMetaSearch": false, + "includeNullMetadata": true, "instant": false, "interval": "", "intervalFactor": 1, "legendFormat": "{{ path }}", - "refId": "A" + "refId": "A", + "useBackend": false } ], "title": "Requests under 250ms", @@ -2052,8 +2104,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -2098,15 +2149,19 @@ "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, - "editorMode": "code", + "disableTextWrap": false, + "editorMode": "builder", "exemplar": true, - "expr": "histogram_quantile(0.5, rate(lifemonitor_api_http_request_duration_seconds_bucket{status=\"200\"}[30s]))", + "expr": "histogram_quantile(0.5, rate(lifemonitor_api_http_request_duration_seconds_bucket{status=\"200\", environment=\"$environment\"}[30s]))", "format": "time_series", + "fullMetaSearch": false, + "includeNullMetadata": true, "interval": "", "intervalFactor": 1, "legendFormat": "{{ path }}", "range": true, - "refId": "A" + "refId": "A", + "useBackend": false } ], "title": "Request duration [s] - p50", @@ -2158,8 +2213,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -2204,30 +2258,58 @@ "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, - "editorMode": "code", + "disableTextWrap": false, + "editorMode": "builder", "exemplar": true, - "expr": "histogram_quantile(0.9, rate(lifemonitor_api_http_request_duration_seconds_bucket{status=\"200\"}[30s]))", + "expr": "histogram_quantile(0.9, rate(lifemonitor_api_http_request_duration_seconds_bucket{status=\"200\", environment=\"$environment\"}[30s]))", "format": "time_series", + "fullMetaSearch": false, + "includeNullMetadata": true, "interval": "", "intervalFactor": 1, "legendFormat": "{{ path }}", "range": true, - "refId": "A" + "refId": "A", + "useBackend": 
false } ], "title": "Request duration [s] - p90", "type": "timeseries" } ], - "refresh": "auto", + "refresh": "5s", "schemaVersion": 38, "style": "dark", "tags": [], "templating": { - "list": [] + "list": [ + { + "current": {}, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "definition": "label_values(environment)", + "hide": 0, + "includeAll": false, + "label": "Environment", + "multi": true, + "name": "environment", + "options": [], + "query": { + "query": "label_values(environment)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + } + ] }, "time": { - "from": "now-5m", + "from": "now-15m", "to": "now" }, "timepicker": { @@ -2249,6 +2331,6 @@ "timezone": "Europe/Rome", "title": "LifeMonitor Dashboard", "uid": "_eX4mpl3", - "version": 12, + "version": 26, "weekStart": "" } \ No newline at end of file From 45e14e0b769cab87884b406d4c08b7a4fc441659 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Thu, 2 Nov 2023 15:45:40 +0100 Subject: [PATCH 21/57] feat: add logs view on Grafana dashboard --- utils/grafana/Logs Explorer.json | 583 +++++++++++++++++++++++++++++++ 1 file changed, 583 insertions(+) create mode 100644 utils/grafana/Logs Explorer.json diff --git a/utils/grafana/Logs Explorer.json b/utils/grafana/Logs Explorer.json new file mode 100644 index 000000000..a78898647 --- /dev/null +++ b/utils/grafana/Logs Explorer.json @@ -0,0 +1,583 @@ +{ + "__inputs": [ + { + "name": "DS_LOKI", + "label": "Loki", + "description": "", + "type": "datasource", + "pluginId": "loki", + "pluginName": "Loki" + } + ], + "__elements": {}, + "__requires": [ + { + "type": "panel", + "id": "geomap", + "name": "Geomap", + "version": "" + }, + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "10.1.1" + }, + { + "type": "panel", + "id": "logs", + "name": "Logs", + "version": "" + }, + { + "type": "datasource", + "id": "loki", 
+ "name": "Loki", + "version": "1.0.0" + }, + { + "type": "panel", + "id": "text", + "name": "Text", + "version": "" + }, + { + "type": "panel", + "id": "timeseries", + "name": "Time series", + "version": "" + } + ], + "annotations": { + "list": [ + { + "$$hashKey": "object:75", + "builtIn": 1, + "datasource": { + "type": "datasource", + "uid": "grafana" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "description": "Explore logs reported via Promtail", + "editable": true, + "fiscalYearStartMonth": 0, + "gnetId": 12019, + "graphTooltip": 0, + "id": null, + "links": [], + "liveNow": false, + "panels": [ + { + "datasource": { + "type": "loki", + "uid": "${DS_LOKI}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic-by-name" + }, + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 15, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 7, + "options": { + "basemap": { + "config": {}, + "name": "Layer 0", + "type": "default" + }, + "controls": { + "mouseWheelZoom": true, + "showAttribution": true, + "showDebug": false, + "showMeasure": false, + "showScale": false, + "showZoom": true + }, + "layers": [ + { + "config": { + "showLegend": true, + "style": { + "color": { + "fixed": "dark-green" + }, + "opacity": 0.4, + "rotation": { + "fixed": 0, + "max": 360, + "min": -360, + "mode": "mod" + }, + "size": { + "fixed": 5, + "max": 15, + "min": 2 + }, + "symbol": { + "fixed": "img/icons/marker/circle.svg", + "mode": "fixed" + }, + "text": { + "fixed": "", + "mode": "field" + }, + "textConfig": { + "fontSize": 12, + "offsetX": 0, + "offsetY": 0, + "textAlign": "center", + 
"textBaseline": "middle" + } + } + }, + "filterData": { + "id": "byRefId", + "options": "A" + }, + "location": { + "mode": "auto" + }, + "name": "Layer 1", + "tooltip": true, + "type": "markers" + } + ], + "tooltip": { + "mode": "details" + }, + "view": { + "allLayers": true, + "id": "zero", + "lat": 0, + "lon": 0, + "zoom": 1 + } + }, + "pluginVersion": "10.1.1", + "targets": [ + { + "datasource": { + "type": "loki", + "uid": "${DS_LOKI}" + }, + "editorMode": "builder", + "expr": "{app=\"$app\", component=\"$component\"} | pattern ` - [