Skip to content

Commit

Permalink
[Monitoring] Add monitoring setup production environment (Cloud-CV#3602)
Browse files Browse the repository at this point in the history
  • Loading branch information
Ram81 authored Oct 10, 2021
1 parent 81f9c46 commit 8b7fbfe
Show file tree
Hide file tree
Showing 6 changed files with 179 additions and 2 deletions.
77 changes: 77 additions & 0 deletions docker-compose-production.yml
Original file line number Diff line number Diff line change
Expand Up @@ -77,3 +77,80 @@ services:
context: ./
dockerfile: docker/prod/code-upload-worker/Dockerfile

# Prometheus server. It loads prometheus_production.yml as its configuration
# and is served under the /prometheus path prefix, which is the path the
# nginx-ingress container proxies to this service.
prometheus:
  image: prom/prometheus:latest
  hostname: prometheus
  # NOTE(review): presumably UID 1000 owns the bind-mounted host directories
  # below so the container can write to them - confirm on the host.
  user: "1000"
  command:
    - "--config.file=/etc/prometheus/prometheus.yml"
    - "--web.external-url=http://localhost:9090/prometheus"
  ports:
    - "9090:9090"
  volumes:
    # Scrape/alerting configuration and alerting rules.
    - ./monitoring/prometheus/prometheus_production.yml:/etc/prometheus/prometheus.yml
    - ./monitoring/prometheus/rules.yml:/etc/rules/rules.yml
    # NOTE(review): the same host directory is mounted at three container
    # paths, including /etc/prometheus where the config file above is also
    # mounted - confirm all three mounts are intended.
    - ./monitoring/prometheus/prometheus_db:/var/lib/prometheus
    - ./monitoring/prometheus/prometheus_db:/prometheus
    - ./monitoring/prometheus/prometheus_db:/etc/prometheus

# Grafana dashboards. Started after the prometheus service and configured
# through the shared production env file.
grafana:
  image: grafana/grafana:latest
  hostname: grafana
  user: "1000"
  depends_on:
    - prometheus
  env_file:
    - docker/prod/docker_production.env
  ports:
    - "3000:3000"
  volumes:
    # Grafana state is kept on the host so it survives container recreation.
    - ./monitoring/grafana/grafana_db:/var/lib/grafana

# StatsD exporter. Metrics are exposed at /statsd/metrics, matching the
# metrics_path of the 'statsd' scrape job in the Prometheus configuration.
statsd-exporter:
  image: prom/statsd-exporter:latest
  # The nginx `statsd_exporter` upstream reaches this container as statsd:9102.
  hostname: statsd
  command:
    - "--log.level=info"
    - "--web.telemetry-path=/statsd/metrics"
  ports:
    # NOTE(review): 9125 is presumably the StatsD ingestion port (UDP and
    # TCP) - confirm against the exporter defaults.
    - "9125:9125/udp"
    - "9125:9125/tcp"
    - "9102:9102"

# Node exporter, published on host port 9100.
node_exporter:
  image: prom/node-exporter
  hostname: node_exporter
  ports:
    - "9100:9100"

# Public nginx entry point for the monitoring stack. It is built from the
# repository root with MONITORING_ENV=production and publishes HTTP and HTTPS.
nginx-ingress:
  build:
    context: ./
    dockerfile: docker/prod/nginx-ingress/Dockerfile
    args:
      MONITORING_ENV: production
  image: ${AWS_ACCOUNT_ID}.dkr.ecr.us-east-1.amazonaws.com/evalai-production-nginx-ingress:${COMMIT_ID}
  # Start the services nginx proxies to before starting nginx itself.
  depends_on:
    - prometheus
    - grafana
    - statsd-exporter
    - alert-manager
  ports:
    - "80:80"
    - "443:443"

# Alertmanager. Its configuration is read from the mounted Prometheus
# directory and it is served under the /alert_manager path prefix.
alert-manager:
  image: prom/alertmanager
  # The nginx `alert_manager` upstream reaches this container as
  # alert_manager:9093.
  hostname: alert_manager
  user: "1000"
  command:
    - "--config.file=/prometheus/alert_manager.yml"
    - "--storage.path=/data"
    - "--web.external-url=http://localhost:9093/alert_manager"
  ports:
    - "9093:9093"
  volumes:
    # ./monitoring/prometheus is expected to contain alert_manager.yml
    # (see --config.file above).
    - ./monitoring/prometheus:/prometheus
    - ./monitoring/alertmanager/data:/data
    - ./monitoring/alertmanager/templates:/etc/alertmanager/templates

68 changes: 68 additions & 0 deletions docker/prod/nginx-ingress/nginx_production.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
# Backends for the monitoring stack. Each server name is the `hostname` of the
# corresponding service in docker-compose-production.yml.

# Prometheus web UI / API.
upstream prometheus {
    server prometheus:9090 fail_timeout=0;
}

# Grafana dashboards.
upstream grafana {
    server grafana:3000 fail_timeout=0;
}

# StatsD exporter metrics endpoint (container hostname is "statsd").
upstream statsd_exporter {
    server statsd:9102 fail_timeout=0;
}

# Alertmanager web UI / API.
upstream alert_manager {
    server alert_manager:9093 fail_timeout=0;
}

# Plain-HTTP listener: permanently redirect every request to the HTTPS site,
# preserving the original request URI.
server {
    listen 80;
    server_name monitoring.eval.ai;

    return 301 https://monitoring.eval.ai$request_uri;
}

# HTTPS entry point for monitoring.eval.ai. Serves static files at / and
# reverse-proxies each monitoring tool under its own path prefix.
server {
    # TLS is enabled by the `ssl` parameter here. The standalone `ssl on;`
    # directive is deprecated and rejected by newer nginx releases, so it is
    # intentionally not used.
    listen 443 ssl;
    server_name monitoring.eval.ai;

    ssl_certificate /etc/ssl/eval_ai.crt;
    ssl_certificate_key /etc/ssl/eval_ai.key;
    ssl_prefer_server_ciphers on;
    # Only TLS 1.2 and 1.3 are offered. SSLv2/3, TLS 1.0 and TLS 1.1 are
    # deprecated and considered weak.
    ssl_protocols TLSv1.2 TLSv1.3;

    access_log /var/log/nginx/access.log;
    error_log /var/log/nginx/error.log;

    # Static landing content.
    location / {
        root /usr/share/nginx/html;
        index index.html index.htm;
    }

    # Each proxied location forwards the client address chain, the original
    # Host header and the original scheme to the backend.
    location /prometheus {
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header Host $host;
        proxy_set_header X-Forwarded-Proto $scheme;
        proxy_pass http://prometheus;
    }

    location /grafana {
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header Host $host;
        proxy_set_header X-Forwarded-Proto $scheme;
        proxy_pass http://grafana;
    }

    location /statsd {
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header Host $host;
        proxy_set_header X-Forwarded-Proto $scheme;
        proxy_pass http://statsd_exporter;
    }

    location /alert_manager {
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header Host $host;
        proxy_set_header X-Forwarded-Proto $scheme;
        proxy_pass http://alert_manager;
    }
}
30 changes: 30 additions & 0 deletions monitoring/prometheus/prometheus_production.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Defaults applied to every scrape job, plus a label identifying this
# Prometheus instance.
global:
  scrape_interval: 30s
  external_labels:
    monitor: "evalai-monitor"

# Alerting rules; the compose file mounts rules.yml at this path.
rule_files:
  - "/etc/rules/rules.yml"

# Targets Prometheus scrapes for metrics.
scrape_configs:
  # Prometheus scraping itself. The /prometheus prefix comes from the
  # --web.external-url path set in the compose file.
  - job_name: 'prometheus'
    metrics_path: '/prometheus/metrics'
    static_configs:
      - targets: ['localhost:9090']

  # StatsD exporter, reached through the nginx ingress. nginx redirects all
  # plain-HTTP requests for monitoring.eval.ai to HTTPS, so scrape over HTTPS
  # directly (as the alerting section does) instead of relying on a redirect
  # for every scrape.
  - job_name: 'statsd'
    scheme: 'https'
    metrics_path: '/statsd/metrics'
    static_configs:
      - targets: ['monitoring.eval.ai']

  # Node exporter on the main application host.
  # NOTE(review): this relies on eval.ai serving the exporter at
  # /node_exporter, which is configured outside this file - confirm, and
  # confirm whether this job should also use HTTPS.
  - job_name: 'node_exporter'
    metrics_path: '/node_exporter'
    static_configs:
      - targets: ['eval.ai']

# Where Prometheus delivers fired alerts: the Alertmanager published by the
# nginx ingress under /alert_manager, over HTTPS.
alerting:
  alertmanagers:
    - scheme: "https"
      path_prefix: "/alert_manager"
      static_configs:
        - targets:
            - "monitoring.eval.ai"
1 change: 1 addition & 0 deletions requirements/common.txt
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ pickleshare==0.7.5
Pillow==7.1.0
psycopg2==2.8.4
pycurl==7.43.0.6
PyJWT==2.1.0
PyYaml==5.1
proc==1.0
rstr==2.2.6
Expand Down
4 changes: 2 additions & 2 deletions scripts/deployment/deploy.sh
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,8 @@ case $opt in
eval $(aws ecr get-login --no-include-email)
aws s3 cp s3://cloudcv-secrets/evalai/${env}/docker_${env}.env ./docker/prod/docker_${env}.env
docker-compose -f docker-compose-${env}.yml rm -s -v -f
docker-compose -f docker-compose-${env}.yml pull django nodejs celery
docker-compose -f docker-compose-${env}.yml up -d --force-recreate --remove-orphans django nodejs nodejs_v2 celery
# Service names must match the keys in docker-compose-${env}.yml; the node
# exporter service is defined there as `node_exporter` (underscore).
docker-compose -f docker-compose-${env}.yml pull django nodejs celery nodejs_v2 node_exporter
docker-compose -f docker-compose-${env}.yml up -d --force-recreate --remove-orphans django nodejs nodejs_v2 celery node_exporter
ENDSSH2
ssh ubuntu@${MONITORING_INSTANCE} -o StrictHostKeyChecking=no AWS_ACCOUNT_ID=${AWS_ACCOUNT_ID} COMMIT_ID=${COMMIT_ID} env=${env} 'bash -s' <<-'ENDSSH2'
source venv/bin/activate
Expand Down
1 change: 1 addition & 0 deletions settings/staging.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,5 @@
"https://staging-evalai.s3.amazonaws.com",
"https://staging.eval.ai",
"https://monitoring-staging.eval.ai",
"https://monitoring.eval.ai",
)

0 comments on commit 8b7fbfe

Please sign in to comment.