Merge pull request #18 from mishaschwartz/more-flexibility
fmigneault authored Aug 29, 2023
2 parents c52263f + 386028d commit a5e1c8c
Showing 16 changed files with 187 additions and 124 deletions.
5 changes: 5 additions & 0 deletions CHANGES.rst
@@ -6,6 +6,11 @@ CHANGES
`Unreleased <https://github.com/Ouranosinc/CanarieAPI/tree/master>`_ (latest)
------------------------------------------------------------------------------------

* Separate the application, the cron job, and the proxy (nginx) into distinct containers so that they can be run
  independently.
* Add an option to enable or disable the ``parse_logs`` cron job independently by setting the ``PARSE_LOGS``
  configuration option to ``True`` or ``False``.
* Stop handling log rotation for nginx; nginx should handle this on its own.
* Make 2000 the default port everywhere (not just in Docker).
* Replace the Docker image reference ``nginx:bullseye`` with ``nginx:stable-bullseye``, which is updated more often
  with the latest security vulnerability fixes.

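Since the cron job now lives in its own container, it can also be driven directly as a Python entry point. A minimal sketch of such an entrypoint (the ``cron_job`` import is real; the wrapper script itself is hypothetical, and it assumes ``cron_job`` keeps its new behaviour of doing nothing when ``PARSE_LOGS`` is ``False``):

    # run_logparser.py -- illustrative entrypoint for a dedicated cron container
    from canarieapi.logparser import cron_job

    if __name__ == "__main__":
        cron_job()  # no-op when the PARSE_LOGS configuration option is False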
30 changes: 9 additions & 21 deletions Dockerfile
@@ -1,4 +1,4 @@
FROM nginx:stable-bullseye
FROM python:3.11
LABEL description="CanarieAPI: Self describing REST service for Canarie registry."
LABEL maintainer="David Byrns <[email protected]>, Francis Charette-Migneault <[email protected]>"
LABEL vendor="Ouranosinc, CRIM"
@@ -8,7 +8,7 @@ ENV PKG_DIR=/opt/local/src/CanarieAPI
WORKDIR ${PKG_DIR}

# Add logparser cron job in the cron directory
ADD canarieapi-cron /etc/cron.d/canarieapi-cron
ADD docker/canarieapi-cron /etc/cron.d/canarieapi-cron
RUN chmod 0644 /etc/cron.d/canarieapi-cron

# Install dependencies
@@ -17,12 +17,8 @@ COPY requirements.txt setup.* README.rst CHANGES.rst ${PKG_DIR}/
RUN apt-get update \
&& apt-get install -y \
build-essential \
python3-dev \
python3-pip \
cron \
sqlite3 \
&& ln -s $(which pip3) /usr/local/bin/pip \
&& ln -s $(which python3) /usr/bin/python \
&& pip install --no-cache-dir --upgrade pip setuptools gunicorn gevent \
&& pip install --no-cache-dir --upgrade -r ${PKG_DIR}/requirements.txt \
&& pip install --no-cache-dir -e ${PKG_DIR}
@@ -31,19 +27,11 @@ RUN apt-get update \
COPY ./ ${PKG_DIR}/
RUN pip install --no-dependencies ${PKG_DIR}

# cron job will inherit the current user environment
# start cron service
# start nginx service
# start gunicorn
CMD env >> /etc/environment \
&& cron \
&& gunicorn \
-b 0.0.0.0:2000 \
--workers 1 \
--log-level=DEBUG \
--timeout 30 \
--daemon \
-k gevent \
canarieapi.wsgi \
&& nginx \
-g "daemon off;"
CMD gunicorn \
-b 0.0.0.0:2000 \
--workers 1 \
--log-level=DEBUG \
--timeout 30 \
-k gevent \
canarieapi.wsgi
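For reference, the same flags map onto a gunicorn configuration file; a sketch only, this file is not part of the repository:

    # gunicorn.conf.py -- hypothetical equivalent of the CMD flags above
    bind = "0.0.0.0:2000"        # matches the new default port used everywhere
    workers = 1
    worker_class = "gevent"
    loglevel = "debug"
    timeout = 30
    wsgi_app = "canarieapi.wsgi"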
4 changes: 3 additions & 1 deletion Makefile
@@ -447,11 +447,13 @@ docker-stop:
@echo "Stopping test docker container: $(APP_DOCKER_TEST)"
-docker container stop -t 5 "$(APP_DOCKER_TEST)" 2>/dev/null || true
-docker rm $(APP_DOCKER_TEST) 2>/dev/null || true
-docker volume rm "$(APP_DOCKER_TEST)"-data 2>/dev/null || true

.PHONY: docker-test
docker-test: docker-build docker-stop docker-clean
@echo "Smoke test of docker image: $(DOCKER_TAG)"
docker run --pull never --name "$(APP_DOCKER_TEST)" -p 2000:2000 -d "$(APP_DOCKER_TAG)"
docker run --pull never --name "$(APP_DOCKER_TEST)" -v "$(APP_DOCKER_TEST)"-data:/data -p 2000:2000 -d \
"$(APP_DOCKER_TAG)"
@sleep 2
@echo "Testing docker image package installation..."
(docker exec "$(APP_DOCKER_TEST)" \
2 changes: 0 additions & 2 deletions canarieapi-cron

This file was deleted.

33 changes: 18 additions & 15 deletions canarieapi/api.py
@@ -62,7 +62,7 @@
get_db()

CronAccessStats = TypedDict("CronAccessStats", {
"invocations": int,
"invocations": int, # count | Not monitored
"last_access": str, # ISO datetime | Never
"last_log_update": str, # ISO datetime | Never
"last_status_update": str, # ISO datetime | Never
@@ -329,6 +329,11 @@ def stats(route_name: str, api_type: APIType) -> ResponseReturnValue:

db = get_db()

service_stats = [
(api_type, route_name),
("lastReset", START_UTC_TIME.isoformat() + "Z"),
]

# Gather service(s) status
all_status = collect_monitoring_statuses(route_name, database=db)

@@ -338,7 +343,8 @@
error_info = collections.OrderedDict([
(svc_monitor, {
"status": Status.pretty_msg(svc_info["status"]),
"message": svc_info["message"] or (msg_ok if svc_info["status"] == Status.ok else "Undefined Error"),
"message": svc_info["message"] or (
msg_ok if svc_info["status"] == Status.ok else "Undefined Error"),
})
for svc_monitor, svc_info in all_status.items()
])
@@ -356,25 +362,22 @@
)
return error_html, 503

# Gather route stats
monitor_info = []
cron_info = collect_cron_access_stats(route_name, database=db)

monitor_info = [
("lastAccess", cron_info["last_access"]),
("lastInvocationsUpdate", cron_info["last_log_update"]),
("lastStatusUpdate", cron_info["last_status_update"])
]
if APP.config.get("PARSE_LOGS", True):
service_stats.append(("invocations", cron_info["invocations"]))
monitor_info.append(("lastInvocationsUpdate", cron_info["last_log_update"]))
monitor_info.append(("lastAccess", cron_info["last_access"]))

monitor_info.append(("lastStatusUpdate", cron_info["last_status_update"]))

for service, svc_info in all_status.items():
monitor_info.append((service, Status.pretty_msg(svc_info["status"])))

monitor_info = collections.OrderedDict(monitor_info)
service_stats.append(("monitoring", monitor_info))

service_stats = [
(api_type, route_name),
("lastReset", START_UTC_TIME.isoformat() + "Z"),
("invocations", cron_info["invocations"]),
("monitoring", monitor_info)
]
service_stats = collections.OrderedDict(service_stats)

if request_wants_json():
@@ -457,4 +460,4 @@ def close_connection(_: Exception) -> None:

if __name__ == "__main__":
APP.debug = False
APP.run(port=5000)
APP.run(port=2000)
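The net effect of the reordering in ``stats()`` above: ``invocations`` and the log-derived monitoring fields are only included when ``PARSE_LOGS`` is enabled. Roughly, for a service route (an illustrative shape with invented values; per-monitor status entries omitted):

    # Illustrative JSON payloads (field values invented for the example)
    with_parsing = {
        "service": "my-route",
        "lastReset": "2023-08-29T00:00:00Z",
        "invocations": 42,
        "monitoring": {
            "lastInvocationsUpdate": "2023-08-29T12:00:00",
            "lastAccess": "2023-08-29T11:59:00",
            "lastStatusUpdate": "2023-08-29T12:00:00",
        },
    }
    without_parsing = {
        "service": "my-route",
        "lastReset": "2023-08-29T00:00:00Z",
        "monitoring": {"lastStatusUpdate": "2023-08-29T12:00:00"},
    }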
10 changes: 6 additions & 4 deletions canarieapi/default_configuration.py
@@ -7,13 +7,15 @@
"CANARIE_API_CONFIG_FN" to the path of your own copy before launching the program.
"""

MY_SERVER_NAME = "http://localhost:5000"
MY_SERVER_NAME = "http://localhost:2000"
SERVER_MAIN_TITLE = "Canarie API"

# If this is True, canarie-api will parse the nginx logs in DATABASE["access_log"] and report statistics
PARSE_LOGS = True

DATABASE = {
"filename": "/opt/local/src/CanarieAPI/stats.db",
"access_log": "/var/log/nginx/access_file.log",
"log_pid": "/var/run/nginx.pid"
"filename": "/data/stats.db",
"access_log": "/logs/nginx-access.log"
}

SERVICES = {
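The database and access-log defaults now point at container-friendly paths (``/data``, ``/logs``), and the nginx pid file is no longer needed since log rotation was dropped. A deployment can still override these paths through the file referenced by the ``CANARIE_API_CONFIG_FN`` environment variable mentioned in the module docstring; a minimal sketch with hypothetical mount points:

    # custom_configuration.py -- a hypothetical override, loaded via CANARIE_API_CONFIG_FN
    DATABASE = {
        "filename": "/data/stats.db",            # SQLite statistics database on a shared volume
        "access_log": "/logs/nginx-access.log",  # nginx access log shared from the proxy container
    }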
64 changes: 24 additions & 40 deletions canarieapi/logparser.py
@@ -1,48 +1,19 @@
# -- Standard lib ------------------------------------------------------------
import logging
import logging.handlers
import os
import re
import signal
import sqlite3
import time
from typing import Dict, Optional, Union

# -- 3rd party ---------------------------------------------------------------
from dateutil.parser import parse as dt_parse

# -- Project specific --------------------------------------------------------
from canarieapi.app_object import APP
from canarieapi.utility_rest import get_db, retry_db_error_after_init

LOG_BACKUP_COUNT = 150

RouteStatistics = Dict[str, Dict[str, Union[str, int]]]


def rotate_log(filename: str) -> None:
logger: logging.Logger = APP.logger
logger.info("Rotating %s", filename)

# Base on a rotation every 10 minutes, if we want to keep 1 day worth of logs we have to keep about 150 of them
rfh = logging.handlers.RotatingFileHandler(filename, backupCount=LOG_BACKUP_COUNT)
rfh.doRollover()

pid_file = APP.config["DATABASE"]["log_pid"]
logger.info("Asking nginx to reload log file (using pid file : %s", pid_file)
try:
with open(pid_file, "rb") as pid_f:
pid = pid_f.read()
logger.info("nginx pid is %s", int(pid))
except IOError:
# If nginx is not running no needs to force the log reload
logger.warning("No pid found!")
return

# Send SIGUSR1 to nginx to force the log reload
logger.info("Sending USR1 (to reload log) signal to nginx process")
os.kill(int(pid), signal.SIGUSR1)
time.sleep(1)


def parse_log(filename: str) -> RouteStatistics:
def parse_log(filename: str, database: Optional[sqlite3.Connection] = None) -> RouteStatistics:
# Load config
logger = APP.logger
logger.info("Loading configuration")
@@ -66,6 +37,17 @@ def parse_log(filename: str) -> RouteStatistics:
logger.error("Exception occurs while trying to compile regex of %s", route)
raise

# get the last entry from the logs in order to not duplicate entries if the same log file is read multiple times
with APP.app_context():
db = database or get_db()
cur = db.cursor()
cur.execute("select last_access from stats order by last_access desc limit 1")
records = cur.fetchone()
if records:
last_access = dt_parse(records[0])  # fetchone() returns a single row; its first column holds the timestamp
else:
last_access = None

# Load access log
logger.info("Loading log file : %s", filename)
log_regex = re.compile(r".*\[(?P<datetime>.*)\] \"(?P<method>[A-Z]+) (?P<route>/.*) .*") # pylint: disable=C4001
@@ -74,7 +56,9 @@
for line in f:
match = log_regex.match(line)
if match:
log_records.append(match.groupdict())
records = match.groupdict()
if last_access is None or dt_parse(records["datetime"]) > last_access:
log_records.append(records)

# Compile stats
logger.info("Compiling stats from %s records", len(log_records))
@@ -119,12 +103,12 @@ def update_db(route_stats: RouteStatistics, database: Optional[sqlite3.Connection]


def cron_job() -> None:
logger = APP.logger
logger.info("Cron job for parsing server log")
access_log_fn = APP.config["DATABASE"]["access_log"]
rotate_log(access_log_fn)
update_db(parse_log(access_log_fn + ".1"))
logger.info("Done")
if APP.config.get("PARSE_LOGS", True):
logger = APP.logger
logger.info("Cron job for parsing server log")
access_log_fn = APP.config["DATABASE"]["access_log"]
update_db(parse_log(access_log_fn))
logger.info("Done")


if __name__ == "__main__":
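The key change above is the deduplication guard: since the log file is no longer rotated, ``parse_log`` only counts records strictly newer than the latest ``last_access`` already stored, so re-reading the same file does not double-count. The core idea in isolation (a sketch reusing the same ``dt_parse`` call as the module):

    # Sketch of the deduplication guard used in parse_log above
    from dateutil.parser import parse as dt_parse

    def keep_new_records(parsed_records, last_access):
        """Keep only records strictly newer than the stored last_access timestamp."""
        if last_access is None:
            return list(parsed_records)  # first run: nothing stored yet, keep everything
        return [rec for rec in parsed_records if dt_parse(rec["datetime"]) > last_access]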
12 changes: 5 additions & 7 deletions canarieapi/schema.json
@@ -13,7 +13,7 @@
"DATABASE": {
"description": "Parameters about database and its data source",
"type": "object",
"required": ["filename", "access_log", "log_pid"],
"required": ["filename", "access_log"],
"additionalProperties": false,
"properties": {
"filename": {
@@ -23,10 +23,6 @@
"access_log": {
"description": "NGINX log file location",
"type": "string"
},
"log_pid": {
"description": "NGINX pid file location",
"type": "string"
}
}
},
@@ -51,7 +47,8 @@
"service_description_schema": {
"description": "Describe a service",
"type": "object",
"required": ["info", "stats", "redirect", "monitoring"],
"$comment": "When the PARSE_LOG configuration option is True, \"stats\" will be dynamically added to the list of required properties",
"required": ["info", "redirect", "monitoring"],
"additionalProperties": false,
"properties": {
"info": {"$ref": "#/definitions/service_info_schema"},
@@ -63,7 +60,8 @@
"platform_description_schema": {
"description": "Describe a platform",
"type": "object",
"required": ["info", "stats", "redirect", "monitoring"],
"$comment": "When the PARSE_LOG configuration option is True, \"stats\" will be dynamically added to the list of required properties",
"required": ["info", "redirect", "monitoring"],
"additionalProperties": false,
"properties": {
"info": {"$ref": "#/definitions/platform_info_schema"},
57 changes: 31 additions & 26 deletions canarieapi/schema.py
@@ -1,4 +1,5 @@
# -- Standard lib ------------------------------------------------------------
import copy
import json
import logging
import os
@@ -8,45 +9,49 @@

# -- Project specific --------------------------------------------------------
from canarieapi.app_object import APP
from canarieapi.logparser import LOG_BACKUP_COUNT, parse_log
from canarieapi.logparser import parse_log
from canarieapi.monitoring import monitor

# The schema that must be respected by the config
with open(os.path.join(os.path.dirname(__file__), "schema.json"), mode="r", encoding="utf-8") as schema_file:
CONFIGURATION_SCHEMA = json.load(schema_file)


def validate_config_schema(update_db):
# type: (bool) -> None
def validate_config_schema(update_db: bool, run_jobs: bool = True) -> None:
config = APP.config
logger: logging.Logger = APP.logger

logger.info("Testing configuration...")

configuration_schema = copy.deepcopy(CONFIGURATION_SCHEMA)
if config.get("PARSE_LOG", True):
configuration_schema["definitions"]["service_description_schema"]["required"].append("stats")
configuration_schema["definitions"]["platform_description_schema"]["required"].append("stats")
try:
jsonschema.validate(config, CONFIGURATION_SCHEMA)
jsonschema.validate(config, configuration_schema)
except jsonschema.ValidationError as exc:
raise jsonschema.ValidationError(f"The configuration is invalid : {exc!s}")

monitor(update_db=update_db)

access_log_fn = config["DATABASE"]["access_log"]
route_invocations = {}
file_checked = 0
for i in range(0, min(10, LOG_BACKUP_COUNT)):
fn = access_log_fn + (f".{i}" if i > 0 else "")
try:
route_stats = parse_log(fn)
for route, value in route_stats.items():
route_invocations[route] = route_invocations.get(route, 0) + value["count"]
file_checked += 1
except IOError:
break

for route, invocs in route_invocations.items():
if invocs > 0:
logger.info("Found %s invocations to route %s in %s log files", invocs, route, file_checked)
else:
logger.warning("Found no invocations to route %s in %s log files", route, file_checked)
if not route_invocations:
logger.warning("Found no invocations at all in %s log files", file_checked)
if run_jobs:
monitor(update_db=update_db)

if config.get("PARSE_LOG", True):
access_log_fn = config["DATABASE"]["access_log"]
route_invocations = {}
file_checked = 0
try:
route_stats = parse_log(access_log_fn)
except IOError:
logger.warning("Unable to read logs from %s", access_log_fn)
else:
for route, value in route_stats.items():
route_invocations[route] = route_invocations.get(route, 0) + value["count"]

for route, invocs in route_invocations.items():
if invocs > 0:
logger.info("Found %s invocations to route %s in %s log files", invocs, route, file_checked)
else:
logger.warning("Found no invocations to route %s in %s log files", route, file_checked)
if not route_invocations:
logger.warning("Found no invocations at all in %s log files", file_checked)
logger.info("Tests completed!")