Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

make canarie-api independent of cron and proxy #18

Merged
merged 18 commits into from
Aug 29, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,11 @@ CHANGES
`Unreleased <https://github.com/Ouranosinc/CanarieAPI/tree/master>`_ (latest)
------------------------------------------------------------------------------------

* Separate the application, cron job, and proxy (nginx) into separate containers so that they can be run independently.
* Add an option to independently enable or disable the ``parse_logs`` cron job by setting the ``PARSE_LOGS``
  configuration option to ``True`` or ``False`` respectively.
* Do not handle log rotation for nginx anymore. Nginx should handle this on its own.
* Made default port 2000 everywhere (not just in docker).
* Replace Docker image reference ``nginx:bullseye`` with ``nginx:stable-bullseye``, which gets updated more often,
  for latest security vulnerability fixes.

Expand Down
30 changes: 9 additions & 21 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM nginx:stable-bullseye
FROM python:3.11
LABEL description="CanarieAPI: Self describing REST service for Canarie registry."
LABEL maintainer="David Byrns <[email protected]>, Francis Charette-Migneault <[email protected]>"
LABEL vendor="Ouranosinc, CRIM"
Expand All @@ -8,7 +8,7 @@ ENV PKG_DIR=/opt/local/src/CanarieAPI
WORKDIR ${PKG_DIR}

# Add logparser cron job in the cron directory
ADD canarieapi-cron /etc/cron.d/canarieapi-cron
ADD docker/canarieapi-cron /etc/cron.d/canarieapi-cron
RUN chmod 0644 /etc/cron.d/canarieapi-cron

# Install dependencies
Expand All @@ -17,12 +17,8 @@ COPY requirements.txt setup.* README.rst CHANGES.rst ${PKG_DIR}/
RUN apt-get update \
&& apt-get install -y \
build-essential \
python3-dev \
python3-pip \
cron \
sqlite3 \
&& ln -s $(which pip3) /usr/local/bin/pip \
&& ln -s $(which python3) /usr/bin/python \
&& pip install --no-cache-dir --upgrade pip setuptools gunicorn gevent \
&& pip install --no-cache-dir --upgrade -r ${PKG_DIR}/requirements.txt \
&& pip install --no-cache-dir -e ${PKG_DIR}
Expand All @@ -31,19 +27,11 @@ RUN apt-get update \
COPY ./ ${PKG_DIR}/
RUN pip install --no-dependencies ${PKG_DIR}

# cron job will inherit the current user environment
# start cron service
# start nginx service
# start gunicorn
CMD env >> /etc/environment \
&& cron \
&& gunicorn \
-b 0.0.0.0:2000 \
--workers 1 \
--log-level=DEBUG \
--timeout 30 \
--daemon \
-k gevent \
canarieapi.wsgi \
&& nginx \
-g "daemon off;"
CMD gunicorn \
-b 0.0.0.0:2000 \
--workers 1 \
--log-level=DEBUG \
--timeout 30 \
-k gevent \
canarieapi.wsgi
4 changes: 3 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -447,11 +447,13 @@ docker-stop:
@echo "Stopping test docker container: $(APP_DOCKER_TEST)"
-docker container stop -t 5 "$(APP_DOCKER_TEST)" 2>/dev/null || true
-docker rm $(APP_DOCKER_TEST) 2>/dev/null || true
-docker volume rm "$(APP_DOCKER_TEST)"-data 2>/dev/null || true

.PHONY: docker-test
docker-test: docker-build docker-stop docker-clean
@echo "Smoke test of docker image: $(DOCKER_TAG)"
docker run --pull never --name "$(APP_DOCKER_TEST)" -p 2000:2000 -d "$(APP_DOCKER_TAG)"
docker run --pull never --name "$(APP_DOCKER_TEST)" -v "$(APP_DOCKER_TEST)"-data:/data -p 2000:2000 -d \
"$(APP_DOCKER_TAG)"
@sleep 2
@echo "Testing docker image package installation..."
(docker exec "$(APP_DOCKER_TEST)" \
Expand Down
2 changes: 0 additions & 2 deletions canarieapi-cron

This file was deleted.

33 changes: 18 additions & 15 deletions canarieapi/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@
get_db()

CronAccessStats = TypedDict("CronAccessStats", {
"invocations": int,
"invocations": int, # count | Not monitored
"last_access": str, # ISO datetime | Never
"last_log_update": str, # ISO datetime | Never
"last_status_update": str, # ISO datetime | Never
Expand Down Expand Up @@ -329,6 +329,11 @@ def stats(route_name: str, api_type: APIType) -> ResponseReturnValue:

db = get_db()

service_stats = [
(api_type, route_name),
("lastReset", START_UTC_TIME.isoformat() + "Z"),
]

# Gather service(s) status
all_status = collect_monitoring_statuses(route_name, database=db)

Expand All @@ -338,7 +343,8 @@ def stats(route_name: str, api_type: APIType) -> ResponseReturnValue:
error_info = collections.OrderedDict([
(svc_monitor, {
"status": Status.pretty_msg(svc_info["status"]),
"message": svc_info["message"] or (msg_ok if svc_info["status"] == Status.ok else "Undefined Error"),
"message": svc_info["message"] or (
msg_ok if svc_info["status"] == Status.ok else "Undefined Error"),
})
for svc_monitor, svc_info in all_status.items()
])
Expand All @@ -356,25 +362,22 @@ def stats(route_name: str, api_type: APIType) -> ResponseReturnValue:
)
return error_html, 503

# Gather route stats
monitor_info = []
cron_info = collect_cron_access_stats(route_name, database=db)

monitor_info = [
("lastAccess", cron_info["last_access"]),
("lastInvocationsUpdate", cron_info["last_log_update"]),
("lastStatusUpdate", cron_info["last_status_update"])
]
if APP.config.get("PARSE_LOGS", True):
service_stats.append(("invocations", cron_info["invocations"]))
monitor_info.append(("lastInvocationsUpdate", cron_info["last_log_update"]))
monitor_info.append(("lastAccess", cron_info["last_access"]))

monitor_info.append(("lastStatusUpdate", cron_info["last_status_update"]))

for service, svc_info in all_status.items():
monitor_info.append((service, Status.pretty_msg(svc_info["status"])))

monitor_info = collections.OrderedDict(monitor_info)
service_stats.append(("monitoring", monitor_info))

service_stats = [
(api_type, route_name),
("lastReset", START_UTC_TIME.isoformat() + "Z"),
("invocations", cron_info["invocations"]),
("monitoring", monitor_info)
]
service_stats = collections.OrderedDict(service_stats)

if request_wants_json():
Expand Down Expand Up @@ -457,4 +460,4 @@ def close_connection(_: Exception) -> None:

if __name__ == "__main__":
APP.debug = False
APP.run(port=5000)
APP.run(port=2000)
10 changes: 6 additions & 4 deletions canarieapi/default_configuration.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,15 @@
"CANARIE_API_CONFIG_FN" to the path of your own copy before launching the program.
"""

MY_SERVER_NAME = "http://localhost:5000"
MY_SERVER_NAME = "http://localhost:2000"
SERVER_MAIN_TITLE = "Canarie API"

# If this is True, canarie-api will parse the nginx logs in DATABASE["access_log"] and report statistics
PARSE_LOGS = True

DATABASE = {
"filename": "/opt/local/src/CanarieAPI/stats.db",
"access_log": "/var/log/nginx/access_file.log",
"log_pid": "/var/run/nginx.pid"
"filename": "/data/stats.db",
"access_log": "/logs/nginx-access.log"
Comment on lines -14 to +18
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This causes breaking changes.
Need to consider MAJOR revision.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree

}

SERVICES = {
Expand Down
64 changes: 24 additions & 40 deletions canarieapi/logparser.py
Original file line number Diff line number Diff line change
@@ -1,48 +1,19 @@
# -- Standard lib ------------------------------------------------------------
import logging
import logging.handlers
import os
import re
import signal
import sqlite3
import time
from typing import Dict, Optional, Union

# -- 3rd party ---------------------------------------------------------------
from dateutil.parser import parse as dt_parse
fmigneault marked this conversation as resolved.
Show resolved Hide resolved

# -- Project specific --------------------------------------------------------
from canarieapi.app_object import APP
from canarieapi.utility_rest import get_db, retry_db_error_after_init

LOG_BACKUP_COUNT = 150

RouteStatistics = Dict[str, Dict[str, Union[str, int]]]


def rotate_log(filename: str) -> None:
logger: logging.Logger = APP.logger
logger.info("Rotating %s", filename)

# Base on a rotation every 10 minutes, if we want to keep 1 day worth of logs we have to keep about 150 of them
rfh = logging.handlers.RotatingFileHandler(filename, backupCount=LOG_BACKUP_COUNT)
rfh.doRollover()

pid_file = APP.config["DATABASE"]["log_pid"]
logger.info("Asking nginx to reload log file (using pid file : %s", pid_file)
try:
with open(pid_file, "rb") as pid_f:
pid = pid_f.read()
logger.info("nginx pid is %s", int(pid))
except IOError:
# If nginx is not running no needs to force the log reload
logger.warning("No pid found!")
return

# Send SIGUSR1 to nginx to force the log reload
logger.info("Sending USR1 (to reload log) signal to nginx process")
os.kill(int(pid), signal.SIGUSR1)
time.sleep(1)


def parse_log(filename: str) -> RouteStatistics:
def parse_log(filename: str, database: Optional[sqlite3.Connection] = None) -> RouteStatistics:
# Load config
logger = APP.logger
logger.info("Loading configuration")
Expand All @@ -66,6 +37,17 @@ def parse_log(filename: str) -> RouteStatistics:
logger.error("Exception occurs while trying to compile regex of %s", route)
raise

# get the last entry from the logs in order to not duplicate entries if the same log file is read multiple times
with APP.app_context():
db = database or get_db()
cur = db.cursor()
cur.execute("select last_access from stats order by last_access desc limit 1")
records = cur.fetchone()
if records:
last_access = dt_parse(records[0][0])
fmigneault marked this conversation as resolved.
Show resolved Hide resolved
else:
last_access = None

# Load access log
logger.info("Loading log file : %s", filename)
log_regex = re.compile(r".*\[(?P<datetime>.*)\] \"(?P<method>[A-Z]+) (?P<route>/.*) .*") # pylint: disable=C4001
Expand All @@ -74,7 +56,9 @@ def parse_log(filename: str) -> RouteStatistics:
for line in f:
match = log_regex.match(line)
if match:
log_records.append(match.groupdict())
records = match.groupdict()
if last_access is None or dt_parse(records["datetime"]) > last_access:
log_records.append(records)

# Compile stats
logger.info("Compiling stats from %s records", len(log_records))
Expand Down Expand Up @@ -119,12 +103,12 @@ def update_db(route_stats: RouteStatistics, database: Optional[sqlite3.Connectio


def cron_job() -> None:
logger = APP.logger
logger.info("Cron job for parsing server log")
access_log_fn = APP.config["DATABASE"]["access_log"]
rotate_log(access_log_fn)
update_db(parse_log(access_log_fn + ".1"))
logger.info("Done")
if APP.config.get("PARSE_LOGS", True):
logger = APP.logger
logger.info("Cron job for parsing server log")
access_log_fn = APP.config["DATABASE"]["access_log"]
update_db(parse_log(access_log_fn))
logger.info("Done")


if __name__ == "__main__":
Expand Down
12 changes: 5 additions & 7 deletions canarieapi/schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
"DATABASE": {
"description": "Parameters about database and its data source",
"type": "object",
"required": ["filename", "access_log", "log_pid"],
"required": ["filename", "access_log"],
"additionalProperties": false,
"properties": {
"filename": {
Expand All @@ -23,10 +23,6 @@
"access_log": {
"description": "NGINX log file location",
"type": "string"
},
"log_pid": {
"description": "NGINX pid file location",
"type": "string"
}
}
},
Expand All @@ -51,7 +47,8 @@
"service_description_schema": {
"description": "Describe a service",
"type": "object",
"required": ["info", "stats", "redirect", "monitoring"],
"$comment": "When the PARSE_LOGS configuration option is True, \"stats\" is dynamically added to the list of required properties",
"required": ["info", "redirect", "monitoring"],
"additionalProperties": false,
"properties": {
"info": {"$ref": "#/definitions/service_info_schema"},
Expand All @@ -63,7 +60,8 @@
"platform_description_schema": {
"description": "Describe a platform",
"type": "object",
"required": ["info", "stats", "redirect", "monitoring"],
"$comment": "When the PARSE_LOGS configuration option is True, \"stats\" is dynamically added to the list of required properties",
"required": ["info", "redirect", "monitoring"],
"additionalProperties": false,
"properties": {
"info": {"$ref": "#/definitions/platform_info_schema"},
Expand Down
57 changes: 31 additions & 26 deletions canarieapi/schema.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
# -- Standard lib ------------------------------------------------------------
import copy
import json
import logging
import os
Expand All @@ -8,45 +9,49 @@

# -- Project specific --------------------------------------------------------
from canarieapi.app_object import APP
from canarieapi.logparser import LOG_BACKUP_COUNT, parse_log
from canarieapi.logparser import parse_log
from canarieapi.monitoring import monitor

# The schema that must be respected by the config
with open(os.path.join(os.path.dirname(__file__), "schema.json"), mode="r", encoding="utf-8") as schema_file:
CONFIGURATION_SCHEMA = json.load(schema_file)


def validate_config_schema(update_db):
# type: (bool) -> None
def validate_config_schema(update_db: bool, run_jobs: bool = True) -> None:
config = APP.config
logger: logging.Logger = APP.logger

logger.info("Testing configuration...")

configuration_schema = copy.deepcopy(CONFIGURATION_SCHEMA)
if config.get("PARSE_LOG", True):
configuration_schema["definitions"]["service_description_schema"]["required"].append("stats")
configuration_schema["definitions"]["platform_description_schema"]["required"].append("stats")
fmigneault marked this conversation as resolved.
Show resolved Hide resolved
try:
jsonschema.validate(config, CONFIGURATION_SCHEMA)
jsonschema.validate(config, configuration_schema)
except jsonschema.ValidationError as exc:
raise jsonschema.ValidationError(f"The configuration is invalid : {exc!s}")

monitor(update_db=update_db)

access_log_fn = config["DATABASE"]["access_log"]
route_invocations = {}
file_checked = 0
for i in range(0, min(10, LOG_BACKUP_COUNT)):
fn = access_log_fn + (f".{i}" if i > 0 else "")
try:
route_stats = parse_log(fn)
for route, value in route_stats.items():
route_invocations[route] = route_invocations.get(route, 0) + value["count"]
file_checked += 1
except IOError:
break

for route, invocs in route_invocations.items():
if invocs > 0:
logger.info("Found %s invocations to route %s in %s log files", invocs, route, file_checked)
else:
logger.warning("Found no invocations to route %s in %s log files", route, file_checked)
if not route_invocations:
logger.warning("Found no invocations at all in %s log files", file_checked)
if run_jobs:
monitor(update_db=update_db)

if config.get("PARSE_LOG", True):
access_log_fn = config["DATABASE"]["access_log"]
route_invocations = {}
file_checked = 0
try:
route_stats = parse_log(access_log_fn)
except IOError:
logger.warning("Unable to read logs from %s", access_log_fn)
else:
for route, value in route_stats.items():
route_invocations[route] = route_invocations.get(route, 0) + value["count"]

for route, invocs in route_invocations.items():
if invocs > 0:
logger.info("Found %s invocations to route %s in %s log files", invocs, route, file_checked)
else:
logger.warning("Found no invocations to route %s in %s log files", route, file_checked)
if not route_invocations:
logger.warning("Found no invocations at all in %s log files", file_checked)
logger.info("Tests completed!")
Loading