Skip to content

Commit

Permalink
feat(gunicorn): Updated image to use gunicorn and new base image (#209)
Browse files Browse the repository at this point in the history
### Improvements
- Update to use new Amazon Linux base image and use the same structure as our other python services. 
- Utilizing "gen3" user instead of "root" for more secure containers
- Moving to Poetry to manage our virtual environments 
- Multi-stage Docker builds for smaller images
- Move to Gunicorn

---------

Co-authored-by: Edward Malinowski <[email protected]>
Co-authored-by: Jawad Qureshi <[email protected]>
Co-authored-by: EliseCastle23 <[email protected]>
Co-authored-by: George Thomas <[email protected]>
  • Loading branch information
5 people authored Nov 18, 2024
1 parent f64f5fc commit 906dcd8
Show file tree
Hide file tree
Showing 14 changed files with 668 additions and 753 deletions.
8 changes: 6 additions & 2 deletions .secrets.baseline
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,10 @@
{
"path": "detect_secrets.filters.allowlist.is_line_allowlisted"
},
{
"path": "detect_secrets.filters.common.is_baseline_file",
"filename": ".secrets.baseline"
},
{
"path": "detect_secrets.filters.common.is_ignored_due_to_verification_policies",
"min_level": 2
Expand Down Expand Up @@ -123,7 +127,7 @@
"filename": "bin/settings.py",
"hashed_secret": "347cd9c53ff77d41a7b22aa56c7b4efaf54658e3",
"is_verified": false,
"line_number": 46
"line_number": 54
}
],
"peregrine/blueprints/coremetadata.py": [
Expand Down Expand Up @@ -273,5 +277,5 @@
}
]
},
"generated_at": "2023-11-01T14:27:13Z"
"generated_at": "2024-11-01T18:38:15Z"
}
81 changes: 30 additions & 51 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,63 +1,42 @@
# To run:
# - Create and fill out `creds.json`:
# {
# "fence_host": "",
# "fence_username": "",
# "fence_password": "",
# "fence_database": "",
# "db_host": "",
# "db_username": "",
# "db_password": "",
# "db_database": "",
# "gdcapi_secret_key": "",
# "hostname": ""
# }
# - Build the image: `docker build . -t peregrine -f Dockerfile`
# - Run: `docker run -v /full/path/to/creds.json:/var/www/peregrine/creds.json -p 81:80 peregrine`
# To check running container: `docker exec -it peregrine /bin/bash`

FROM quay.io/cdis/python:python3.9-buster-2.0.0
ARG AZLINUX_BASE_VERSION=master

FROM quay.io/cdis/python-nginx-al:${AZLINUX_BASE_VERSION} AS base

ENV appname=peregrine

RUN apt-get update && apt-get install -y --no-install-recommends \
build-essential libffi-dev musl-dev gcc libxml2-dev libxslt-dev \
curl bash git vim
RUN pip install --upgrade pip poetry
WORKDIR /${appname}

RUN chown -R gen3:gen3 /${appname}

# Builder stage
FROM base AS builder

RUN dnf install -y python3-devel postgresql-devel gcc

USER gen3

COPY poetry.lock pyproject.toml /${appname}/

RUN poetry install -vv --only main --no-interaction

RUN mkdir -p /var/www/$appname \
&& mkdir -p /var/www/.cache/Python-Eggs/ \
&& mkdir /run/nginx/ \
&& ln -sf /dev/stdout /var/log/nginx/access.log \
&& ln -sf /dev/stderr /var/log/nginx/error.log \
&& chown nginx -R /var/www/.cache/Python-Eggs/ \
&& chown nginx /var/www/$appname
COPY --chown=gen3:gen3 . /${appname}

EXPOSE 80
# Run poetry again so this app itself gets installed too
RUN poetry install --without dev --no-interaction

WORKDIR /$appname
RUN git config --global --add safe.directory /${appname} && COMMIT=`git rev-parse HEAD` && echo "COMMIT=\"${COMMIT}\"" > /${appname}/version_data.py \
&& VERSION=`git describe --always --tags` && echo "VERSION=\"${VERSION}\"" >> /${appname}/version_data.py

# copy ONLY the poetry artifacts, install the dependencies but not peregrine itself
# this will make sure that the dependencies are cached
COPY poetry.lock pyproject.toml /$appname/
RUN poetry config virtualenvs.create false \
&& poetry install -vv --no-root --no-dev --no-interaction \
&& poetry show -v
# Final stage
FROM base

# copy source code ONLY after installing dependencies
COPY . /$appname
COPY ./deployment/uwsgi/uwsgi.ini /etc/uwsgi/uwsgi.ini
COPY ./bin/settings.py /var/www/$appname/settings.py
COPY ./bin/confighelper.py /var/www/$appname/confighelper.py
RUN yum install -y postgresql-libs

# install peregrine
RUN poetry config virtualenvs.create false \
&& poetry install -vv --no-dev --no-interaction \
&& poetry show -v
COPY --from=builder /${appname} /${appname}

RUN COMMIT=`git rev-parse HEAD` && echo "COMMIT=\"${COMMIT}\"" >$appname/version_data.py \
&& VERSION=`git describe --always --tags` && echo "VERSION=\"${VERSION}\"" >>$appname/version_data.py
# Switch to non-root user 'gen3' for the serving process
USER gen3

WORKDIR /var/www/$appname
WORKDIR /${appname}

CMD /dockerrun.sh
CMD ["/bin/bash", "-c", "/${appname}/dockerrun.bash"]
86 changes: 36 additions & 50 deletions bin/settings.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from peregrine.api import app, app_init
from os import environ
import confighelper
import bin.confighelper as confighelper

APP_NAME = "peregrine"

Expand All @@ -12,74 +12,60 @@ def load_json(file_name):
conf_data = load_json("creds.json")
config = app.config

config["AUTH"] = "https://auth.service.consul:5000/v3/"
config["AUTH_ADMIN_CREDS"] = None
config["INTERNAL_AUTH"] = None

# ARBORIST deprecated, replaced by ARBORIST_URL
# ARBORIST_URL is initialized in app_init() directly
config["ARBORIST"] = "http://arborist-service/"

# Signpost: deprecated, replaced by index client.
config["SIGNPOST"] = {
"host": environ.get("SIGNPOST_HOST") or "http://indexd-service",
"version": "v0",
"auth": ("gdcapi", conf_data.get("indexd_password", "{{indexd_password}}")),
}

config["INDEX_CLIENT"] = {
"host": environ.get("INDEX_CLIENT_HOST") or "http://indexd-service",
"version": "v0",
"auth": ("gdcapi", conf_data.get("indexd_password", "{{indexd_password}}")),
# The user should be "sheepdog", but for legacy reasons, we use "gdcapi" instead
"auth": (
(
environ.get("INDEXD_USER", "gdcapi"),
environ.get("INDEXD_PASS")
or conf_data.get("indexd_password", "{{indexd_password}}"),
)
),
}
config["FAKE_AUTH"] = False

config["PSQLGRAPH"] = {
"host": conf_data.get("db_host", "{{db_host}}"),
"user": conf_data.get("db_username", "{{db_username}}"),
"password": conf_data.get("db_password", "{{db_password}}"),
"database": conf_data.get("db_database", "{{db_database}}"),
"host": environ.get("PGHOST") or conf_data.get("db_host", "{{db_host}}"),
"user": environ.get("PGUSER") or conf_data.get("db_username", "{{db_username}}"),
"password": environ.get("PGPASSWORD")
or conf_data.get("db_password", "{{db_password}}"),
"database": environ.get("PGDB") or conf_data.get("db_database", "{{db_database}}"),
}

config["HMAC_ENCRYPTION_KEY"] = conf_data.get("hmac_key", "{{hmac_key}}")
config["FLASK_SECRET_KEY"] = conf_data.get("gdcapi_secret_key", "{{gdcapi_secret_key}}")
config["PSQL_USER_DB_CONNECTION"] = "postgresql://%s:%s@%s:5432/%s" % tuple(
[
conf_data.get(key, key)
for key in ["fence_username", "fence_password", "fence_host", "fence_database"]
]
fence_username = environ.get("FENCE_DB_USER") or conf_data.get(
"fence_username", "{{fence_username}}"
)
fence_password = environ.get("FENCE_DB_PASS") or conf_data.get(
"fence_password", "{{fence_password}}"
)
fence_host = environ.get("FENCE_DB_HOST") or conf_data.get(
"fence_host", "{{fence_host}}"
)
fence_database = environ.get("FENCE_DB_DBNAME") or conf_data.get(
"fence_database", "{{fence_database}}"
)
config["PSQL_USER_DB_CONNECTION"] = "postgresql://%s:%s@%s:5432/%s" % (
fence_username,
fence_password,
fence_host,
fence_database,
)


config["DICTIONARY_URL"] = environ.get(
"DICTIONARY_URL",
"https://s3.amazonaws.com/dictionary-artifacts/datadictionary/develop/schema.json",
)

config["SUBMISSION"] = {"bucket": conf_data.get("bagit_bucket", "{{bagit_bucket}}")}

config["STORAGE"] = {
"s3": {
"access_key": conf_data.get("s3_access", "{{s3_access}}"),
"secret_key": conf_data.get("s3_secret", "{{s3_secret}}"),
}
}

config["OIDC_ISSUER"] = "https://%s/user" % conf_data["hostname"]

config["OAUTH2"] = {
"client_id": conf_data.get("oauth2_client_id", "{{oauth2_client_id}}"),
"client_secret": conf_data.get("oauth2_client_secret", "{{oauth2_client_secret}}"),
"api_base_url": "https://%s/user/" % conf_data["hostname"],
"authorize_url": "https://%s/user/oauth2/authorize" % conf_data["hostname"],
"access_token_url": "https://%s/user/oauth2/token" % conf_data["hostname"],
"refresh_token_url": "https://%s/user/oauth2/token" % conf_data["hostname"],
"client_kwargs": {
"redirect_uri": "https://%s/api/v0/oauth2/authorize" % conf_data["hostname"],
"scope": "openid data user",
},
# deprecated key values, should be removed after all commons use new oidc
"internal_oauth_provider": "http://fence-service/oauth2/",
"oauth_provider": "https://%s/user/oauth2/" % conf_data["hostname"],
"redirect_uri": "https://%s/api/v0/oauth2/authorize" % conf_data["hostname"],
}
hostname = environ.get("CONF_HOSTNAME") or conf_data["hostname"]
config["OIDC_ISSUER"] = "https://%s/user" % hostname

config["USER_API"] = config["OIDC_ISSUER"] # for use by authutils
# use the USER_API URL instead of the public issuer URL to acquire JWT keys
Expand Down
2 changes: 1 addition & 1 deletion bin/setup_notifications.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
"""

from sqlalchemy import create_engine
from gdcdatamodel.models.notifications import Base
from gen3datamodel.models.notifications import Base


def setup(host, user, password, database):
Expand Down
2 changes: 1 addition & 1 deletion bin/setup_psqlgraph.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from sqlalchemy import create_engine
import logging

from gdcdatamodel.models import *
from gen3datamodel.models import *
from psqlgraph import create_all, Node, Edge


Expand Down
2 changes: 1 addition & 1 deletion bin/setup_transactionlogs.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

import argparse
from sqlalchemy import create_engine
from gdcdatamodel.models.submission import Base
from gen3datamodel.models.submission import Base


def setup(host, user, password, database):
Expand Down
34 changes: 0 additions & 34 deletions deployment/uwsgi/uwsgi.ini

This file was deleted.

6 changes: 6 additions & 0 deletions deployment/wsgi/gunicorn.conf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
wsgi_app = "bin.settings:application"
bind = "0.0.0.0:8000"
workers = 1
user = "gen3"
group = "gen3"
timeout = 300
4 changes: 4 additions & 0 deletions dockerrun.bash
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
#!/bin/bash

nginx
poetry run gunicorn -c /peregrine/deployment/wsgi/gunicorn.conf.py
8 changes: 2 additions & 6 deletions peregrine/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,8 +103,8 @@ def dictionary_init(app):

d = gdcdictionary.gdcdictionary
dictionary.init(d)
from gdcdatamodel import models as md
from gdcdatamodel import validators as vd
from gen3datamodel import models as md
from gen3datamodel import validators as vd

datamodelutils.validators.init(vd)
datamodelutils.models.init(md)
Expand Down Expand Up @@ -133,10 +133,6 @@ def app_init(app):
submission.graphql.make_graph_traversal_dict(app)
app.graphql_schema = submission.graphql.get_schema()
app.schema_file = submission.generate_schema_file(app.graphql_schema, app.logger)
try:
app.secret_key = app.config["FLASK_SECRET_KEY"]
except KeyError:
app.logger.error("Secret key not set in config! Authentication will not work")
async_pool_init(app)

# ARBORIST deprecated, replaced by ARBORIST_URL
Expand Down
6 changes: 3 additions & 3 deletions peregrine/models.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,19 @@
"""
This module generalizes the data model used by the peregrine blueprint, and
must be initialized using another ``models`` module to set the attributes of
this module. For example, using ``gdcdatamodel.models`` as the models:
this module. For example, using ``gen3datamodel.models`` as the models:
.. code-block:: python
peregrine.models.init(gdcdatamodel.models)
peregrine.models.init(gen3datamodel.models)
Then this module can be imported elsewhere in ``peregrine``:
.. code-block:: python
from peregrine import models
# This is effectively an alias of ``gdcdatamodel.models.Project``.
# This is effectively an alias of ``gen3datamodel.models.Project``.
models.Project
"""

Expand Down
4 changes: 2 additions & 2 deletions peregrine/resources/submission/graphql/node.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
peregrine.resources.submission.graphql.node
----------------------------------
Implements GraphQL queries for each gdcdatamodel.model node type
Implements GraphQL queries for each gen3datamodel.model node type
using the Graphene GraphQL library
"""

Expand Down Expand Up @@ -424,7 +424,7 @@ def resolve_node(self, info, **args):
:returns:
A list of graphene object classes (e.g. a Case query object
(not a gdcdatamodel Case)).
(not a gen3datamodel Case)).
"""

Expand Down
Loading

0 comments on commit 906dcd8

Please sign in to comment.