From 8c840dfe22f54fafb753323414dc82df2e324c8d Mon Sep 17 00:00:00 2001
From: F-G Fernandez
Date: Sun, 13 Dec 2020 22:55:11 +0100
Subject: [PATCH] feat: Added web server for deployment (#27)

* refactor: Updated package import hierarchy
* chore: Removed unused dependencies
* feat: Added dummy cache mechanism
* feat: Added extra information logging
* feat: Added FastAPI web server for deployment
* chore: Added docker orchestration
* feat: Added date as a route argument
* refactor: Reversed cache to prevent system running OOM
* refactor: Silenced urllib warnings
* chore: Added Heroku setup file
* chore: Build debug
* chore: Updated heroku setup file
* chore: Added apt prebuild
* chore: Build debug
* chore: Fixed procfile
* chore: Added back requirements
* refactor: Reflected package import fix
* refactor: Removed unnecessary dependencies
* chore: Added workflow to check web server sanity
* style: Fixed lint
* refactor: Removed unused import
* chore: Fixed CI config
* chore: Fixed workflows
* feat: Added possibility to load env variables from .env
* docs: Updated README
---
 .github/workflows/web-server.yml     | 20 ++++++++++++++
 Aptfile                              |  2 ++
 Dockerfile                           | 28 +++++++++++++++++++
 Procfile                             |  1 +
 README.md                            | 22 ++++++++++++++-
 app/api/inference.py                 |  7 +++++
 app/api/routes/risk.py               | 14 ++++++++++
 app/api/schemas.py                   |  8 ++++++
 app/config.py                        | 16 +++++++++++
 app/main.py                          | 40 ++++++++++++++++++++++++++++
 docker-compose.yml                   | 13 +++++++++
 pyro_risks/__init__.py               |  2 ++
 pyro_risks/config.py                 |  9 +++++++
 pyro_risks/datasets/era_fwi_viirs.py | 13 +++++++--
 pyro_risks/datasets/queries_api.py   | 31 ++++++++++++++++++---
 pyro_risks/models/__init__.py        |  2 ++
 pyro_risks/models/predict.py         |  3 +++
 pyro_risks/models/score_v0.py        |  4 +++
 requirements-app.txt                 |  3 +++
 requirements.txt                     |  4 +--
 setup.py                             |  4 +--
 21 files changed, 234 insertions(+), 12 deletions(-)
 create mode 100644 .github/workflows/web-server.yml
 create mode 100644 Aptfile
 create mode 100644 Dockerfile
 create mode 100644 Procfile
 create mode 100644 app/api/inference.py
 create mode 100644 app/api/routes/risk.py
 create mode 100644 app/api/schemas.py
 create mode 100644 app/config.py
 create mode 100644 app/main.py
 create mode 100644 docker-compose.yml
 create mode 100644 pyro_risks/models/__init__.py
 create mode 100644 requirements-app.txt

diff --git a/.github/workflows/web-server.yml b/.github/workflows/web-server.yml
new file mode 100644
index 0000000..b644a81
--- /dev/null
+++ b/.github/workflows/web-server.yml
@@ -0,0 +1,20 @@
+name: web-server
+
+on:
+  push:
+    branches: [ master ]
+  pull_request:
+    branches: [ master ]
+
+jobs:
+  docker-ready:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v2
+      - name: Build & run docker
+        env:
+          CDS_UID: ${{ secrets.CDS_UID }}
+          CDS_API_KEY: ${{ secrets.CDS_API_KEY }}
+        run: PORT=8003 docker-compose up -d --build
+      - name: Ping app inside the container
+        run: sleep 5 && nc -vz localhost 8003
diff --git a/Aptfile b/Aptfile
new file mode 100644
index 0000000..3590f35
--- /dev/null
+++ b/Aptfile
@@ -0,0 +1,2 @@
+libspatialindex-dev
+python3-rtree
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..a4d914b
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,28 @@
+FROM python:3.8.1
+
+# set work directory
+WORKDIR /usr/src/app
+
+# set environment variables
+ENV PYTHONDONTWRITEBYTECODE 1
+ENV PYTHONUNBUFFERED 1
+
+# copy app requirements
+COPY ./requirements.txt requirements.txt
+COPY ./requirements-app.txt /usr/src/app/requirements-app.txt
+COPY ./setup.py setup.py
+COPY ./README.md README.md
+COPY ./pyro_risks pyro_risks
+
+# install dependencies
+RUN apt-get update && \
+    apt-get install --no-install-recommends -y libspatialindex-dev python3-rtree && \
+    pip install --upgrade pip setuptools wheel && \
+    pip install -e . && \
+    pip install -r /usr/src/app/requirements-app.txt && \
+    mkdir /usr/src/app/app && \
+    rm -rf /root/.cache/pip && \
+    rm -rf /var/lib/apt/lists/*
+
+# copy project
+COPY app/ /usr/src/app/app/
diff --git a/Procfile b/Procfile
new file mode 100644
index 0000000..91e5c97
--- /dev/null
+++ b/Procfile
@@ -0,0 +1 @@
+web: pip install -e . && pip install -r requirements-app.txt && uvicorn --reload --workers 1 --host 0.0.0.0 --port=${PORT:-5000} app.main:app
diff --git a/README.md b/README.md
index 6de834a..51c5844 100644
--- a/README.md
+++ b/README.md
@@ -47,7 +47,16 @@ pip install git+git://github.com/pyronear/pyro-risks
 
 ## Usage
 
-### datasets
+Beforehand, you will need to set a few environment variables, either manually or by writing an `.env` file in the root directory of this project, as in the example below:
+
+```
+CDS_UID=my_secret_uid
+CDS_API_KEY=my_very_secret_key
+```
+These values allow your web server to connect to the CDS [API](https://github.com/ecmwf/cdsapi), which is required for full access to the datasets.
+
+
+### pyro_risks.datasets
 
 Access all pyro-risks datasets.
 
@@ -57,6 +66,17 @@ firms = NASAFIRMS()
 noaa = NOAAWeather()
 ```
 
+### Web server
+
+To expose model inference, you can run the web server in a docker container with this command:
+
+```bash
+PORT=8003 docker-compose up -d --build
+```
+
+Once the build completes, a docker container will be running on the port you selected, ready to serve requests like any other web server.
+
+
 ## Examples
 
 You are free to merge the datasets however you want and to implement any zonal statistic you want, but some are already provided for reference. In order to use them check the example scripts options as follows:
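For reference, once the container described in the README section above is running, a request against the risk route added in `app/api/routes/risk.py` below could look like the following sketch. The country code, date value, and port 8003 are illustrative placeholders rather than values prescribed by this patch; `requests` is already listed in `requirements.txt`.

```python
# Hypothetical client-side sketch: query the /risk/{country}/{date} route
# served by the docker container from the README example (port 8003).
import requests

# "FR" and the date are placeholder arguments for the path parameters.
response = requests.get("http://localhost:8003/risk/FR/2020-05-05", timeout=600)
response.raise_for_status()

# The route returns a list of RegionRisk entries (see app/api/schemas.py).
for region in response.json():
    print(region["geocode"], region["score"], region.get("explainability"))
```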
diff --git a/app/api/inference.py b/app/api/inference.py
new file mode 100644
index 0000000..f8411e7
--- /dev/null
+++ b/app/api/inference.py
@@ -0,0 +1,7 @@
+from pyro_risks.models.predict import PyroRisk
+
+
+__all__ = ['predictor']
+
+
+predictor = PyroRisk(which='RF')
diff --git a/app/api/routes/risk.py b/app/api/routes/risk.py
new file mode 100644
index 0000000..9032ea6
--- /dev/null
+++ b/app/api/routes/risk.py
@@ -0,0 +1,14 @@
+from typing import List
+from fastapi import APIRouter
+from app.api.inference import predictor
+from app.api.schemas import RegionRisk
+
+
+router = APIRouter()
+
+
+@router.get("/{country}/{date}", response_model=List[RegionRisk], summary="Computes the wildfire risk")
+async def get_pyrorisk(country: str, date: str):
+    """Using the country identifier, this will compute the wildfire risk for all known subregions"""
+    preds = predictor.predict(date)
+    return [RegionRisk(geocode=k, score=v['score'], explainability=v['explainability']) for k, v in preds.items()]
diff --git a/app/api/schemas.py b/app/api/schemas.py
new file mode 100644
index 0000000..55f795f
--- /dev/null
+++ b/app/api/schemas.py
@@ -0,0 +1,8 @@
+from typing import Optional
+from pydantic import BaseModel, Field
+
+
+class RegionRisk(BaseModel):
+    geocode: str = Field(..., example="01")
+    score: float = Field(..., gt=0, lt=1, example=0.5)
+    explainability: Optional[str] = Field(None, example="weather")
diff --git a/app/config.py b/app/config.py
new file mode 100644
index 0000000..434536e
--- /dev/null
+++ b/app/config.py
@@ -0,0 +1,16 @@
+import os
+import secrets
+
+
+PROJECT_NAME: str = 'PyroRisk'
+PROJECT_DESCRIPTION: str = 'Wildfire risk estimation'
+VERSION: str = "0.1.0a0"
+DEBUG: bool = os.environ.get('DEBUG', '') != 'False'
+LOGO_URL: str = "https://pyronear.org/img/logo_letters.png"
+
+
+SECRET_KEY: str = secrets.token_urlsafe(32)
+if DEBUG:
+    # Keep the same auth across app reloads in debug mode so we do not have to re-authenticate.
+    debug_secret_key = "000000000000000000000000000000000000"
+    SECRET_KEY = debug_secret_key
diff --git a/app/main.py b/app/main.py
new file mode 100644
index 0000000..3c0d741
--- /dev/null
+++ b/app/main.py
@@ -0,0 +1,40 @@
+import time
+from fastapi import FastAPI, Request
+from fastapi.openapi.utils import get_openapi
+
+from app import config as cfg
+from app.api.routes import risk
+
+
+app = FastAPI(title=cfg.PROJECT_NAME, description=cfg.PROJECT_DESCRIPTION, debug=cfg.DEBUG, version=cfg.VERSION)
+
+# Routing
+app.include_router(risk.router, prefix="/risk", tags=["risk"])
+
+
+# Middleware
+@app.middleware("http")
+async def add_process_time_header(request: Request, call_next):
+    start_time = time.time()
+    response = await call_next(request)
+    process_time = time.time() - start_time
+    response.headers["X-Process-Time"] = str(process_time)
+    return response
+
+
+# Docs
+def custom_openapi():
+    if app.openapi_schema:
+        return app.openapi_schema
+    openapi_schema = get_openapi(
+        title=cfg.PROJECT_NAME,
+        version=cfg.VERSION,
+        description=cfg.PROJECT_DESCRIPTION,
+        routes=app.routes,
+    )
+    openapi_schema["info"]["x-logo"] = {"url": cfg.LOGO_URL}
+    app.openapi_schema = openapi_schema
+    return app.openapi_schema
+
+
+app.openapi = custom_openapi
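The `web-server` workflow above only checks that the container's port answers. A fuller sanity check of the routing and middleware wiring could look like the hypothetical sketch below; it assumes valid CDS credentials and enough data for `predictor.predict` to succeed, which this patch does not guarantee in CI.

```python
# Hypothetical smoke test using FastAPI's TestClient (requests-based),
# exercising app.main:app without a running server.
from fastapi.testclient import TestClient

from app.main import app

client = TestClient(app)


def test_get_pyrorisk():
    # Placeholder country code and date for the /risk/{country}/{date} route.
    response = client.get("/risk/FR/2020-05-05")
    assert response.status_code == 200
    # The timing middleware should stamp every response.
    assert "x-process-time" in response.headers
    # Each entry should follow the RegionRisk schema.
    assert all("geocode" in entry and "score" in entry for entry in response.json())
```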
diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644
index 0000000..a05ba86
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,13 @@
+version: '3.7'
+
+services:
+  web:
+    build: .
+    command: uvicorn app.main:app --reload --workers 1 --host 0.0.0.0 --port 8000
+    volumes:
+      - ./app/:/usr/src/app/app/
+    ports:
+      - ${PORT}:8000
+    environment:
+      - CDS_UID=${CDS_UID}
+      - CDS_API_KEY=${CDS_API_KEY}
diff --git a/pyro_risks/__init__.py b/pyro_risks/__init__.py
index 58f3ace..3ce2db8 100644
--- a/pyro_risks/__init__.py
+++ b/pyro_risks/__init__.py
@@ -1 +1,3 @@
 from .version import __version__
+from pyro_risks import datasets
+from pyro_risks import models
diff --git a/pyro_risks/config.py b/pyro_risks/config.py
index 78421ba..10dffb4 100644
--- a/pyro_risks/config.py
+++ b/pyro_risks/config.py
@@ -1,4 +1,9 @@
 import os
+from dotenv import load_dotenv
+
+# If there is an .env, load it
+load_dotenv()
+
 
 FR_GEOJSON: str = "https://france-geojson.gregoiredavid.fr/repo/departements.geojson"
 DATA_FALLBACK: str = (
@@ -67,3 +72,7 @@
     'objective': 'binary:logistic',
     'eval_metric': ['logloss', 'aucpr']
 }
+
+CACHE_FOLDER: str = ".cache"
+if not os.path.exists(CACHE_FOLDER):
+    os.makedirs(CACHE_FOLDER)
diff --git a/pyro_risks/datasets/era_fwi_viirs.py b/pyro_risks/datasets/era_fwi_viirs.py
index e099df4..0748534 100644
--- a/pyro_risks/datasets/era_fwi_viirs.py
+++ b/pyro_risks/datasets/era_fwi_viirs.py
@@ -1,12 +1,16 @@
+import logging
+import pandas as pd
+
 from pyro_risks.datasets import NASAFIRMS_VIIRS, ERA5Land
 from pyro_risks.datasets.utils import get_intersection_range
 from pyro_risks.datasets.fwi import GwisFwi
 
-import pandas as pd
-
 __all__ = ["MergedEraFwiViirs"]
 
 
+logger = logging.getLogger("uvicorn.info")
+
+
 def process_dataset_to_predict(fwi, era):
     """Groupby and merge fwi and era5 datasets for model predictions.
 
@@ -29,6 +33,8 @@
     agg_fwi_df.columns = ['day', 'nom'] + \
         [x[0] + '_' + x[1] for x in agg_fwi_df.columns if x[1] != '']
 
+    logger.info("Finished aggregation of FWI")
+
     # Group weather dataframe by day and department and compute min, max, mean, std
     agg_wth_df = weather.groupby(['time', 'nom'])[
         'u10', 'v10', 'd2m', 't2m', 'fal', 'lai_hv', 'lai_lv', 'skt',
@@ -38,10 +44,13 @@
     agg_wth_df.columns = ['day', 'nom'] + \
         [x[0] + '_' + x[1] for x in agg_wth_df.columns if x[1] != '']
 
+    logger.info("Finished aggregation of weather data")
+
     # Merge fwi and weather together
     res_df = pd.merge(agg_fwi_df,
                       agg_wth_df,
                       on=['day', 'nom'],
                       how='inner')
 
+    logger.info("Finished merging")
     return res_df
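A note on the `logging.getLogger("uvicorn.info")` handles introduced above: their messages show up in the uvicorn process logs when this code runs behind the web server. If these modules are run as standalone scripts instead, a minimal root logging setup along these lines (an illustrative addition, not part of the patch) is needed for the INFO messages to be printed:

```python
# Hypothetical standalone setup: give the "uvicorn.info" logger somewhere to
# propagate to when uvicorn is not running (standard library logging only).
import logging

logging.basicConfig(level=logging.INFO)
```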
diff --git a/pyro_risks/datasets/queries_api.py b/pyro_risks/datasets/queries_api.py
index 99bb7c5..48c4921 100644
--- a/pyro_risks/datasets/queries_api.py
+++ b/pyro_risks/datasets/queries_api.py
@@ -1,9 +1,15 @@
 import cdsapi
 import os
+import logging
+import urllib3
 
 from pyro_risks import config as cfg
 
 
+urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
+logger = logging.getLogger("uvicorn.info")
+
+
 def call_era5land(output_path: str, year: str, month: str, day: str) -> None:
     """Call cdpaspi to get ERA5Land data as file nc format for given date.
 
@@ -16,6 +22,12 @@ def call_era5land(output_path: str, year: str, month: str, day: str) -> None:
         month: str
         day: str
     """
+    file_path = os.path.join(output_path, f"era5land_{year}_{month}_{day}.nc")
+
+    if os.path.exists(file_path):
+        logger.info(f"Using cached {file_path}")
+        return
+
     c = cdsapi.Client(url=cfg.CDS_URL, key=f"{cfg.CDS_UID}:{cfg.CDS_API_KEY}", verify=0)
 
     c.retrieve(
@@ -85,7 +97,7 @@ def call_era5land(output_path: str, year: str, month: str, day: str) -> None:
             ],
             "format": "netcdf",
         },
-        os.path.join(output_path, f"era5land_{year}_{month}_{day}.nc"),
+        file_path,
     )
 
 
@@ -102,6 +114,12 @@ def call_era5t(output_path: str, year: str, month: str, day: str) -> None:
         month: str
         day: str
     """
+    file_path = os.path.join(output_path, f"era5t_{year}_{month}_{day}.nc")
+
+    if os.path.exists(file_path):
+        logger.info(f"Using cached {file_path}")
+        return
+
     c = cdsapi.Client(url=cfg.CDS_URL, key=f"{cfg.CDS_UID}:{cfg.CDS_API_KEY}", verify=0)
 
     c.retrieve(
@@ -387,7 +405,7 @@ def call_era5t(output_path: str, year: str, month: str, day: str) -> None:
             ],
             "format": "netcdf",
         },
-        os.path.join(output_path, f"era5t_{year}_{month}_{day}.nc"),
+        file_path,
     )
 
 # TODO : take only needed variables for the model
@@ -407,6 +425,13 @@ def call_fwi(output_path, year, month, day):
         month: str
         day: str
     """
+
+    file_path = os.path.join(output_path, f"fwi_{year}_{month}_{day}.zip")
+
+    if os.path.exists(file_path):
+        logger.info(f"Using cached {file_path}")
+        return
+
     c = cdsapi.Client(url=cfg.CDS_URL, key=f"{cfg.CDS_UID}:{cfg.CDS_API_KEY}", verify=0)
 
     c.retrieve(
@@ -430,4 +455,4 @@ def call_fwi(output_path, year, month, day):
             'product_type': 'reanalysis',
             'day': day,
         },
-        os.path.join(output_path, f"fwi_{year}_{month}_{day}.zip"))
+        file_path)
diff --git a/pyro_risks/models/__init__.py b/pyro_risks/models/__init__.py
new file mode 100644
index 0000000..8c62d61
--- /dev/null
+++ b/pyro_risks/models/__init__.py
@@ -0,0 +1,2 @@
+from .predict import *
+from .score_v0 import *
diff --git a/pyro_risks/models/predict.py b/pyro_risks/models/predict.py
index 83a6a31..dd62cb8 100644
--- a/pyro_risks/models/predict.py
+++ b/pyro_risks/models/predict.py
@@ -8,6 +8,9 @@
 from pyro_risks.models.score_v0 import add_lags
 
 
+__all__ = ['PyroRisk']
+
+
 class PyroRisk(object):
     """Pyronear risk score for fire danger on French departments.
 
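For orientation, the web app consumes this class through `app/api/inference.py` and `app/api/routes/risk.py` shown earlier. Outside the server, the same entry point could be exercised roughly as sketched below; the date string is a placeholder and the data downloads still require the CDS credentials described in the README.

```python
# Hypothetical standalone sketch mirroring app/api/inference.py and the risk route.
from pyro_risks.models.predict import PyroRisk

predictor = PyroRisk(which='RF')         # same constructor call as app/api/inference.py
preds = predictor.predict("2020-05-05")  # placeholder date, as passed by the route

# As in app/api/routes/risk.py, each entry maps a geocode to a score and explainability.
for geocode, pred in preds.items():
    print(geocode, pred['score'], pred['explainability'])
```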
diff --git a/pyro_risks/models/score_v0.py b/pyro_risks/models/score_v0.py
index 72fb916..e495559 100644
--- a/pyro_risks/models/score_v0.py
+++ b/pyro_risks/models/score_v0.py
@@ -7,6 +7,10 @@
 import numpy as np
 
 
+__all__ = ['prepare_dataset', 'target_correlated_features', 'split_train_test', 'add_lags', 'train_random_forest',
+           'xgb_model']
+
+
 SELECTED_DEP = ['Aisne',
                 'Alpes-Maritimes',
                 'Ardèche',
diff --git a/requirements-app.txt b/requirements-app.txt
new file mode 100644
index 0000000..281e37b
--- /dev/null
+++ b/requirements-app.txt
@@ -0,0 +1,3 @@
+fastapi==0.61.1
+uvicorn>=0.11.1
+pyro_risks
diff --git a/requirements.txt b/requirements.txt
index 5345ff6..f2927c4 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -6,13 +6,11 @@
 netCDF4>=1.5.4
 requests>=2.24.0
 xlrd==1.2.0
 
-docutils>=0.16
-sphinx>=3.1.2
 numpy>=1.18.5
-setuptools>=49.2.0
 xarray>=0.16.1
 scipy>=1.5.4
 scikit-learn>=0.23.2
 xgboost==1.2.1
 cdsapi==0.4.0
+python-dotenv>=0.15.0
diff --git a/setup.py b/setup.py
index 3a3dce5..ac5cd0f 100644
--- a/setup.py
+++ b/setup.py
@@ -46,11 +46,9 @@
     "scikit-learn>=0.23.2",
     "xgboost==1.2.1",
     "xlrd==1.2.0",
-    "docutils>=0.16",
-    "sphinx>=3.1.2",
     "numpy>=1.18.5",
-    "setuptools>=49.2.0",
     "cdsapi==0.4.0",
+    "python-dotenv>=0.15.0",
 ]
 
 setup(