cache & refactor updates (ExpDev07#310)
* reduce cache size (#21)
* formatting (120 length -> 100)
* csbs to/from Redis
* nyt to/from Redis
* ignore locustfile
* unused coordinates
* cache redis json serialization error
* fix nyt redis serialization error
* refactor Timeline class to pydantic model
* partial recovery fix (#31)
* fix jhu timeline init call
* update requirements
* update isort usage

Co-authored-by: DeepSource Bot <[email protected]>
Co-authored-by: Thanh Nguyen <[email protected]>
Co-authored-by: codedawi <[email protected]>
Co-authored-by: sourcery-ai[bot] <58596630+sourcery-ai[bot]@users.noreply.github.com>
Co-authored-by: Sourcery AI <[email protected]>
6 people authored Oct 12, 2020
1 parent 1508dce commit 69c2d02
Showing 25 changed files with 504 additions and 388 deletions.
10 changes: 10 additions & 0 deletions .deepsource.toml
@@ -0,0 +1,10 @@
version = 1

test_patterns = ["tests/**"]

[[analyzers]]
name = "python"
enabled = true

[analyzers.meta]
runtime_version = "3.x.x"
1 change: 1 addition & 0 deletions .gitignore
@@ -51,6 +51,7 @@ htmlcov/
nosetests.xml
coverage.xml
*,cover
locustfile.py

# Translations
*.mo
301 changes: 152 additions & 149 deletions Pipfile.lock

Large diffs are not rendered by default.

6 changes: 5 additions & 1 deletion app/data/__init__.py
@@ -4,7 +4,11 @@
from ..services.location.nyt import NYTLocationService

# Mapping of services to data-sources.
DATA_SOURCES = {"jhu": JhuLocationService(), "csbs": CSBSLocationService(), "nyt": NYTLocationService()}
DATA_SOURCES = {
"jhu": JhuLocationService(),
"csbs": CSBSLocationService(),
"nyt": NYTLocationService(),
}


def data_source(source):
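
The body of data_source is collapsed in this view. A minimal sketch of what such a lookup helper would do, assuming it simply resolves a source key against DATA_SOURCES (the actual implementation may differ):

def data_source(source):
    """Return the service instance for the given source key, or None if unknown (sketch only)."""
    return DATA_SOURCES.get(source.lower())
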
13 changes: 11 additions & 2 deletions app/io.py
@@ -10,7 +10,11 @@


def save(
name: str, content: Union[str, Dict, List], write_mode: str = "w", indent: int = 2, **json_dumps_kwargs
name: str,
content: Union[str, Dict, List],
write_mode: str = "w",
indent: int = 2,
**json_dumps_kwargs,
) -> pathlib.Path:
"""Save content to a file. If content is a dictionary, use json.dumps()."""
path = DATA / name
@@ -35,7 +39,12 @@ class AIO:

@classmethod
async def save(
cls, name: str, content: Union[str, Dict, List], write_mode: str = "w", indent: int = 2, **json_dumps_kwargs
cls,
name: str,
content: Union[str, Dict, List],
write_mode: str = "w",
indent: int = 2,
**json_dumps_kwargs,
):
"""Save content to a file. If content is a dictionary, use json.dumps()."""
path = DATA / name
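
For context, a usage sketch of the save helper above (file name and content are illustrative; DATA is the module's data-directory constant):

from app.io import save

path = save("example.json", {"region": "US", "count": 3})  # dict content is passed through json.dumps() with indent=2
print(path)  # the returned pathlib.Path, i.e. DATA / "example.json"
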
8 changes: 6 additions & 2 deletions app/location/__init__.py
@@ -11,7 +11,7 @@ class Location: # pylint: disable=too-many-instance-attributes
"""

def __init__(
self, id, country, province, coordinates, last_updated, confirmed, deaths, recovered
self, id, country, province, coordinates, last_updated, confirmed, deaths, recovered,
): # pylint: disable=too-many-arguments
# General info.
self.id = id
@@ -66,7 +66,11 @@ def serialize(self):
# Last updated.
"last_updated": self.last_updated,
# Latest data (statistics).
"latest": {"confirmed": self.confirmed, "deaths": self.deaths, "recovered": self.recovered},
"latest": {
"confirmed": self.confirmed,
"deaths": self.deaths,
"recovered": self.recovered,
},
}


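
The top of serialize() is collapsed above; assuming it emits the remaining constructor fields, a serialized location looks roughly like this (shape and values are illustrative):

{
    "id": 0,
    "country": "US",
    "province": "New York",
    "coordinates": {"latitude": 40.71, "longitude": -74.0},
    "last_updated": "2020-10-12T00:00:00Z",
    "latest": {"confirmed": 0, "deaths": 0, "recovered": 0},
}
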
40 changes: 9 additions & 31 deletions app/main.py
@@ -6,11 +6,10 @@
import pydantic
import sentry_sdk
import uvicorn
from fastapi import FastAPI, Request, Response, openapi
from fastapi import FastAPI, Request, Response
from fastapi.middleware.cors import CORSMiddleware
from fastapi.middleware.gzip import GZipMiddleware
from fastapi.responses import JSONResponse
from fastapi.staticfiles import StaticFiles
from scout_apm.async_.starlette import ScoutMiddleware
from sentry_sdk.integrations.asgi import SentryAsgiMiddleware

@@ -35,9 +34,9 @@
"API for tracking the global coronavirus (COVID-19, SARS-CoV-2) outbreak."
" Project page: https://github.com/ExpDev07/coronavirus-tracker-api."
),
version="2.0.3",
docs_url=None,
redoc_url=None,
version="2.0.4",
docs_url="/",
redoc_url="/docs",
on_startup=[setup_client_session],
on_shutdown=[teardown_client_session],
)
@@ -60,7 +59,11 @@

# Enable CORS.
APP.add_middleware(
CORSMiddleware, allow_credentials=True, allow_origins=["*"], allow_methods=["*"], allow_headers=["*"],
CORSMiddleware,
allow_credentials=True,
allow_origins=["*"],
allow_methods=["*"],
allow_headers=["*"],
)
APP.add_middleware(GZipMiddleware, minimum_size=1000)

@@ -109,31 +112,6 @@ async def handle_validation_error(
# Include routers.
APP.include_router(V1, prefix="", tags=["v1"])
APP.include_router(V2, prefix="/v2", tags=["v2"])
APP.mount("/static", StaticFiles(directory="static"), name="static")

# ##############
# Swagger/Redocs
# ##############


@APP.get("/", include_in_schema=False)
async def custom_swagger_ui_html():
"""Serve Swagger UI."""
return openapi.docs.get_swagger_ui_html(
openapi_url=APP.openapi_url,
title=f"{APP.title} - Swagger UI",
oauth2_redirect_url=APP.swagger_ui_oauth2_redirect_url,
swagger_js_url="/static/swagger-ui-bundle.js",
swagger_css_url="/static/swagger-ui.css",
)


@APP.get("/docs", include_in_schema=False)
async def redoc_html():
"""Serve ReDoc UI."""
return openapi.docs.get_redoc_html(
openapi_url=APP.openapi_url, title=f"{APP.title} - ReDoc", redoc_js_url="/static/redoc.standalone.js",
)


# Running of app.
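
Setting docs_url and redoc_url lets FastAPI serve its bundled Swagger UI and ReDoc pages itself, which is why the /static mount and the two custom endpoints above could be dropped. A minimal sketch of the same pattern:

from fastapi import FastAPI

app = FastAPI(
    title="Example API",
    docs_url="/",       # serve Swagger UI at the root
    redoc_url="/docs",  # serve ReDoc at /docs
)
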
21 changes: 19 additions & 2 deletions app/models.py
@@ -1,7 +1,7 @@
"""app.models.py"""
from typing import Dict, List

from pydantic import BaseModel
from pydantic import BaseModel, validator


class Latest(BaseModel):
@@ -27,9 +27,26 @@ class Timeline(BaseModel):
Timeline model.
"""

latest: int
timeline: Dict[str, int] = {}

@validator("timeline")
@classmethod
def sort_timeline(cls, value):
"""Sort the timeline history before inserting into the model"""
return dict(sorted(value.items()))

@property
def latest(self):
"""Get latest available history value."""
return list(self.timeline.values())[-1] if self.timeline else 0

def serialize(self):
"""
Serialize the model into dict
TODO: override dict() instead of using serialize
"""
return {**self.dict(), "latest": self.latest}


class Timelines(BaseModel):
"""
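
A quick usage sketch of the refactored Timeline model (dates and counts are illustrative):

from app.models import Timeline

history = Timeline(timeline={"2020-03-02": 5, "2020-03-01": 2})
history.latest       # 5: the last value once the validator has date-sorted the keys
history.serialize()  # {"timeline": {"2020-03-01": 2, "2020-03-02": 5}, "latest": 5}
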
6 changes: 5 additions & 1 deletion app/routers/v1.py
@@ -19,7 +19,11 @@ async def all_categories():
"deaths": deaths,
"recovered": recovered,
# Latest.
"latest": {"confirmed": confirmed["latest"], "deaths": deaths["latest"], "recovered": recovered["latest"],},
"latest": {
"confirmed": confirmed["latest"],
"deaths": deaths["latest"],
"recovered": recovered["latest"],
},
}


14 changes: 11 additions & 3 deletions app/routers/v2.py
@@ -65,11 +65,17 @@ async def get_locations(

# Do filtering.
try:
locations = [location for location in locations if str(getattr(location, key)).lower() == str(value)]
locations = [
location
for location in locations
if str(getattr(location, key)).lower() == str(value)
]
except AttributeError:
pass
if not locations:
raise HTTPException(404, detail=f"Source `{source}` does not have the desired location data.")
raise HTTPException(
404, detail=f"Source `{source}` does not have the desired location data.",
)

# Return final serialized data.
return {
@@ -84,7 +90,9 @@

# pylint: disable=invalid-name
@V2.get("/locations/{id}", response_model=LocationResponse)
async def get_location_by_id(request: Request, id: int, source: Sources = "jhu", timelines: bool = True):
async def get_location_by_id(
request: Request, id: int, source: Sources = "jhu", timelines: bool = True
):
"""
Getting specific location by id.
"""
95 changes: 53 additions & 42 deletions app/services/location/csbs.py
@@ -6,6 +6,7 @@
from asyncache import cached
from cachetools import TTLCache

from ...caches import check_cache, load_cache
from ...coordinates import Coordinates
from ...location.csbs import CSBSLocation
from ...utils import httputils
@@ -34,7 +35,7 @@ async def get(self, loc_id): # pylint: disable=arguments-differ
BASE_URL = "https://facts.csbs.org/covid-19/covid19_county.csv"


@cached(cache=TTLCache(maxsize=1, ttl=3600))
@cached(cache=TTLCache(maxsize=1, ttl=1800))
async def get_locations():
"""
Retrieves county locations; locations are cached for 30 minutes
@@ -44,48 +45,58 @@ async def get_locations():
"""
data_id = "csbs.locations"
LOGGER.info(f"{data_id} Requesting data...")
async with httputils.CLIENT_SESSION.get(BASE_URL) as response:
text = await response.text()

LOGGER.debug(f"{data_id} Data received")

data = list(csv.DictReader(text.splitlines()))
LOGGER.debug(f"{data_id} CSV parsed")

locations = []

for i, item in enumerate(data):
# General info.
state = item["State Name"]
county = item["County Name"]

# Ensure county is specified.
if county in {"Unassigned", "Unknown"}:
continue

# Coordinates.
coordinates = Coordinates(item["Latitude"], item["Longitude"]) # pylint: disable=unused-variable

# Date string without "EDT" at end.
last_update = " ".join(item["Last Update"].split(" ")[0:2])

# Append to locations.
locations.append(
CSBSLocation(
# General info.
i,
state,
county,
# Coordinates.
Coordinates(item["Latitude"], item["Longitude"]),
# Last update (parse as ISO).
datetime.strptime(last_update, "%Y-%m-%d %H:%M").isoformat() + "Z",
# Statistics.
int(item["Confirmed"] or 0),
int(item["Death"] or 0),
# check shared cache
cache_results = await check_cache(data_id)
if cache_results:
LOGGER.info(f"{data_id} using shared cache results")
locations = cache_results
else:
LOGGER.info(f"{data_id} shared cache empty")
async with httputils.CLIENT_SESSION.get(BASE_URL) as response:
text = await response.text()

LOGGER.debug(f"{data_id} Data received")

data = list(csv.DictReader(text.splitlines()))
LOGGER.debug(f"{data_id} CSV parsed")

locations = []

for i, item in enumerate(data):
# General info.
state = item["State Name"]
county = item["County Name"]

# Ensure county is specified.
if county in {"Unassigned", "Unknown"}:
continue

# Date string without "EDT" at end.
last_update = " ".join(item["Last Update"].split(" ")[0:2])

# Append to locations.
locations.append(
CSBSLocation(
# General info.
i,
state,
county,
# Coordinates.
Coordinates(item["Latitude"], item["Longitude"]),
# Last update (parse as ISO).
datetime.strptime(last_update, "%Y-%m-%d %H:%M").isoformat() + "Z",
# Statistics.
int(item["Confirmed"] or 0),
int(item["Death"] or 0),
)
)
)
LOGGER.info(f"{data_id} Data normalized")
LOGGER.info(f"{data_id} Data normalized")
# save the results to distributed cache
# TODO: fix json serialization
try:
await load_cache(data_id, locations)
except TypeError as type_err:
LOGGER.error(type_err)

# Return the locations.
return locations
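
app/caches itself is not part of this diff; the sketch below shows what check_cache and load_cache plausibly look like, assuming an async Redis client and plain JSON serialization (client library, connection string, and TTL are all assumptions). It also shows why the except TypeError branch above exists: json.dumps raises TypeError for objects, such as CSBSLocation instances, that are not natively JSON-serializable, which is the "cache redis json serialization error" noted in the commit message.

import json

import aioredis  # assumed client library; not shown in this diff

REDIS_URL = "redis://localhost"  # hypothetical connection string


async def check_cache(data_id):
    """Return deserialized results stored under data_id, or None on a cache miss."""
    redis = await aioredis.create_redis_pool(REDIS_URL)
    raw = await redis.get(data_id)
    return json.loads(raw) if raw else None


async def load_cache(data_id, results):
    """JSON-serialize results and store them under data_id with an expiry."""
    redis = await aioredis.create_redis_pool(REDIS_URL)
    # json.dumps raises TypeError when results contains non-serializable objects,
    # which the caller above logs and tolerates.
    await redis.set(data_id, json.dumps(results), expire=1800)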
