Skip to content

Commit

Permalink
✨(project) add data7 CLI
Browse files Browse the repository at this point in the history
To ease adoption/onboarding, we need a CLI that facilitates data7
project bootstrapping and running.
  • Loading branch information
jmaupetit committed Jul 1, 2024
1 parent af456eb commit 6e85364
Show file tree
Hide file tree
Showing 15 changed files with 571 additions and 74 deletions.
44 changes: 44 additions & 0 deletions .github/workflows/cli.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
# Smoke-test the data7 CLI end to end: install the package, bootstrap a
# project, validate its configuration and start the server briefly.
name: cli

on:
  push:
    branches: ["main"]
  pull_request:
    branches: ["main"]

permissions:
  contents: read

jobs:
  commands:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - name: Set up Python 3.11
        uses: actions/setup-python@v5
        with:
          python-version: "3.11"
          cache: "pip"
      - name: Install data7
        run: pip install .
      - name: Init data7 project
        run: |
          mkdir -p /tmp/data7
          cd /tmp/data7
          data7 init
      - name: Download test database
        working-directory: /tmp/data7
        # --fail makes curl exit non-zero on HTTP errors instead of saving the
        # error page as the database; -S still reports the error despite -s.
        run: |
          mkdir -p db
          curl -fLsS -o db/development.db \
            https://github.com/lerocha/chinook-database/releases/download/v1.4.5/Chinook_Sqlite.sqlite
      - name: Check configuration
        working-directory: /tmp/data7
        env:
          ENV_FOR_DYNACONF: development
        run: data7 check
      - name: Run the server
        working-directory: /tmp/data7
        env:
          ENV_FOR_DYNACONF: development
        # Stop the server after 5 seconds and report its own exit status.
        run: timeout --preserve-status 5 data7 run
1 change: 0 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,6 @@ venv.bak/

# -- Tools
.coverage
logging-config.yaml

# dynaconf files
.secrets.yaml
Expand Down
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,10 @@ and this project adheres to

## [Unreleased]

### Added

- Implement `data7` CLI

## [0.1.0] - 2024-06-26

### Added
Expand Down
6 changes: 5 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,12 @@ build: ## install project
poetry install
.PHONY: build

check: ## check data7 configuration
poetry run data7 check
.PHONY: check

run: ## run the api server
poetry run uvicorn "data7.app:app" --reload --log-level debug --log-config logging-config.yaml
poetry run data7 run --log-level debug
.PHONY: run

# -- API
Expand Down
10 changes: 7 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,13 +1,17 @@
# Data7 - Dynamic datasets the easy way

> Pronounced data·set (7 like _sept_ in French).
> Pronounced data·set (**7** like **sept** in French).
## The idea :bulb:
![GitHub Actions Workflow Status](https://img.shields.io/github/actions/workflow/status/jmaupetit/data7/quality.yml)
![PyPI - Version](https://img.shields.io/pypi/v/data7)


## The idea 💡

**TL;DR** Data7 is a high performance web server that generates dynamic datasets
(in [CSV](https://en.wikipedia.org/wiki/Comma-separated_values) or
[Parquet](https://en.wikipedia.org/wiki/Apache_Parquet) formats) from existing
databases and stream them over HTTP :tada:
databases and streams them over HTTP 🎉

### Example usage

Expand Down
190 changes: 130 additions & 60 deletions poetry.lock

Large diffs are not rendered by default.

7 changes: 6 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,9 @@ readme = "README.md"
databases = {extras = ["aiosqlite"], version = "^0.9.0"}
dynaconf = "^3.2.5"
pyarrow = "^16.1.0"
python = "^3.10"
python = "^3.11"
starlette = "^0.37.2"
typer = "^0.12.3"
uvicorn = {extras = ["standard"], version = "^0.30.1"}

[tool.poetry.group.dev.dependencies]
Expand All @@ -21,6 +22,10 @@ pytest = "^8.2.2"
pytest-cov = "^5.0.0"
pytest-httpx = "^0.30.0"
ruff = "^0.4.10"
types-pyyaml = "^6.0.12.20240311"

[tool.poetry.scripts]
data7 = "data7.cli:cli"

# Third party packages configuration
[tool.coverage.run]
Expand Down
4 changes: 4 additions & 0 deletions src/data7/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1,5 @@
"""Data7 root module."""

from . import config

__all__ = ["config"]
7 changes: 2 additions & 5 deletions src/data7/__main__.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,6 @@
"""Data7 main entrypoint."""

import uvicorn

from .app import app
from .config import settings
from .cli import cli

if __name__ == "__main__":
uvicorn.run(app, host=settings.HOST, port=settings.PORT)
cli()
4 changes: 4 additions & 0 deletions src/data7/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ class MimeType(StrEnum):

async def populate_datasets() -> List[Dataset]:
"""Validate configured datasets and get sql query expected field names."""
logging.debug("Will populate datasets given configuration...")
datasets = []

for raw_dataset in settings.datasets:
Expand All @@ -74,6 +75,7 @@ async def populate_datasets() -> List[Dataset]:

datasets.append(dataset)

logger.info("Active datasets: %s", ", ".join(d.basename for d in datasets))
return datasets


Expand Down Expand Up @@ -217,6 +219,8 @@ async def lifespan(app):

middleware = [Middleware(GZipMiddleware, minimum_size=1000)]

logger.info("Active extensions: %s", ", ".join(e for e in Extension))

app = Starlette(
routes=routes,
middleware=middleware,
Expand Down
234 changes: 234 additions & 0 deletions src/data7/cli.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,234 @@
"""Data7 Command Line Interface."""

import copy
import shutil
from enum import IntEnum, StrEnum
from pathlib import Path
from sqlite3 import OperationalError as SqliteOperationalError
from typing import Optional

import anyio
import databases
import typer
import uvicorn
import yaml
from anyio import run as async_run
from rich.console import Console
from rich.syntax import Syntax
from sqlalchemy.exc import OperationalError

import data7

cli = typer.Typer(name="data7", no_args_is_help=True, pretty_exceptions_short=True)
console = Console()


class ExitCodes(IntEnum):
    """Process exit codes used by the data7 CLI.

    Members are intentionally not annotated: the typing specification asks
    type checkers to report explicit annotations on enum members as errors,
    their type being inferred from the enum class itself.
    """

    # Success.
    OK = 0
    # A required settings file is missing.
    INCOMPLETE_CONFIGURATION = 1
    # A settings file is not valid YAML or a dataset query is invalid.
    INVALID_CONFIGURATION = 2


class LogLevels(StrEnum):
"""Allowed log levels for the run command."""

DEBUG: str = "debug"
INFO: str = "info"
WARNING: str = "warning"
ERROR: str = "error"
CRITICAL: str = "critical"


@cli.command()
def init():
    """Initialize a data7 project.

    Copy every ``*.yaml.dist`` template distributed with the data7 package to
    the current working directory (without the ``.dist`` suffix) and print
    getting-started instructions. Existing files are left untouched and
    reported as skipped.
    """
    console.print(
        "[cyan]Creating configuration files from distributed templates...[/cyan]\n"
    )
    # Get data7 distributed settings files
    root_path = Path(data7.__file__).parent
    for settings_file in sorted(root_path.glob("*.yaml.dist")):
        # The stem drops the last suffix: "data7.yaml.dist" -> "data7.yaml",
        # resolved relatively to the current working directory.
        dest = Path(settings_file.stem)
        if dest.exists():
            # Never overwrite a file the user may already have edited
            check, remark = "❌", "skipped"
        else:
            shutil.copyfile(settings_file, dest)
            check, remark = "✅", ""
        console.print(f" {check} {dest} [yellow]{remark}[/yellow]")

    console.print("\n[cyan]Project ready to be configured 💫[/cyan]")

    # The YAML examples are built line by line so that their nesting (which is
    # significant in YAML) does not depend on the indentation of this module.
    secrets_example = (
        "# .secrets.yaml\n"
        "#\n"
        "# Example settings for production environment\n"
        "production:\n"
        '  DATABASE_URL: "postgresql+asyncpg://user:pass@server:port/acme"\n'
    )
    datasets_example = (
        "# data7.yaml\n"
        "#\n"
        "# Example settings for production environment\n"
        "production:\n"
        "  datasets:\n"
        '    - basename: "invoices"\n'
        '      query: "SELECT * FROM Invoices"'
    )

    console.print(
        "\n👉 Edit the [cyan].secrets.yaml[/cyan] file to set your database URL:\n"
    )
    console.print(Syntax(secrets_example, "yaml"))

    console.print(
        "\n👉 Edit the [cyan]data7.yaml[/cyan] file to define your datasets:\n"
    )
    console.print(Syntax(datasets_example, "yaml"))

    console.print("\n👉 Test your configuration by running the development server:\n")
    console.print(Syntax("# Type the following command in a terminal\ndata7 run", "sh"))

    console.print("\nIf everything went well, the webserver should start ✨\n")

    console.print(
        (
            "💡 [i]If not, the [not i green]data7 check[/not i green] command should "
            "give you hints about what went wrong.\n"
        )
    )


def check_settings_files_exist():
    """Verify that every expected settings file is present.

    Files listed in ``data7.config.SETTINGS_FILES`` are looked up in order;
    each one found is reported, and the first missing one aborts the command.

    Raises:
        typer.Exit: with ``ExitCodes.INCOMPLETE_CONFIGURATION`` when a
            settings file does not exist.
    """
    console.rule("[yellow]check[/yellow] // [bold cyan]settings files exist")

    for name in data7.config.SETTINGS_FILES:
        if Path(name).exists():
            console.print(f"✅ {name}")
            continue
        # Stop at the first missing file
        console.print(f"❌ {name} is missing")
        raise typer.Exit(ExitCodes.INCOMPLETE_CONFIGURATION)


def check_settings_files_format():
    """Check all settings files format as valid, safe YAML.

    Each file listed in ``data7.config.SETTINGS_FILES`` is parsed using
    ``yaml.safe_load`` and its parsed content is printed to the console.

    Raises:
        typer.Exit: with ``ExitCodes.INVALID_CONFIGURATION`` as soon as a
            settings file cannot be loaded as YAML.
    """
    console.rule("[yellow]check[/yellow] // [bold cyan]settings files format")

    for setting_file in data7.config.SETTINGS_FILES:
        try:
            content = yaml.safe_load(Path(setting_file).read_text())
        except yaml.YAMLError as err:
            # YAMLError is the base class of all PyYAML errors: catching it
            # also reports scanner, composer and constructor failures, not
            # only parser ones.
            console.print(f"❌ {setting_file}")
            console.print_exception(max_frames=1, suppress=[yaml])
            raise typer.Exit(ExitCodes.INVALID_CONFIGURATION) from err
        console.print(f"✅ {setting_file}")
        console.print(content)


async def check_database_connection():
    """Check database URL connection.

    Connect to the database configured by the ``DATABASE_URL`` setting and
    execute a trivial ``SELECT 1`` query. Connection or query errors are not
    handled here: they propagate to the caller.
    """
    console.rule("[yellow]check[/yellow] // [bold cyan]database connection")

    database_url = data7.config.settings.DATABASE_URL
    with console.status("Connecting to database...", spinner="dots"):
        database = databases.Database(database_url)
        await database.connect()
        try:
            await database.execute(query="SELECT 1")
        finally:
            # Release the connection even if the probe query fails
            await database.disconnect()
    console.print("✅ database connection looks ok from here")
    console.print("⚠️[yellow i] Note that there may be false positive for sqlite")


async def check_datasets_queries():
    """Check datasets defined queries.

    Every dataset configured in the ``datasets`` setting has its query
    executed against the configured database, fetching a single row.

    Raises:
        typer.Exit: with ``ExitCodes.INVALID_CONFIGURATION`` as soon as a
            query fails with an operational error.
    """
    console.rule("[yellow]check[/yellow] // [bold cyan]datasets queries")

    settings = data7.config.settings
    database = databases.Database(settings.DATABASE_URL)
    await database.connect()

    try:
        for dataset in settings.datasets:
            console.print(f"👉 [b cyan]{dataset.basename}")
            console.print(f" [i]{dataset.query}")
            try:
                await database.fetch_one(dataset.query)
            except (OperationalError, SqliteOperationalError) as err:
                console.print("❌ Invalid database query")
                console.print_exception(max_frames=1, suppress=[anyio, databases])
                raise typer.Exit(ExitCodes.INVALID_CONFIGURATION) from err
            console.print(" ✅ valid\n")
    finally:
        # Always release the connection, including when an invalid query
        # aborts the command.
        await database.disconnect()


@cli.command()
def check():
    """Check data7 project configuration.

    Checks:

    1. all settings files SHOULD exist
    2. settings files format SHOULD be valid YAML
    3. configured database connection SHOULD be valid (driver installed and valid url)
    4. datasets SQL queries SHOULD be valid SQL
    """
    # Checks run in order: each one relies on the previous ones to succeed
    check_settings_files_exist()
    check_settings_files_format()
    async_run(check_database_connection)
    async_run(check_datasets_queries)

    console.print("\n💫 All checks are successful. w00t 🎉")


@cli.command()
def run(  # noqa: PLR0913
    host: Optional[str] = None,  # falls back to the HOST setting
    port: Optional[int] = None,  # falls back to the PORT setting
    reload: bool = True,
    workers: Optional[int] = None,
    root_path: str = "",
    proxy_headers: bool = False,
    log_level: LogLevels = LogLevels.INFO,
):
    """Run data7 web server.

    Host and port given on the command line take precedence; when omitted, the
    HOST and PORT settings are used, falling back to localhost:8000.
    """
    default_host = "localhost"
    default_port = 8000
    # Only look at settings when the option was not explicitly provided
    if host is None:
        host = data7.config.settings.get("HOST", default_host)
    if port is None:
        port = data7.config.settings.get("PORT", default_port)

    # Configure logging: work on a deep copy so that the nested "loggers"
    # mapping of uvicorn's module-level LOGGING_CONFIG is left untouched.
    log_config = copy.deepcopy(uvicorn.config.LOGGING_CONFIG)
    log_config["loggers"]["data7.app"] = {
        "handlers": ["default"],
        "level": log_level.value.upper(),
        "propagate": False,
    }

    # All remaining options are forwarded unchanged to uvicorn
    uvicorn.run(
        "data7.app:app",
        host=host,
        port=port,
        reload=reload,
        workers=workers,
        root_path=root_path,
        proxy_headers=proxy_headers,
        log_level=log_level.value,
        log_config=log_config,
    )
Loading

0 comments on commit 6e85364

Please sign in to comment.