Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Move service definition files #50

Merged
merged 10 commits into from
Dec 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions .github/workflows/on-push.yml
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,12 @@ jobs:
repository: ecmwf-projects/cdm-obs.git
ref: 'new-variables'
path: common_data_model
- name: Download cads-forms-insitu
env:
BITBUCKET_TOKEN: ${{ secrets.BITBUCKET_TOKEN }}
timeout-minutes: 2
run: |
git clone --depth 1 -b dev https://"$BITBUCKET_TOKEN"@git.ecmwf.int/scm/cds/cads-forms-insitu.git
- name: Deploy test ingestion database
env:
TEST_INGESTION_DB_PASS: ${{ secrets.TEST_INGESTION_DB_PASS }}
Expand Down Expand Up @@ -88,6 +94,7 @@ jobs:
STORAGE_SECRET_KEY: ${{ secrets.STORAGE_SECRET_KEY}}
STORAGE_SECURE: ${{ secrets.STORAGE_SECURE}}
CDM_TABLES_LOCATION: ${{ github.workspace }}
CADS_OBS_INSITU_LOCATION: ${{ github.workspace }}
run: |
ls ${GITHUB_WORKSPACE}/common_data_model/*
make unit-tests COV_REPORT=xml
Expand Down
13 changes: 6 additions & 7 deletions cdsobs/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
from cdsobs.metadata import get_dataset_metadata
from cdsobs.observation_catalogue.repositories.cads_dataset import CadsDatasetRepository
from cdsobs.retrieve.filter_datasets import between
from cdsobs.service_definition.api import get_service_definition
from cdsobs.service_definition.service_definition_models import ServiceDefinition
from cdsobs.storage import S3Client
from cdsobs.utils.logutils import get_logger
Expand All @@ -38,7 +39,6 @@

def run_ingestion_pipeline(
dataset_name: str,
service_definition: ServiceDefinition,
source: str,
session: Session,
config: CDSObsConfig,
Expand All @@ -60,8 +60,6 @@ def run_ingestion_pipeline(
----------
dataset_name :
Name of the dataset, for example insitu-observations-woudc-ozone-total-column-and-profiles
service_definition :
Object produced parsing the service_definition.json.
source :
Name of the data type to read from the dataset. For example "OzoneSonde".
session :
Expand All @@ -80,6 +78,7 @@ def run_ingestion_pipeline(
Month to start reading the data. It only applies to the first year of the interval.
Default is 1.
"""
service_definition = get_service_definition(config, dataset_name)

def _run_for_batch(time_space_batch):
try:
Expand All @@ -106,7 +105,6 @@ def _run_for_batch(time_space_batch):

def run_make_cdm(
dataset_name: str,
service_definition: ServiceDefinition,
source: str,
config: CDSObsConfig,
start_year: int,
Expand All @@ -125,8 +123,6 @@ def run_make_cdm(
----------
dataset_name :
Name of the dataset, for example insitu-observations-woudc-ozone-total-column-and-profiles
service_definition
Object produced parsing the service_definition.json.
source
Name of the data type to read from the dataset. For example "OzoneSonde".
config
Expand All @@ -142,6 +138,7 @@ def run_make_cdm(
make_production. If False, the data only will be loaded and checked for CDM
compliance in memory.
"""
service_definition = get_service_definition(config, dataset_name)

def _run_for_batch(time_batch):
try:
Expand Down Expand Up @@ -196,7 +193,9 @@ def _run_ingestion_pipeline_for_batch(
By default, these time intervals will be skipped.
"""
if not update and _entry_exists(dataset_name, session, source, time_space_batch):
logger.warning("A partition with the chosen parameters already exists")
logger.warning(
"A partition with the chosen parameters already exists and update is set to False."
)
else:
sorted_partitions = _read_homogenise_and_partition(
config, dataset_name, service_definition, source, time_space_batch
Expand Down
20 changes: 0 additions & 20 deletions cdsobs/api_rest/config_helper.py

This file was deleted.

14 changes: 10 additions & 4 deletions cdsobs/api_rest/endpoints.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,17 +100,23 @@ def get_capabilities(


@router.get("/capabilities/{dataset}/sources")
def get_sources(
    dataset: str,
    session: Annotated[HttpAPISession, Depends(session_gen)],
) -> list[str]:
    """Get available sources for a given dataset.

    Parameters
    ----------
    dataset :
        Name of the dataset whose service definition is looked up.
    session :
        HTTP API session, used here only for its ``cdsobs_config``.

    Returns
    -------
    list[str]
        Names of the sources declared in the dataset's service definition.

    Raises
    ------
    HTTPException
        With status 404 when no service definition is found for ``dataset``.
    """
    try:
        service_definition = get_service_definition(session.cdsobs_config, dataset)
    except FileNotFoundError:
        # Same handling as the /{dataset}/service_definition endpoint: an
        # unknown dataset is a client error, not an internal server error.
        raise make_http_exception(
            status_code=404, message=f"Service definition not found for {dataset=}"
        )
    return list(service_definition.sources)


@router.get("/{dataset}/service_definition")
def get_dataset_service_definition(dataset: str) -> ServiceDefinition:
def get_dataset_service_definition(
dataset: str,
session: Annotated[HttpAPISession, Depends(session_gen)],
) -> ServiceDefinition:
"""Get the service definition for a dataset."""
try:
return get_service_definition(dataset)
return get_service_definition(session.cdsobs_config, dataset)
except FileNotFoundError:
raise make_http_exception(
status_code=404, message=f"Service definition not found for {dataset=}"
Expand Down
1 change: 1 addition & 0 deletions cdsobs/cli/_get_forms_jsons.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ def get_forms_jsons_command(
dataset_name,
catalogue_repository,
output_dir,
config=config,
upload_to_storage=upload,
storage_client=storage_client,
get_stations_file=stations_file,
Expand Down
41 changes: 12 additions & 29 deletions cdsobs/cli/_make_cdm.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,20 +6,12 @@
from cdsobs.api import run_make_cdm
from cdsobs.cli._utils import config_yml_typer
from cdsobs.config import read_and_validate_config
from cdsobs.service_definition.api import validate_service_definition


def make_cdm(
dataset_name: str = typer.Option(
..., "--dataset", "-d", help="Dataset name", show_default=False
),
service_definition_json: Path = typer.Option(
...,
"--service-definition",
"-s",
help="Path to the service_definition.json",
show_default=False,
),
start_year: int = typer.Option(
..., help="Year to start processing the data", show_default=False
),
Expand All @@ -28,7 +20,9 @@ def make_cdm(
),
cdsobs_config_yml: Path = config_yml_typer,
source: str = typer.Option(
"all", help="Process only a given source, by default it processes all"
...,
help="Source to process. Sources are defined in the service definition file,"
"in the sources mapping.",
),
output_dir: Path = typer.Option(
tempfile.gettempdir(),
Expand All @@ -45,23 +39,12 @@ def make_cdm(
):
"""Prepare the data to be uploaded without actually uploading it."""
config = read_and_validate_config(cdsobs_config_yml)

# read and validate service definition
service_definition = validate_service_definition(
str(service_definition_json), config.cdm_tables_location
)[0]
assert service_definition is not None

# Check if we selected only one source
sources = [source] if source != "all" else service_definition.sources.keys()
for source in sources:
run_make_cdm(
dataset_name,
service_definition,
source,
config,
start_year=start_year,
end_year=end_year,
output_dir=output_dir,
save_data=save_data,
)
run_make_cdm(
dataset_name,
source,
config,
start_year=start_year,
end_year=end_year,
output_dir=output_dir,
save_data=save_data,
)
45 changes: 13 additions & 32 deletions cdsobs/cli/_make_production.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,20 +6,12 @@
from cdsobs.cli._utils import config_yml_typer
from cdsobs.config import read_and_validate_config
from cdsobs.observation_catalogue.database import get_session
from cdsobs.service_definition.api import validate_service_definition


def make_production(
dataset_name: str = typer.Option(
..., "--dataset", "-d", help="Dataset name", show_default=False
),
service_definition_json: Path = typer.Option(
...,
"--service-definition",
"-s",
help="Path to the service_definition.json",
show_default=False,
),
start_year: int = typer.Option(
..., help="Year to start processing the data", show_default=False
),
Expand All @@ -28,7 +20,9 @@ def make_production(
),
cdsobs_config_yml: Path = config_yml_typer,
source: str = typer.Option(
"all", help="Process only a given source, by default it processes all"
...,
help="Source to process. Sources are defined in the service definition file,"
"in the sources mapping.",
),
update: bool = typer.Option(
False,
Expand All @@ -54,27 +48,14 @@ def make_production(
uploads it to the observation catalogue and storage.
"""
config = read_and_validate_config(cdsobs_config_yml)

# read and validate service definition
service_definition = validate_service_definition(
str(service_definition_json), config.cdm_tables_location
)[0]
assert service_definition is not None

# Check if we selected only one source
sources = [source] if source != "all" else service_definition.sources.keys()

# ingestion pipeline per source
with get_session(config.catalogue_db) as session:
for source in sources:
run_ingestion_pipeline(
dataset_name,
service_definition,
source,
session,
config,
start_year,
end_year,
update,
start_month,
)
run_ingestion_pipeline(
dataset_name,
source,
session,
config,
start_year,
end_year,
update,
start_month,
)
2 changes: 1 addition & 1 deletion cdsobs/cli/_retrieve.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ def retrieve(
config = validate_config(cdsobs_config_yml)
s3_client = S3Client.from_config(config.s3config)
output_file = retrieve_observations(
config.catalogue_db.get_url(),
config,
s3_client.public_url_base,
retrieve_args,
output_dir,
Expand Down
13 changes: 11 additions & 2 deletions cdsobs/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,16 @@


def _get_default_cdm_tables_location() -> Path:
    """Return the default location of the CDM tables.

    The lookup is delegated to ``_get_default_location`` with the
    ``CDM_TABLES_LOCATION`` environment variable, so the environment check
    and the fallback directory are defined in a single place.
    """
    return _get_default_location("CDM_TABLES_LOCATION")


def _get_default_cads_forms_insitu_location() -> Path:
    """Return the default location used for the cads-forms-insitu files.

    The path is resolved by ``_get_default_location`` from the
    ``CADS_OBS_INSITU_LOCATION`` environment variable (note that the variable
    is named "obs", not "forms").
    """
    env_varname = "CADS_OBS_INSITU_LOCATION"
    return _get_default_location(env_varname)


def _get_default_location(env_varname: str) -> Path:
    """Return the directory configured through an environment variable.

    Parameters
    ----------
    env_varname :
        Name of the environment variable holding the directory path.

    Returns
    -------
    Path
        The value of ``env_varname`` as a path when it is set to a non-empty
        string, otherwise ``~/.cdsobs``.
    """
    location = os.environ.get(env_varname)
    # An empty value (e.g. an undefined CI variable expanded to "") would
    # become Path("") == Path("."), silently pointing at the current working
    # directory, so it is handled like an unset variable.
    if not location:
        return Path.home().joinpath(".cdsobs")
    return Path(location)

Expand Down Expand Up @@ -183,6 +191,7 @@ class CDSObsConfig(pydantic.BaseModel):
ingestion_databases: Dict[str, DBConfig]
datasets: List[DatasetConfig]
cdm_tables_location: Path = _get_default_cdm_tables_location()
cads_obs_insitu_location: Path = _get_default_cads_forms_insitu_location()

@classmethod
def from_yaml(cls, config_file: Path) -> "CDSObsConfig":
Expand Down
4 changes: 0 additions & 4 deletions cdsobs/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,6 @@

# From here, all constants are for the tests
cdsobs_path = typing.cast(Path, importlib.resources.files("cdsobs"))
SERVICE_DEFINITION_YML = Path(
cdsobs_path,
"data/insitu-observations-woudc-ozone-total-column-and-profiles/service_definition.yml",
)

TEST_VAR_OUT = "air_temperature"

Expand Down
Loading
Loading