Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Forms fixes copds 1838 #26

Merged
merged 21 commits into from
Aug 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/on-push.yml
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ jobs:
uses: actions/checkout@v3
with:
repository: ecmwf-projects/cdm-obs.git
ref: 'cadsobs-changes'
ref: 'new-variables'
path: common_data_model
- name: Deploy test ingestion database
env:
Expand All @@ -52,7 +52,7 @@ jobs:
echo TEST_INGESTION_DB_PASS=$TEST_INGESTION_DB_PASS >> .env
echo CATALOGUE_PASSWORD=$CATALOGUE_PASSWORD >> .env
echo STORAGE_PASSWORD=$STORAGE_PASSWORD >> .env
docker-compose up -d
docker compose up -d
until pg_isready -U user -d baron -p 25432 -h localhost; do sleep 2; done
- name: Download test netCDFs
timeout-minutes: 2
Expand Down
3 changes: 1 addition & 2 deletions cdsobs/api_rest/endpoints.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
from cdsobs.api_rest.models import RetrievePayload
from cdsobs.cdm.api import get_auxiliary_variables_mapping
from cdsobs.cdm.lite import cdm_lite_variables
from cdsobs.cli._utils import ConfigNotFound
from cdsobs.config import CDSObsConfig, validate_config
from cdsobs.ingestion.core import get_variables_from_service_definition
from cdsobs.observation_catalogue.repositories.cads_dataset import CadsDatasetRepository
Expand All @@ -21,7 +20,7 @@
from cdsobs.service_definition.api import get_service_definition
from cdsobs.service_definition.service_definition_models import ServiceDefinition
from cdsobs.storage import S3Client
from cdsobs.utils.exceptions import DataNotFoundException, SizeError
from cdsobs.utils.exceptions import ConfigNotFound, DataNotFoundException, SizeError
from cdsobs.utils.utils import get_database_session

router = APIRouter()
Expand Down
25 changes: 25 additions & 0 deletions cdsobs/cdm/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,13 @@ def to_cdm_dataset(partition: DatasetPartition) -> CdmDataset:
cdm_variables += cdm_variables_with_table_names
cdm_variables = unique([v for v in cdm_variables if v in partition.data])
data = partition.data.loc[:, cdm_variables].set_index("observation_id")
original_variables = set(partition.data.columns)
removed_variables = original_variables - set(cdm_variables)
if len(removed_variables) > 0:
logger.warning(
"The following variables where read but are not in the CDM and "
f"are going to be dropped: {removed_variables}"
)
return CdmDataset(data, partition.partition_params, partition.dataset_metadata)


Expand Down Expand Up @@ -330,6 +337,24 @@ def auxfield2metadata_name(self, var: str, aux_var: str) -> str:
auxf["metadata_name"] for auxf in self[var] if auxf["auxvar"] == aux_var
][0]

def vars_with_processing_level(self) -> list[str]:
    """Return the entries of this mapping that have a processing level field.

    Iterates over ``self`` and keeps, in iteration order, every item for
    which ``self.var_has_processing_level`` returns a truthy value.
    """
    return list(filter(self.var_has_processing_level, self))

def var_has_processing_level(self, var: str) -> bool:
    """Tell whether ``var`` has an auxiliary field that is a processing level.

    Parameters
    ----------
    var:
        Key used to look up the auxiliary fields in ``self``; each auxiliary
        field is expected to be a mapping with an ``"auxvar"`` entry.

    Returns
    -------
    bool
        True if the ``"auxvar"`` of at least one auxiliary field of ``var`` is
        listed in ``self.processing_level_fields``, False otherwise.

    Raises
    ------
    KeyError
        If ``var`` is not in ``self`` (when ``self`` behaves like a dict), or
        an auxiliary field has no ``"auxvar"`` entry.
    """
    aux_fields = self[var]
    # processing_level_fields is a property that rebuilds its list on every
    # access, so read it once here instead of once per auxiliary field.
    processing_level_fields = self.processing_level_fields
    return any(auxf["auxvar"] in processing_level_fields for auxf in aux_fields)

@property
def processing_level_fields(self) -> list[str]:
    """Entries of ``self.all_list`` that contain ``"processing_level"``.

    The list is rebuilt on every access and keeps the order of
    ``self.all_list``.
    """
    matching = []
    for aux_name in self.all_list:
        if "processing_level" in aux_name:
            matching.append(aux_name)
    return matching

def get_var_processing_level_field_name(self, var: str) -> str:
    """Return the name of the processing level auxiliary field of ``var``.

    The first auxiliary field of ``var`` whose ``"auxvar"`` contains
    ``"processing_level"`` but not ``"quality_flag"`` is returned.

    Raises
    ------
    IndexError
        If ``var`` has no such auxiliary field.
    """
    candidates = []
    for aux_field in self[var]:
        aux_name = aux_field["auxvar"]
        if "processing_level" not in aux_name:
            continue
        if "quality_flag" in aux_name:
            continue
        candidates.append(aux_name)
    return candidates[0]


def get_aux_fields_mapping_from_service_definition(
source_definition: SourceDefinition, variables: List[str]
Expand Down
4 changes: 3 additions & 1 deletion cdsobs/cdm/lite.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,9 @@
"quasisystematic_uncertainty",
"positive_quasisystematic_uncertainty",
"negative_quasisystematic_uncertainty",
"flag",
"quality_flag",
"combined_uncertainty",
"processing_level",
]
cdm_lite_variables = dict(
mandatory=variable_names,
Expand Down
4 changes: 1 addition & 3 deletions cdsobs/cli/_catalogue_explorer.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,9 @@
from cdsobs.observation_catalogue.schemas.catalogue import CliCatalogueFilters
from cdsobs.utils.logutils import get_logger

from ..utils.exceptions import ConfigError
from ..utils.exceptions import CliException, ConfigError, ConfigNotFound
from ._utils import (
PAGE_SIZE,
CliException,
ConfigNotFound,
config_yml_typer,
list_parser,
print_format_msg,
Expand Down
9 changes: 5 additions & 4 deletions cdsobs/cli/_copy_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,15 @@
from sqlalchemy.orm import Session
from typer import Option

from cdsobs.cli._utils import CliException, ConfigNotFound, config_yml_typer
from cdsobs.cli._utils import config_yml_typer
from cdsobs.config import CDSObsConfig
from cdsobs.observation_catalogue.database import get_session
from cdsobs.observation_catalogue.models import Catalogue
from cdsobs.observation_catalogue.repositories.cads_dataset import CadsDatasetRepository
from cdsobs.observation_catalogue.repositories.catalogue import CatalogueRepository
from cdsobs.observation_catalogue.schemas.catalogue import CatalogueSchema
from cdsobs.storage import S3Client
from cdsobs.utils.exceptions import ConfigError
from cdsobs.utils.exceptions import CliException, ConfigError, ConfigNotFound
from cdsobs.utils.logutils import get_logger

logger = get_logger(__name__)
Expand Down Expand Up @@ -158,8 +158,9 @@ def copy_outside(init_config, dest_config, dataset, dest_dataset):
with get_session(init_config.catalogue_db) as init_session:
entries = CatalogueRepository(init_session).get_by_dataset(dataset)
if init_config.s3config == dest_config.s3config:
new_assets = s3_copy(init_s3client, entries, dest_dataset)
dest_s3client = init_s3client
# namespace may be different, so we need another S3 client here
dest_s3client = S3Client.from_config(dest_config.s3config)
new_assets = s3_copy(dest_s3client, entries, dest_dataset)
else:
# get new destination client as current client
dest_s3client = S3Client.from_config(dest_config.s3config)
Expand Down
4 changes: 1 addition & 3 deletions cdsobs/cli/_delete_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,6 @@
from rich.console import Console

from cdsobs.cli._utils import (
CliException,
ConfigNotFound,
config_yml_typer,
list_parser,
)
Expand All @@ -20,7 +18,7 @@
CliCatalogueFilters,
)
from cdsobs.storage import S3Client
from cdsobs.utils.exceptions import ConfigError
from cdsobs.utils.exceptions import CliException, ConfigError, ConfigNotFound

console = Console()

Expand Down
3 changes: 2 additions & 1 deletion cdsobs/cli/_get_forms_jsons.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@

import typer

from cdsobs.cli._utils import config_yml_typer, read_and_validate_config
from cdsobs.cli._utils import config_yml_typer
from cdsobs.config import read_and_validate_config
from cdsobs.forms_jsons import get_forms_jsons
from cdsobs.observation_catalogue.database import get_session
from cdsobs.observation_catalogue.repositories.catalogue import CatalogueRepository
Expand Down
3 changes: 2 additions & 1 deletion cdsobs/cli/_make_cdm.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@
import typer

from cdsobs.api import run_make_cdm
from cdsobs.cli._utils import config_yml_typer, read_and_validate_config
from cdsobs.cli._utils import config_yml_typer
from cdsobs.config import read_and_validate_config
from cdsobs.service_definition.api import validate_service_definition


Expand Down
3 changes: 2 additions & 1 deletion cdsobs/cli/_make_production.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@
import typer

from cdsobs.api import run_ingestion_pipeline
from cdsobs.cli._utils import config_yml_typer, read_and_validate_config
from cdsobs.cli._utils import config_yml_typer
from cdsobs.config import read_and_validate_config
from cdsobs.observation_catalogue.database import get_session
from cdsobs.service_definition.api import validate_service_definition

Expand Down
22 changes: 10 additions & 12 deletions cdsobs/cli/_object_storage.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
from pathlib import Path

import typer
from rich.console import Console

from cdsobs.cli._utils import config_yml_typer
from cdsobs.config import CDSObsConfig
from cdsobs.observation_catalogue.database import get_session
from cdsobs.observation_catalogue.repositories.catalogue import CatalogueRepository
from cdsobs.storage import S3Client, StorageClient
from cdsobs.utils.logutils import get_logger

console = Console()
logger = get_logger(__name__)


def check_consistency(
Expand Down Expand Up @@ -38,14 +38,14 @@ def check_if_missing_in_object_storage(
):
red_flag = False
if dataset is None:
console.print(
"Check if every dataset in the catalogue is in the object storage"
logger.info(
"Checking if every dataset in the catalogue is in the object storage"
)
page = 0
page_size = 10000
assets = catalogue_repo.get_all_assets(limit=page_size)
while len(assets):
console.print(f"Checking page {page+1} ({page_size} records per page)")
logger.info(f"Checking page {page+1} ({page_size} records per page)")
red_flag = assets_in_s3(assets, s3client)
page += 1
assets = catalogue_repo.get_all_assets(
Expand All @@ -55,17 +55,15 @@ def check_if_missing_in_object_storage(
assets = catalogue_repo.get_dataset_assets(dataset)
red_flag = assets_in_s3(assets, s3client)
if not red_flag:
console.print("[bold green] Found all assets in object storage [/bold green]")
logger.info("Found all assets in object storage.")


def assets_in_s3(assets, s3client) -> bool:
red_flag = False
for asset in assets:
bucket, name = asset.split("/")
if not s3client.object_exists(bucket, name):
console.print(
f"[bold red] Missing {str(asset)} in object storage [/bold red]"
)
logger.warning(f"Missing {str(asset)} in object storage.")
red_flag = True
return red_flag

Expand All @@ -77,7 +75,7 @@ def check_if_missing_in_catalogue(
):
red_flag = False
if dataset is None:
console.print(
logger.info(
"Check if every dataset in the object storage has a catalogue entry"
)
buckets = s3client.list_buckets()
Expand All @@ -91,14 +89,14 @@ def check_if_missing_in_catalogue(
objects = s3client.list_directory_objects(bucket)
red_flag = objects_in_catalogue(objects, bucket, s3client, catalogue_repo)
if not red_flag:
console.print("[bold green] Found all assets in catalogue [/bold green]")
logger.info("Found all assets in catalogue.")


def objects_in_catalogue(objects, bucket, s3client, catalogue_repo) -> bool:
red_flag = False
for obj in objects:
asset = s3client.get_asset(bucket, obj)
if not catalogue_repo.exists_asset(asset):
console.print(f"[bold red] Missing {asset} entry in catalogue [/bold red]")
logger.warning(f"Missing {asset} entry in catalogue.")
red_flag = True
return red_flag
3 changes: 2 additions & 1 deletion cdsobs/cli/_retrieve.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,12 @@
import typer
from rich.console import Console

from cdsobs.cli._utils import CliException, ConfigNotFound, config_yml_typer
from cdsobs.cli._utils import config_yml_typer
from cdsobs.config import validate_config
from cdsobs.retrieve.api import retrieve_observations
from cdsobs.retrieve.models import RetrieveArgs
from cdsobs.storage import S3Client
from cdsobs.utils.exceptions import CliException, ConfigNotFound


def retrieve(
Expand Down
37 changes: 7 additions & 30 deletions cdsobs/cli/_utils.py
Original file line number Diff line number Diff line change
@@ -1,40 +1,27 @@
from pathlib import Path

import typer
from botocore.exceptions import BotoCoreError
from rich.console import Console
from sqlalchemy.exc import OperationalError

from cdsobs.config import CDSObsConfig, validate_config


class CliException(Exception):
pass

from cdsobs.utils.exceptions import CliException
from cdsobs.utils.logutils import get_logger

class ConfigNotFound(CliException):
def __init__(self, msg="Configuration yaml not found"):
self.message = msg
super().__init__(self.message)
logger = get_logger(__name__)


def exception_handler(exception):
more_info = "Set CLI_DEBUG=true to see the full trace"
err_console = Console(stderr=True)
if isinstance(exception, CliException):
err_console.print(f"Error:\\ {exception}")
logger.error(f"Error:\\ {exception}")
elif isinstance(exception, BotoCoreError):
err_console.print(
logger.error(
f"Error connecting or operating with the storage. {more_info}:\n {exception}."
)
elif isinstance(exception, OperationalError):
err_console.print(
logger.error(
f"Error connecting or querying the database. {more_info}:\n {exception}."
)
else:
err_console.print(
f"Error: ({type(exception).__name__}: {exception}), {more_info}"
)
logger.error(f"Error: ({type(exception).__name__}: {exception}), {more_info}")


config_yml_typer = typer.Option(
Expand All @@ -60,13 +47,3 @@ def list_parser(arg: str):
return []
else:
return arg.split(",")


def read_and_validate_config(cdsobs_config_yml: Path | None) -> CDSObsConfig:
    """Load and validate the configuration YAML.

    When ``cdsobs_config_yml`` is None, ``.cdsobs/cdsobs_config.yml`` under
    the user's home directory is used. Raises ``ConfigNotFound`` if the file
    does not exist; otherwise returns the result of ``validate_config``.
    """
    config_path = (
        Path.home().joinpath(".cdsobs/cdsobs_config.yml")
        if cdsobs_config_yml is None
        else cdsobs_config_yml
    )
    if Path(config_path).exists():
        return validate_config(config_path)
    raise ConfigNotFound()
2 changes: 1 addition & 1 deletion cdsobs/cli/_validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import typer

import cdsobs.service_definition.api
from cdsobs.cli._utils import CliException
from cdsobs.utils.exceptions import CliException


def validate_service_definition(
Expand Down
12 changes: 11 additions & 1 deletion cdsobs/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import yaml
from pydantic_core.core_schema import ValidationInfo

from cdsobs.utils.exceptions import ConfigError
from cdsobs.utils.exceptions import ConfigError, ConfigNotFound
from cdsobs.utils.types import LatTileSize, LonTileSize, TimeTileSize


Expand Down Expand Up @@ -220,3 +220,13 @@ def validate_config(config_file: Path):
with config_file.open() as f:
config_dict = yaml.safe_load(f)
return CDSObsConfig(**config_dict)


def read_and_validate_config(cdsobs_config_yml: Path | None) -> CDSObsConfig:
    """Load and validate the configuration YAML.

    When ``cdsobs_config_yml`` is None, ``.cdsobs/cdsobs_config.yml`` under
    the user's home directory is used. Raises ``ConfigNotFound`` if the file
    does not exist; otherwise returns the result of ``validate_config``.
    """
    config_path = (
        Path.home().joinpath(".cdsobs/cdsobs_config.yml")
        if cdsobs_config_yml is None
        else cdsobs_config_yml
    )
    if Path(config_path).exists():
        return validate_config(config_path)
    raise ConfigNotFound()
Loading
Loading