From f401d2ac958cc48cb1579e56832664a57d634b63 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Fri, 19 Jul 2024 13:30:29 -0400 Subject: [PATCH 1/7] added db schema --- bbconf/db_utils.py | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/bbconf/db_utils.py b/bbconf/db_utils.py index dc50920..b191459 100644 --- a/bbconf/db_utils.py +++ b/bbconf/db_utils.py @@ -300,6 +300,33 @@ def delete_bed_universe(mapper, connection, target): session.commit() +class GeoGseStats(Base): + __tablename__ = "geo_gse_stats" + + id: Mapped[str] = mapped_column(primary_key=True, index=True) + gse: Mapped[str] = mapped_column(nullable=False, comment="GSE number", unique=True) + status: Mapped[str] = mapped_column( + nullable=False, comment="Status of the GEO project" + ) + submission_date: Mapped[datetime.datetime] = mapped_column( + default=deliver_update_date, onupdate=deliver_update_date + ) + + +class GeoGsmStats(Base): + __tablename__ = "geo_gsm_stats" + + id: Mapped[str] = mapped_column(primary_key=True, index=True) + gse_status_id: Mapped[str] = mapped_column( + ForeignKey("geo_gse_stats.id", ondelete="CASCADE"), nullable=False, index=True + ) + gsm: Mapped[str] = mapped_column(nullable=False, comment="GSM number", unique=True) + sample_name: Mapped[str] = mapped_column() + status: Mapped[str] = mapped_column( + nullable=False, comment="Status of the GEO sample" + ) + + class BaseEngine: """ A class with base methods, that are used in several classes. From 0b1f9bf206e1680e4e743d0a40dd3f4d6e722019 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Fri, 19 Jul 2024 13:33:15 -0400 Subject: [PATCH 2/7] updated naming --- bbconf/db_utils.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/bbconf/db_utils.py b/bbconf/db_utils.py index b191459..f648a41 100644 --- a/bbconf/db_utils.py +++ b/bbconf/db_utils.py @@ -300,8 +300,8 @@ def delete_bed_universe(mapper, connection, target): session.commit() -class GeoGseStats(Base): - __tablename__ = "geo_gse_stats" +class GeoGseStatus(Base): + __tablename__ = "geo_gse_status" id: Mapped[str] = mapped_column(primary_key=True, index=True) gse: Mapped[str] = mapped_column(nullable=False, comment="GSE number", unique=True) @@ -313,8 +313,8 @@ class GeoGseStats(Base): ) -class GeoGsmStats(Base): - __tablename__ = "geo_gsm_stats" +class GeoGsmStatus(Base): + __tablename__ = "geo_gsm_status" id: Mapped[str] = mapped_column(primary_key=True, index=True) gse_status_id: Mapped[str] = mapped_column( From 447d8cc7a61aea441203ac550efd24d4da30aad5 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Mon, 19 Aug 2024 12:08:09 -0400 Subject: [PATCH 3/7] updated models for bbuploader --- bbconf/db_utils.py | 29 ++++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/bbconf/db_utils.py b/bbconf/db_utils.py index f648a41..dee2029 100644 --- a/bbconf/db_utils.py +++ b/bbconf/db_utils.py @@ -1,18 +1,18 @@ import datetime import logging from typing import List, Optional -import pandas as pd +import pandas as pd from sqlalchemy import TIMESTAMP, BigInteger, ForeignKey, Result, Select, event, select from sqlalchemy.dialects.postgresql import JSON from sqlalchemy.engine import URL, Engine, create_engine from sqlalchemy.event import listens_for -from sqlalchemy.exc import ProgrammingError, IntegrityError +from sqlalchemy.exc import IntegrityError, ProgrammingError from sqlalchemy.ext.compiler import compiles from sqlalchemy.orm import DeclarativeBase, Mapped, Session, mapped_column, relationship from sqlalchemy_schemadisplay import create_schema_graph -from bbconf.const import PKG_NAME, LICENSES_CSV_URL +from bbconf.const import LICENSES_CSV_URL, PKG_NAME _LOGGER = logging.getLogger(PKG_NAME) @@ -303,7 +303,7 @@ def delete_bed_universe(mapper, connection, target): class GeoGseStatus(Base): __tablename__ = "geo_gse_status" - id: Mapped[str] = mapped_column(primary_key=True, index=True) + id: Mapped[int] = mapped_column(primary_key=True, index=True, autoincrement=True) gse: Mapped[str] = mapped_column(nullable=False, comment="GSE number", unique=True) status: Mapped[str] = mapped_column( nullable=False, comment="Status of the GEO project" @@ -311,20 +311,35 @@ class GeoGseStatus(Base): submission_date: Mapped[datetime.datetime] = mapped_column( default=deliver_update_date, onupdate=deliver_update_date ) + number_of_files: Mapped[int] = mapped_column(default=0, comment="Number of files") + number_of_success: Mapped[int] = mapped_column( + default=0, comment="Number of success" + ) + number_of_skips: Mapped[int] = mapped_column(default=0, comment="Number of skips") + number_of_fails: Mapped[int] = mapped_column(default=0, comment="Number of fails") + + gsm_status_mapper: Mapped[List["GeoGsmStatus"]] = relationship( + "GeoGsmStatus", back_populates="gse_status_mapper" + ) class GeoGsmStatus(Base): __tablename__ = "geo_gsm_status" - id: Mapped[str] = mapped_column(primary_key=True, index=True) + id: Mapped[int] = mapped_column(primary_key=True, index=True, autoincrement=True) gse_status_id: Mapped[str] = mapped_column( - ForeignKey("geo_gse_stats.id", ondelete="CASCADE"), nullable=False, index=True + ForeignKey("geo_gse_status.id", ondelete="CASCADE"), nullable=False, index=True ) - gsm: Mapped[str] = mapped_column(nullable=False, comment="GSM number", unique=True) + gsm: Mapped[str] = mapped_column(nullable=False, comment="GSM number", unique=False) sample_name: Mapped[str] = mapped_column() status: Mapped[str] = mapped_column( nullable=False, comment="Status of the GEO sample" ) + error: Mapped[str] = mapped_column(nullable=True, comment="Error message") + genome: Mapped[str] = mapped_column(nullable=True, comment="Genome") + gse_status_mapper: Mapped["GeoGseStatus"] = relationship( + "GeoGseStatus", back_populates="gsm_status_mapper" + ) class BaseEngine: From 81cb007c0dcdf7a8c81250682669133ba62f9b58 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Mon, 19 Aug 2024 12:08:24 -0400 Subject: [PATCH 4/7] isort --- bbconf/bbagent.py | 2 +- bbconf/config_parser/models.py | 2 +- bbconf/models/bed_models.py | 3 ++- bbconf/modules/bedfiles.py | 13 ++++++------- bbconf/modules/bedsets.py | 2 +- tests/test_bedfile.py | 2 +- tests/test_common.py | 3 ++- 7 files changed, 14 insertions(+), 13 deletions(-) diff --git a/bbconf/bbagent.py b/bbconf/bbagent.py index 38d11b1..d6b101c 100644 --- a/bbconf/bbagent.py +++ b/bbconf/bbagent.py @@ -1,6 +1,6 @@ from functools import cached_property from pathlib import Path -from typing import Union, List +from typing import List, Union from sqlalchemy.orm import Session from sqlalchemy.sql import distinct, func, select diff --git a/bbconf/config_parser/models.py b/bbconf/config_parser/models.py index aa0a2b8..68c2100 100644 --- a/bbconf/config_parser/models.py +++ b/bbconf/config_parser/models.py @@ -1,6 +1,6 @@ +import logging from pathlib import Path from typing import Optional, Union -import logging from pydantic import BaseModel, ConfigDict, computed_field, field_validator from yacman import load_yaml diff --git a/bbconf/models/bed_models.py b/bbconf/models/bed_models.py index b27532a..36e70e3 100644 --- a/bbconf/models/bed_models.py +++ b/bbconf/models/bed_models.py @@ -3,9 +3,10 @@ from pydantic import BaseModel, ConfigDict, Field -from .base_models import FileModel from bbconf.const import DEFAULT_LICENSE +from .base_models import FileModel + class BedPlots(BaseModel): chrombins: FileModel = None diff --git a/bbconf/modules/bedfiles.py b/bbconf/modules/bedfiles.py index 1ca34ed..51728e1 100644 --- a/bbconf/modules/bedfiles.py +++ b/bbconf/modules/bedfiles.py @@ -3,8 +3,6 @@ from typing import Dict, Union import numpy as np -from tqdm import tqdm - from geniml.bbclient import BBClient from geniml.io import RegionSet from genimtools.tokenizers import RegionSet as GRegionSet @@ -12,18 +10,19 @@ from qdrant_client.models import Distance, PointIdsList, VectorParams from sqlalchemy import and_, delete, func, select from sqlalchemy.orm import Session +from tqdm import tqdm from bbconf.config_parser.bedbaseconfig import BedBaseConfig -from bbconf.const import PKG_NAME, ZARR_TOKENIZED_FOLDER, DEFAULT_LICENSE +from bbconf.const import DEFAULT_LICENSE, PKG_NAME, ZARR_TOKENIZED_FOLDER from bbconf.db_utils import Bed, BedStats, Files, TokenizedBed, Universes from bbconf.exceptions import ( BedBaseConfError, BedFIleExistsError, BEDFileNotFoundError, + QdrantInstanceNotInitializedError, TokenizeFileExistsError, TokenizeFileNotExistError, UniverseNotFoundError, - QdrantInstanceNotInitializedError, ) from bbconf.models.bed_models import ( BedClassification, @@ -34,15 +33,15 @@ BedMetadata, BedMetadataBasic, BedPEPHub, + BedPEPHubRestrict, BedPlots, + BedSetMinimal, BedStatsModel, FileModel, QdrantSearchResult, TokenizedBedResponse, - UniverseMetadata, TokenizedPathResponse, - BedPEPHubRestrict, - BedSetMinimal, + UniverseMetadata, ) _LOGGER = getLogger(PKG_NAME) diff --git a/bbconf/modules/bedsets.py b/bbconf/modules/bedsets.py index ffaaf44..64dd8f5 100644 --- a/bbconf/modules/bedsets.py +++ b/bbconf/modules/bedsets.py @@ -7,7 +7,7 @@ from bbconf.config_parser import BedBaseConfig from bbconf.const import PKG_NAME -from bbconf.db_utils import BedFileBedSetRelation, BedSets, BedStats, Files, Bed +from bbconf.db_utils import Bed, BedFileBedSetRelation, BedSets, BedStats, Files from bbconf.exceptions import BedSetExistsError, BedSetNotFoundError from bbconf.models.bed_models import BedStatsModel from bbconf.models.bedset_models import ( diff --git a/tests/test_bedfile.py b/tests/test_bedfile.py index 19fa0c5..07489c7 100644 --- a/tests/test_bedfile.py +++ b/tests/test_bedfile.py @@ -3,9 +3,9 @@ from sqlalchemy.sql import select from bbconf.bbagent import BedBaseAgent +from bbconf.const import DEFAULT_LICENSE from bbconf.db_utils import Bed, Files from bbconf.exceptions import BedFIleExistsError, BEDFileNotFoundError -from bbconf.const import DEFAULT_LICENSE from .conftest import SERVICE_UNAVAILABLE, get_bbagent from .utils import BED_TEST_ID, ContextManagerDBTesting diff --git a/tests/test_common.py b/tests/test_common.py index a508765..5b9048d 100644 --- a/tests/test_common.py +++ b/tests/test_common.py @@ -1,6 +1,7 @@ +import pytest + from bbconf.const import DEFAULT_LICENSE -import pytest from .conftest import SERVICE_UNAVAILABLE from .utils import ContextManagerDBTesting From 9afb8ddc1055f8bc281bd471fc0afbcfd49c332e Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Wed, 21 Aug 2024 16:17:25 -0400 Subject: [PATCH 5/7] updated botocore version --- requirements/requirements-all.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements/requirements-all.txt b/requirements/requirements-all.txt index 95258b7..b5c1133 100644 --- a/requirements/requirements-all.txt +++ b/requirements/requirements-all.txt @@ -4,7 +4,7 @@ geniml >= 0.4.0 psycopg >= 3.1.15 colorlogs pydantic >= 2.6.4 -botocore >= 1.34.54 +botocore >= 1.35.3 boto3 >= 1.34.54 pephubclient >= 0.4.1 sqlalchemy_schemadisplay From dba3057c9de5c66d6f8a3ff84fa4dcb781e4eba7 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Wed, 21 Aug 2024 16:19:29 -0400 Subject: [PATCH 6/7] updated botocore version2 --- requirements/requirements-all.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements/requirements-all.txt b/requirements/requirements-all.txt index b5c1133..21a4d73 100644 --- a/requirements/requirements-all.txt +++ b/requirements/requirements-all.txt @@ -4,7 +4,7 @@ geniml >= 0.4.0 psycopg >= 3.1.15 colorlogs pydantic >= 2.6.4 -botocore >= 1.35.3 +botocore boto3 >= 1.34.54 pephubclient >= 0.4.1 sqlalchemy_schemadisplay From ade2f8b79e44d7261f84d1a922e1a1fedb4a2a4f Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Wed, 21 Aug 2024 16:24:57 -0400 Subject: [PATCH 7/7] bump version --- bbconf/_version.py | 2 +- docs/changelog.md | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/bbconf/_version.py b/bbconf/_version.py index 906d362..43c4ab0 100644 --- a/bbconf/_version.py +++ b/bbconf/_version.py @@ -1 +1 @@ -__version__ = "0.6.0" +__version__ = "0.6.1" diff --git a/docs/changelog.md b/docs/changelog.md index 58394e5..bf40e34 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -2,6 +2,11 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html) and [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) format. +# [0.6.1] - 2024-08-21 +## Added + +- DB tables for GEO uploader status + # [0.6.0] - 2024-05-01 ## Added