From 05b46f4037582133c8c60850891f703ddad1256d Mon Sep 17 00:00:00 2001 From: Jayaram Kancherla Date: Fri, 13 Dec 2024 08:04:58 -0800 Subject: [PATCH 1/4] include schema migrations --- cache/BiocFileCache.sqlite | Bin 0 -> 32768 bytes src/pybiocfilecache/cache.py | 24 +++ src/pybiocfilecache/const.py | 1 - src/pybiocfilecache/migrations/__init__.py | 1 + src/pybiocfilecache/migrations/migration.py | 139 ++++++++++++++++++ .../migrations/migrationV0_5_0.py | 102 +++++++++++++ 6 files changed, 266 insertions(+), 1 deletion(-) create mode 100644 cache/BiocFileCache.sqlite delete mode 100644 src/pybiocfilecache/const.py create mode 100644 src/pybiocfilecache/migrations/__init__.py create mode 100644 src/pybiocfilecache/migrations/migration.py create mode 100644 src/pybiocfilecache/migrations/migrationV0_5_0.py diff --git a/cache/BiocFileCache.sqlite b/cache/BiocFileCache.sqlite new file mode 100644 index 0000000000000000000000000000000000000000..6fe0e967380c715d0dece780507007c5e1ab7df7 GIT binary patch literal 32768 zcmeI&J#W)M7{GD6FHRdZ3R~16(y3D{5EbgmKxhq$NZmGdq;|n_<11JsO|V_mGJrs0 zL*ircW%vM$oJ}vSi8?Uf-;(9K^Z7YHzelokN{)~ENiN0tG@V4b*s&g1wrxEV!m_MQ zb*`yXAC79Q=#IMDciQ(gH?7B?P8!YMR&C?E)f_f|Hm$~w#?y^os>^`@0tg_000Iag zfB*sp3%ss5t?jn`Ik|}CRdN-kGMiqeBN?U_(M0Or+U~&X2A&xDy`!Ngdj6hwA{J(g z17GMTM5hek`i|3b+qPVYpTv4NU;?@oU8brr1V2i{9>ApC<$M5&kw?4^f>75AG$?^Xf%>p_J7Z$ zvnYQ%Up39IXSeTgUb#1pvOJtj$ew+_)4|4hOyc?%-6s@=irZao64B<+6qoUBii5CXVtbd?&AUd*$C6>KnzRqK3LJ zI$LUF%bnR}5x?w|Mbvlob*WNBJt{i&_44U5g-Q`WMB~d_v`P+t*Q>kROHOOIZChm; z+555D(}&SzJ}vv9UL$;>Tg&P*Q||2>#rNic!5 str: + """Get current schema version.""" + return self.migrator.get_current_version() + + def migrate_schema(self, target_version: Optional[str] = None) -> None: + """Migrate schema to specified version. + + Args: + target_version: Version to migrate to. + If None, migrates to latest version. + """ + self.migrator.migrate(target_version) + def _setup_cache_dir(self) -> None: if not self.config.cache_dir.exists(): self.config.cache_dir.mkdir(parents=True, exist_ok=True) diff --git a/src/pybiocfilecache/const.py b/src/pybiocfilecache/const.py deleted file mode 100644 index 5f28ad4..0000000 --- a/src/pybiocfilecache/const.py +++ /dev/null @@ -1 +0,0 @@ -SCHEMA_VERSION = "0.99.4" diff --git a/src/pybiocfilecache/migrations/__init__.py b/src/pybiocfilecache/migrations/__init__.py new file mode 100644 index 0000000..59c8270 --- /dev/null +++ b/src/pybiocfilecache/migrations/__init__.py @@ -0,0 +1 @@ +from .migration import Migrator diff --git a/src/pybiocfilecache/migrations/migration.py b/src/pybiocfilecache/migrations/migration.py new file mode 100644 index 0000000..ee84259 --- /dev/null +++ b/src/pybiocfilecache/migrations/migration.py @@ -0,0 +1,139 @@ +import logging +from typing import Optional + +from sqlalchemy import text +from sqlalchemy.engine import Engine + +__author__ = "Jayaram Kancherla" +__copyright__ = "Jayaram Kancherla" +__license__ = "MIT" + + +logger = logging.getLogger(__name__) + + +class Migration: + """Base class for migrations.""" + + version: str + description: str + + @staticmethod + def up(engine: Engine) -> None: + """Upgrade to this version.""" + raise NotImplementedError + + @staticmethod + def down(engine: Engine) -> None: + """Downgrade from this version.""" + raise NotImplementedError + + +class Migrator: + """Handles database schema migrations.""" + + def __init__(self, engine: Engine): + from .migrationV0_5_0 import MigrationV0_5_0 + + self.engine = engine + self.migrations = [MigrationV0_5_0] + + def _detect_version_from_structure(self) -> str: + """Detect schema version by examining table structure.""" + with self.engine.connect() as conn: + # Check table structure + columns = conn.execute( + text(""" + PRAGMA table_info(resource); + """) + ).fetchall() + column_names = {col[1] for col in columns} + + # Check for columns that indicate version + if "is_compressed" in column_names: + return "0.5.0" + elif "tags" in column_names and "size_bytes" in column_names: + return "0.5.0" + else: + return "0.4.1" + + def get_current_version(self) -> Optional[str]: + """Get current schema version from database. + + Will attempt to detect version if `schema_version` is not in metadata. + """ + with self.engine.connect() as conn: + result = conn.execute( + text(""" + SELECT value FROM metadata + WHERE key = 'schema_version' + """) + ) + row = result.fetchone() + + if row is not None: + return row[0] + + detected_version = self._detect_version_from_structure() + conn.execute( + text(""" + INSERT INTO metadata (key, value) + VALUES ('schema_version', :version); + """), + {"version": detected_version}, + ) + + return detected_version + + def migrate(self, target_version: Optional[str] = None) -> None: + """Migrate schema to target version. + + Args: + target_version: + Version to migrate to. + If None, migrates to latest version. + """ + try: + current = self.get_current_version() + latest_version = self.migrations[-1].version + target_version = target_version or latest_version + + if current == target_version: + logger.info("Already at target version") + return + + if current == "0.4.1" and target_version == "0.5.0": + logger.info("Upgrading from 0.4.1 to 0.5.0") + self.migrations[0].up(self.engine) + elif current == "0.5.0" and target_version == "0.4.1": + logger.info("Downgrading from 0.5.0 to 0.4.1") + self.migrations[0].down(self.engine) + else: + raise ValueError(f"Unsupported migration path: {current} -> {target_version}") + + except Exception as e: + logger.error(f"Migration failed: {e}") + raise + + # When we have multiple migrations + # current_idx = next((i for i, m in enumerate(self.migrations) if m.version == current), None) + # target_idx = next((i for i, m in enumerate(self.migrations) if m.version == target_version), None) + + # if current_idx is None: + # raise ValueError(f"Unknown current version: {current}") + # if target_idx is None: + # raise ValueError(f"Unknown target version: {target_version}") + + # if current_idx < target_idx: + # # Upgrade + # for migration in self.migrations[current_idx : target_idx + 1]: + # logger.info(f"Upgrading to {migration.version}") + # migration.up(self.engine) + # elif current_idx > target_idx: + # # Downgrade + # for migration in reversed(self.migrations[target_idx + 1 : current_idx + 1]): + # logger.info(f"Downgrading from {migration.version}") + # migration.down(self.engine) + + def __repr__(self) -> str: + return f"Migrator(current_version={self.get_current_version()})" diff --git a/src/pybiocfilecache/migrations/migrationV0_5_0.py b/src/pybiocfilecache/migrations/migrationV0_5_0.py new file mode 100644 index 0000000..c50bb1e --- /dev/null +++ b/src/pybiocfilecache/migrations/migrationV0_5_0.py @@ -0,0 +1,102 @@ +from sqlalchemy import text +from sqlalchemy.engine import Engine + +from .migration import Migration + +__author__ = "Jayaram Kancherla" +__copyright__ = "Jayaram Kancherla" +__license__ = "MIT" + + +class MigrationV0_5_0(Migration): + """Migration from v0.4.1 to 0.5.0.""" + + version = "0.5.0" + description = "Add tags, size_bytes, compression flag, and update indexes" + + @staticmethod + def up(engine: Engine) -> None: + """Upgrade from v0.4.1 to v0.5.0.""" + with engine.begin() as conn: + # Add new columns + conn.execute( + text(""" + ALTER TABLE resource + ADD COLUMN tags TEXT; + """) + ) + conn.execute( + text(""" + ALTER TABLE resource + ADD COLUMN size_bytes INTEGER; + """) + ) + # conn.execute( + # text(""" + # ALTER TABLE resource + # ADD COLUMN is_compressed BOOLEAN DEFAULT FALSE; + # """) + # ) + + # Calculate size_bytes for existing resources + conn.execute( + text(""" + UPDATE resource + SET size_bytes = ( + SELECT length(readfile(rpath)) + FROM resource r2 + WHERE r2.id = resource.id + ) + WHERE EXISTS ( + SELECT 1 + FROM resource r3 + WHERE r3.id = resource.id + AND r3.rpath IS NOT NULL + ); + """) + ) + + # Add indexes + conn.execute( + text(""" + CREATE UNIQUE INDEX IF NOT EXISTS ix_resource_rname + ON resource(rname); + """) + ) + conn.execute( + text(""" + CREATE INDEX IF NOT EXISTS ix_resource_rid + ON resource(rid); + """) + ) + + # Update metadata + conn.execute( + text(""" + UPDATE metadata + SET value = '0.5.0' + WHERE key = 'schema_version'; + """) + ) + + @staticmethod + def down(engine: Engine) -> None: + """Downgrade from v0.5.0 to 0.4.1.""" + with engine.begin() as conn: + # Remove indexes + conn.execute(text("DROP INDEX IF EXISTS ix_resource_rname;")) + conn.execute(text("DROP INDEX IF EXISTS ix_resource_rid;")) + + # Remove columns + # conn.execute(text("ALTER TABLE resource DROP COLUMN is_compressed;")) + conn.execute(text("ALTER TABLE resource DROP COLUMN size_bytes;")) + conn.execute(text("ALTER TABLE resource DROP COLUMN tags;")) + + # Update metadata + conn.execute( + text(""" + UPDATE metadata + SET value = '0.4.1' + WHERE key = 'schema_version'; + """) + ) From d18075293c88d66ae1cf5448268968102c715525 Mon Sep 17 00:00:00 2001 From: Jayaram Kancherla Date: Fri, 13 Dec 2024 08:05:58 -0800 Subject: [PATCH 2/4] remove test cache file --- cache/BiocFileCache.sqlite | Bin 32768 -> 0 bytes 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 cache/BiocFileCache.sqlite diff --git a/cache/BiocFileCache.sqlite b/cache/BiocFileCache.sqlite deleted file mode 100644 index 6fe0e967380c715d0dece780507007c5e1ab7df7..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 32768 zcmeI&J#W)M7{GD6FHRdZ3R~16(y3D{5EbgmKxhq$NZmGdq;|n_<11JsO|V_mGJrs0 zL*ircW%vM$oJ}vSi8?Uf-;(9K^Z7YHzelokN{)~ENiN0tG@V4b*s&g1wrxEV!m_MQ zb*`yXAC79Q=#IMDciQ(gH?7B?P8!YMR&C?E)f_f|Hm$~w#?y^os>^`@0tg_000Iag zfB*sp3%ss5t?jn`Ik|}CRdN-kGMiqeBN?U_(M0Or+U~&X2A&xDy`!Ngdj6hwA{J(g z17GMTM5hek`i|3b+qPVYpTv4NU;?@oU8brr1V2i{9>ApC<$M5&kw?4^f>75AG$?^Xf%>p_J7Z$ zvnYQ%Up39IXSeTgUb#1pvOJtj$ew+_)4|4hOyc?%-6s@=irZao64B<+6qoUBii5CXVtbd?&AUd*$C6>KnzRqK3LJ zI$LUF%bnR}5x?w|Mbvlob*WNBJt{i&_44U5g-Q`WMB~d_v`P+t*Q>kROHOOIZChm; z+555D(}&SzJ}vv9UL$;>Tg&P*Q||2>#rNic!5 Date: Fri, 13 Dec 2024 16:06:25 +0000 Subject: [PATCH 3/4] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/pybiocfilecache/migrations/migration.py | 4 ++-- .../migrations/migrationV0_5_0.py | 18 +++++++++--------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/src/pybiocfilecache/migrations/migration.py b/src/pybiocfilecache/migrations/migration.py index ee84259..cd95632 100644 --- a/src/pybiocfilecache/migrations/migration.py +++ b/src/pybiocfilecache/migrations/migration.py @@ -65,7 +65,7 @@ def get_current_version(self) -> Optional[str]: with self.engine.connect() as conn: result = conn.execute( text(""" - SELECT value FROM metadata + SELECT value FROM metadata WHERE key = 'schema_version' """) ) @@ -77,7 +77,7 @@ def get_current_version(self) -> Optional[str]: detected_version = self._detect_version_from_structure() conn.execute( text(""" - INSERT INTO metadata (key, value) + INSERT INTO metadata (key, value) VALUES ('schema_version', :version); """), {"version": detected_version}, diff --git a/src/pybiocfilecache/migrations/migrationV0_5_0.py b/src/pybiocfilecache/migrations/migrationV0_5_0.py index c50bb1e..2a8b1a5 100644 --- a/src/pybiocfilecache/migrations/migrationV0_5_0.py +++ b/src/pybiocfilecache/migrations/migrationV0_5_0.py @@ -21,19 +21,19 @@ def up(engine: Engine) -> None: # Add new columns conn.execute( text(""" - ALTER TABLE resource + ALTER TABLE resource ADD COLUMN tags TEXT; """) ) conn.execute( text(""" - ALTER TABLE resource + ALTER TABLE resource ADD COLUMN size_bytes INTEGER; """) ) # conn.execute( # text(""" - # ALTER TABLE resource + # ALTER TABLE resource # ADD COLUMN is_compressed BOOLEAN DEFAULT FALSE; # """) # ) @@ -59,13 +59,13 @@ def up(engine: Engine) -> None: # Add indexes conn.execute( text(""" - CREATE UNIQUE INDEX IF NOT EXISTS ix_resource_rname + CREATE UNIQUE INDEX IF NOT EXISTS ix_resource_rname ON resource(rname); """) ) conn.execute( text(""" - CREATE INDEX IF NOT EXISTS ix_resource_rid + CREATE INDEX IF NOT EXISTS ix_resource_rid ON resource(rid); """) ) @@ -73,8 +73,8 @@ def up(engine: Engine) -> None: # Update metadata conn.execute( text(""" - UPDATE metadata - SET value = '0.5.0' + UPDATE metadata + SET value = '0.5.0' WHERE key = 'schema_version'; """) ) @@ -95,8 +95,8 @@ def down(engine: Engine) -> None: # Update metadata conn.execute( text(""" - UPDATE metadata - SET value = '0.4.1' + UPDATE metadata + SET value = '0.4.1' WHERE key = 'schema_version'; """) ) From 86630b0c3d10e06a44bed23a56140a1261a14c24 Mon Sep 17 00:00:00 2001 From: Jayaram Kancherla Date: Fri, 13 Dec 2024 08:08:49 -0800 Subject: [PATCH 4/4] remove is compressed check --- src/pybiocfilecache/migrations/migration.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/pybiocfilecache/migrations/migration.py b/src/pybiocfilecache/migrations/migration.py index cd95632..d7b5a68 100644 --- a/src/pybiocfilecache/migrations/migration.py +++ b/src/pybiocfilecache/migrations/migration.py @@ -50,9 +50,9 @@ def _detect_version_from_structure(self) -> str: column_names = {col[1] for col in columns} # Check for columns that indicate version - if "is_compressed" in column_names: - return "0.5.0" - elif "tags" in column_names and "size_bytes" in column_names: + # if "is_compressed" in column_names: + # return "0.5.0" + if "tags" in column_names and "size_bytes" in column_names: return "0.5.0" else: return "0.4.1"