Skip to content

Commit

Permalink
Crawlers: Use TRUNCATE TABLE instead of DELETE FROM when resetting crawler tables. (#2392)
Browse files Browse the repository at this point in the history

## Changes

This PR updates the `.reset()` implementation for crawlers to use
`TRUNCATE TABLE` instead of `DELETE FROM`, and passes the table name through
`escape_sql_identifier`. The change is slightly more efficient and more
idiomatic; it is not a correctness fix.

### Functionality

- [X] modified existing workflow: `migrate-data-reconciliation`

### Tests

- [X] updated unit tests
- [X] covered by integration tests
  • Loading branch information
asnare authored Aug 12, 2024
1 parent f91dd42 commit 3fe030a
Show file tree
Hide file tree
Showing 3 changed files with 5 additions and 3 deletions.
4 changes: 3 additions & 1 deletion src/databricks/labs/ucx/framework/crawlers.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
from databricks.labs.lsql.backends import SqlBackend
from databricks.sdk.errors import NotFound

from databricks.labs.ucx.framework.utils import escape_sql_identifier

logger = logging.getLogger(__name__)


Expand Down Expand Up @@ -52,7 +54,7 @@ def reset(self):
Delete the content of the inventory table.
The next call to `snapshot` will re-populate the table.
"""
self._exec(f"DELETE FROM {self.full_name}")
self._exec(f"TRUNCATE TABLE {escape_sql_identifier(self.full_name)}")

@staticmethod
def _valid(name: str) -> str:
Expand Down
2 changes: 1 addition & 1 deletion tests/unit/hive_metastore/test_table_migrate.py
Original file line number Diff line number Diff line change
Expand Up @@ -921,7 +921,7 @@ def test_table_status_reset():
table_status_crawler = MigrationStatusRefresher(client, backend, "ucx", table_crawler)
table_status_crawler.reset()
assert backend.queries == [
"DELETE FROM hive_metastore.ucx.migration_status",
"TRUNCATE TABLE hive_metastore.ucx.migration_status",
]
table_crawler.snapshot.assert_not_called()
client.catalogs.list.assert_not_called()
Expand Down
2 changes: 1 addition & 1 deletion tests/unit/hive_metastore/test_workflows.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,6 @@ def test_migrate_ctas_views(run_workflow):
def test_refresh_migration_status_is_refreshed(run_workflow, workflow):
"""Migration status is refreshed by deleting and showing new tables"""
ctx = run_workflow(getattr(workflow, "refresh_migration_status"))
assert "DELETE FROM hive_metastore.ucx.migration_status" in ctx.sql_backend.queries
assert "TRUNCATE TABLE hive_metastore.ucx.migration_status" in ctx.sql_backend.queries
assert "SHOW DATABASES" in ctx.sql_backend.queries
# No "SHOW TABLE FROM" query, as tables are not mocked

0 comments on commit 3fe030a

Please sign in to comment.