From ecd2a084d3cb802d6ff941bb0bdda19a591aa487 Mon Sep 17 00:00:00 2001 From: helanto Date: Tue, 3 Sep 2024 09:37:52 +0300 Subject: [PATCH] feat: Add CommitProperties data class --- python/deltalake/table.py | 168 ++++++++++++++++++++-------------- python/deltalake/writer.py | 34 ++++--- python/tests/test_alter.py | 7 +- python/tests/test_delete.py | 5 +- python/tests/test_merge.py | 4 +- python/tests/test_optimize.py | 9 +- python/tests/test_repair.py | 4 +- python/tests/test_restore.py | 4 +- python/tests/test_update.py | 4 +- python/tests/test_vacuum.py | 4 +- 10 files changed, 147 insertions(+), 96 deletions(-) diff --git a/python/deltalake/table.py b/python/deltalake/table.py index cce5f5bcbd..55998c19a1 100644 --- a/python/deltalake/table.py +++ b/python/deltalake/table.py @@ -148,6 +148,25 @@ def __init__( self.cleanup_expired_logs = cleanup_expired_logs +@dataclass(init=True) +class CommitProperties: + """The commit properties. Controls the behaviour of the commit.""" + + def __init__( + self, + custom_metadata: Optional[Dict[str, str]] = None, + max_commit_retries: Optional[int] = None, + ): + """Custom metadata to be stored in the commit. Controls the number of retries for the commit. + + Args: + custom_metadata: custom metadata that will be added to the transaction commit. + max_commit_retries: maximum number of times to retry the transaction commit. + """ + self.custom_metadata = custom_metadata + self.max_commit_retries = max_commit_retries + + @dataclass(init=True) class BloomFilterProperties: """The Bloom Filter Properties instance for the Rust parquet writer.""" @@ -742,9 +761,8 @@ def vacuum( retention_hours: Optional[int] = None, dry_run: bool = True, enforce_retention_duration: bool = True, - custom_metadata: Optional[Dict[str, str]] = None, + commit_properties: Optional[CommitProperties] = None, post_commithook_properties: Optional[PostCommitHookProperties] = None, - max_commit_retries: Optional[int] = None, ) -> List[str]: """ Run the Vacuum command on the Delta Table: list and delete files no longer referenced by the Delta table and are older than the retention threshold. @@ -753,9 +771,8 @@ def vacuum( retention_hours: the retention threshold in hours, if none then the value from `delta.deletedFileRetentionDuration` is used or default of 1 week otherwise. dry_run: when activated, list only the files, delete otherwise enforce_retention_duration: when disabled, accepts retention hours smaller than the value from `delta.deletedFileRetentionDuration`. - custom_metadata: custom metadata that will be added to the transaction commit. + commit_properties: properties of the transaction commit. If None, default values are used. post_commithook_properties: properties for the post commit hook. If None, default values are used. - max_commit_retries: maximum number of times to retry the transaction commit. Returns: the list of files no longer referenced by the Delta Table and are older than the retention threshold. """ @@ -767,9 +784,9 @@ def vacuum( dry_run, retention_hours, enforce_retention_duration, - custom_metadata, + commit_properties.custom_metadata if commit_properties else None, post_commithook_properties, - max_commit_retries, + commit_properties.max_commit_retries if commit_properties else None, ) def update( @@ -781,9 +798,8 @@ def update( predicate: Optional[str] = None, writer_properties: Optional[WriterProperties] = None, error_on_type_mismatch: bool = True, - custom_metadata: Optional[Dict[str, str]] = None, + commit_properties: Optional[CommitProperties] = None, post_commithook_properties: Optional[PostCommitHookProperties] = None, - max_commit_retries: Optional[int] = None, ) -> Dict[str, Any]: """`UPDATE` records in the Delta Table that matches an optional predicate. Either updates or new_values needs to be passed for it to execute. @@ -794,9 +810,8 @@ def update( predicate: a logical expression. writer_properties: Pass writer properties to the Rust parquet writer. error_on_type_mismatch: specify if update will return error if data types are mismatching :default = True - custom_metadata: custom metadata that will be added to the transaction commit. + commit_properties: properties of the transaction commit. If None, default values are used. post_commithook_properties: properties for the post commit hook. If None, default values are used. - max_commit_retries: maximum number of times to retry the transaction commit. Returns: the metrics from update @@ -874,9 +889,13 @@ def update( predicate, writer_properties, safe_cast=not error_on_type_mismatch, - custom_metadata=custom_metadata, + custom_metadata=commit_properties.custom_metadata + if commit_properties + else None, post_commithook_properties=post_commithook_properties, - max_commit_retries=max_commit_retries, + max_commit_retries=commit_properties.max_commit_retries + if commit_properties + else None, ) return json.loads(metrics) @@ -917,9 +936,8 @@ def merge( error_on_type_mismatch: bool = True, writer_properties: Optional[WriterProperties] = None, large_dtypes: Optional[bool] = None, - custom_metadata: Optional[Dict[str, str]] = None, + commit_properties: Optional[CommitProperties] = None, post_commithook_properties: Optional[PostCommitHookProperties] = None, - max_commit_retries: Optional[int] = None, ) -> "TableMerger": """Pass the source data which you want to merge on the target delta table, providing a predicate in SQL query like format. You can also specify on what to do when the underlying data types do not @@ -934,9 +952,9 @@ def merge( writer_properties: Pass writer properties to the Rust parquet writer large_dtypes: Deprecated, will be removed in 1.0 arrow_schema_conversion_mode: Large converts all types of data schema into Large Arrow types, passthrough keeps string/binary/list types untouched - custom_metadata: custom metadata that will be added to the transaction commit. + custom_metadata: properties for the commit. If None, default values are used. post_commithook_properties: properties for the post commit hook. If None, default values are used. - max_commit_retries: maximum number of times to retry the transaction commit. + Returns: TableMerger: TableMerger Object @@ -984,9 +1002,13 @@ def merge( target_alias=target_alias, safe_cast=not error_on_type_mismatch, writer_properties=writer_properties, - custom_metadata=custom_metadata, + custom_metadata=commit_properties.custom_metadata + if commit_properties + else None, post_commithook_properties=post_commithook_properties, - max_commit_retries=max_commit_retries, + max_commit_retries=commit_properties.max_commit_retries + if commit_properties + else None, ) return TableMerger(py_merge_builder, self._table) @@ -996,8 +1018,7 @@ def restore( *, ignore_missing_files: bool = False, protocol_downgrade_allowed: bool = False, - custom_metadata: Optional[Dict[str, str]] = None, - max_commit_retries: Optional[int] = None, + commit_properties: Optional[CommitProperties] = None, ) -> Dict[str, Any]: """ Run the Restore command on the Delta Table: restore table to a given version or datetime. @@ -1006,8 +1027,7 @@ def restore( target: the expected version will restore, which represented by int, date str or datetime. ignore_missing_files: whether the operation carry on when some data files missing. protocol_downgrade_allowed: whether the operation when protocol version upgraded. - custom_metadata: custom metadata that will be added to the transaction commit. - max_commit_retries: maximum number of times to retry the transaction commit. + commit_properties: properties of the transaction commit. If None, default values are used. Returns: the metrics from restore. @@ -1017,16 +1037,24 @@ def restore( target.isoformat(), ignore_missing_files=ignore_missing_files, protocol_downgrade_allowed=protocol_downgrade_allowed, - custom_metadata=custom_metadata, - max_commit_retries=max_commit_retries, + custom_metadata=commit_properties.custom_metadata + if commit_properties + else None, + max_commit_retries=commit_properties.max_commit_retries + if commit_properties + else None, ) else: metrics = self._table.restore( target, ignore_missing_files=ignore_missing_files, protocol_downgrade_allowed=protocol_downgrade_allowed, - custom_metadata=custom_metadata, - max_commit_retries=max_commit_retries, + custom_metadata=commit_properties.custom_metadata + if commit_properties + else None, + max_commit_retries=commit_properties.max_commit_retries + if commit_properties + else None, ) return json.loads(metrics) @@ -1253,9 +1281,8 @@ def delete( self, predicate: Optional[str] = None, writer_properties: Optional[WriterProperties] = None, - custom_metadata: Optional[Dict[str, str]] = None, + commit_properties: Optional[CommitProperties] = None, post_commithook_properties: Optional[PostCommitHookProperties] = None, - max_commit_retries: Optional[int] = None, ) -> Dict[str, Any]: """Delete records from a Delta Table that statisfy a predicate. @@ -1267,24 +1294,26 @@ def delete( Args: predicate: a SQL where clause. If not passed, will delete all rows. writer_properties: Pass writer properties to the Rust parquet writer. - custom_metadata: custom metadata that will be added to the transaction commit. + commit_properties: properties of the transaction commit. If None, default values are used. post_commithook_properties: properties for the post commit hook. If None, default values are used. - max_commit_retries: maximum number of times to retry the transaction commit. Returns: the metrics from delete. """ metrics = self._table.delete( - predicate, writer_properties, custom_metadata, post_commithook_properties, max_commit_retries + predicate, + writer_properties, + commit_properties.custom_metadata if commit_properties else None, + post_commithook_properties, + commit_properties.max_commit_retries if commit_properties else None, ) return json.loads(metrics) def repair( self, dry_run: bool = False, - custom_metadata: Optional[Dict[str, str]] = None, + commit_properties: Optional[CommitProperties] = None, post_commithook_properties: Optional[PostCommitHookProperties] = None, - max_commit_retries: Optional[int] = None, ) -> Dict[str, Any]: """Repair the Delta Table by auditing active files that do not exist in the underlying filesystem and removes them. This can be useful when there are accidental deletions or corrupted files. @@ -1295,9 +1324,8 @@ def repair( Args: dry_run: when activated, list only the files, otherwise add remove actions to transaction log. Defaults to False. - custom_metadata: custom metadata that will be added to the transaction commit. + commit_properties: properties of the transaction commit. If None, default values are used. post_commithook_properties: properties for the post commit hook. If None, default values are used. - max_commit_retries: maximum number of times to retry the transaction commit. Returns: The metrics from repair (FSCK) action. @@ -1314,7 +1342,10 @@ def repair( ``` """ metrics = self._table.repair( - dry_run, custom_metadata, post_commithook_properties, max_commit_retries + dry_run, + commit_properties.custom_metadata if commit_properties else None, + post_commithook_properties, + commit_properties.max_commit_retries if commit_properties else None, ) return json.loads(metrics) @@ -1704,17 +1735,15 @@ def __init__(self, table: DeltaTable) -> None: def add_columns( self, fields: Union[DeltaField, List[DeltaField]], - custom_metadata: Optional[Dict[str, str]] = None, + commit_properties: Optional[CommitProperties] = None, post_commithook_properties: Optional[PostCommitHookProperties] = None, - max_commit_retries: Optional[int] = None, ) -> None: """Add new columns and/or update the fields of a stuctcolumn Args: fields: fields to merge into schema - custom_metadata: custom metadata that will be added to the transaction commit. + commit_properties: properties of the transaction commit. If None, default values are used. post_commithook_properties: properties for the post commit hook. If None, default values are used. - max_commit_retries: maximum number of times to retry the transaction commit. Example: ```python @@ -1734,24 +1763,25 @@ def add_columns( fields = [fields] self.table._table.add_columns( - fields, custom_metadata, post_commithook_properties, max_commit_retries + fields, + commit_properties.custom_metadata if commit_properties else None, + post_commithook_properties, + commit_properties.max_commit_retries if commit_properties else None, ) def add_constraint( self, constraints: Dict[str, str], - custom_metadata: Optional[Dict[str, str]] = None, + commit_properties: Optional[CommitProperties] = None, post_commithook_properties: Optional[PostCommitHookProperties] = None, - max_commit_retries: Optional[int] = None, ) -> None: """ Add constraints to the table. Limited to `single constraint` at once. Args: constraints: mapping of constraint name to SQL-expression to evaluate on write - custom_metadata: custom metadata that will be added to the transaction commit. + commit_properties: properties of the transaction commit. If None, default values are used. post_commithook_properties: properties for the post commit hook. If None, default values are used. - max_commit_retries: maximum number of times to retry the transaction commit. Example: ```python @@ -1775,16 +1805,18 @@ def add_constraint( ) self.table._table.add_constraints( - constraints, custom_metadata, post_commithook_properties, max_commit_retries + constraints, + commit_properties.custom_metadata if commit_properties else None, + post_commithook_properties, + commit_properties.max_commit_retries if commit_properties else None, ) def drop_constraint( self, name: str, raise_if_not_exists: bool = True, - custom_metadata: Optional[Dict[str, str]] = None, + commit_properties: Optional[CommitProperties] = None, post_commithook_properties: Optional[PostCommitHookProperties] = None, - max_commit_retries: Optional[int] = None, ) -> None: """ Drop constraints from a table. Limited to `single constraint` at once. @@ -1792,9 +1824,8 @@ def drop_constraint( Args: name: constraint name which to drop. raise_if_not_exists: set if should raise if not exists. - custom_metadata: custom metadata that will be added to the transaction commit. + commit_properties: properties of the transaction commit. If None, default values are used. post_commithook_properties: properties for the post commit hook. If None, default values are used. - max_commit_retries: maximum number of times to retry the transaction commit. Example: ```python @@ -1816,15 +1847,18 @@ def drop_constraint( ``` """ self.table._table.drop_constraints( - name, raise_if_not_exists, custom_metadata, post_commithook_properties, max_commit_retries, + name, + raise_if_not_exists, + commit_properties.custom_metadata if commit_properties else None, + post_commithook_properties, + commit_properties.max_commit_retries if commit_properties else None, ) def set_table_properties( self, properties: Dict[str, str], raise_if_not_exists: bool = True, - custom_metadata: Optional[Dict[str, str]] = None, - max_commit_retries: Optional[int] = None, + commit_properties: Optional[CommitProperties] = None, ) -> None: """ Set properties from the table. @@ -1832,8 +1866,7 @@ def set_table_properties( Args: properties: properties which set raise_if_not_exists: set if should raise if not exists. - custom_metadata: custom metadata that will be added to the transaction commit. - max_commit_retries: maximum number of times to retry the transaction commit. + commit_properties: properties of the transaction commit. If None, default values are used. Example: ```python @@ -1851,7 +1884,10 @@ def set_table_properties( ``` """ self.table._table.set_table_properties( - properties, raise_if_not_exists, custom_metadata, max_commit_retries + properties, + raise_if_not_exists, + commit_properties.custom_metadata if commit_properties else None, + commit_properties.max_commit_retries if commit_properties else None, ) @@ -1868,9 +1904,8 @@ def compact( max_concurrent_tasks: Optional[int] = None, min_commit_interval: Optional[Union[int, timedelta]] = None, writer_properties: Optional[WriterProperties] = None, - custom_metadata: Optional[Dict[str, str]] = None, + commit_properties: Optional[CommitProperties] = None, post_commithook_properties: Optional[PostCommitHookProperties] = None, - max_commit_retries: Optional[int] = None, ) -> Dict[str, Any]: """ Compacts small files to reduce the total number of files in the table. @@ -1893,9 +1928,8 @@ def compact( created. Interval is useful for long running executions. Set to 0 or timedelta(0), if you want a commit per partition. writer_properties: Pass writer properties to the Rust parquet writer. - custom_metadata: custom metadata that will be added to the transaction commit. + commit_properties: properties of the transaction commit. If None, default values are used. post_commithook_properties: properties for the post commit hook. If None, default values are used. - max_commit_retries: maximum number of times to retry the transaction commit. Returns: the metrics from optimize @@ -1925,9 +1959,9 @@ def compact( max_concurrent_tasks, min_commit_interval, writer_properties, - custom_metadata, + commit_properties.custom_metadata if commit_properties else None, post_commithook_properties, - max_commit_retries, + commit_properties.max_commit_retries if commit_properties else None, ) self.table.update_incremental() return json.loads(metrics) @@ -1941,9 +1975,8 @@ def z_order( max_spill_size: int = 20 * 1024 * 1024 * 1024, min_commit_interval: Optional[Union[int, timedelta]] = None, writer_properties: Optional[WriterProperties] = None, - custom_metadata: Optional[Dict[str, str]] = None, + commit_properties: Optional[CommitProperties] = None, post_commithook_properties: Optional[PostCommitHookProperties] = None, - max_commit_retries: Optional[int] = None, ) -> Dict[str, Any]: """ Reorders the data using a Z-order curve to improve data skipping. @@ -1964,9 +1997,8 @@ def z_order( created. Interval is useful for long running executions. Set to 0 or timedelta(0), if you want a commit per partition. writer_properties: Pass writer properties to the Rust parquet writer. - custom_metadata: custom metadata that will be added to the transaction commit. + commit_properties: properties of the transaction commit. If None, default values are used. post_commithook_properties: properties for the post commit hook. If None, default values are used. - max_commit_retries: maximum number of times to retry the transaction commit. Returns: the metrics from optimize @@ -1998,9 +2030,9 @@ def z_order( max_spill_size, min_commit_interval, writer_properties, - custom_metadata, + commit_properties.custom_metadata if commit_properties else None, post_commithook_properties, - max_commit_retries, + commit_properties.max_commit_retries if commit_properties else None, ) self.table.update_incremental() return json.loads(metrics) diff --git a/python/deltalake/writer.py b/python/deltalake/writer.py index c3cdd92473..3ae5c5084e 100644 --- a/python/deltalake/writer.py +++ b/python/deltalake/writer.py @@ -52,6 +52,7 @@ MAX_SUPPORTED_PYARROW_WRITER_VERSION, NOT_SUPPORTED_PYARROW_WRITER_VERSIONS, SUPPORTED_WRITER_FEATURES, + CommitProperties, DeltaTable, PostCommitHookProperties, WriterProperties, @@ -122,9 +123,8 @@ def write_deltalake( partition_filters: Optional[List[Tuple[str, str, Any]]] = ..., large_dtypes: bool = ..., engine: Literal["pyarrow"] = ..., - custom_metadata: Optional[Dict[str, str]] = ..., + commit_properties: Optional[CommitProperties] = ..., post_commithook_properties: Optional[PostCommitHookProperties] = ..., - max_commit_retries: Optional[int] = ..., ) -> None: ... @@ -152,9 +152,8 @@ def write_deltalake( large_dtypes: bool = ..., engine: Literal["rust"] = ..., writer_properties: WriterProperties = ..., - custom_metadata: Optional[Dict[str, str]] = ..., + commit_properties: Optional[CommitProperties] = ..., post_commithook_properties: Optional[PostCommitHookProperties] = ..., - max_commit_retries: Optional[int] = ..., ) -> None: ... @@ -184,9 +183,8 @@ def write_deltalake( large_dtypes: bool = ..., engine: Literal["rust"] = ..., writer_properties: WriterProperties = ..., - custom_metadata: Optional[Dict[str, str]] = ..., + commit_properties: Optional[CommitProperties] = ..., post_commithook_properties: Optional[PostCommitHookProperties] = ..., - max_commit_retries: Optional[int] = ..., ) -> None: ... @@ -222,9 +220,8 @@ def write_deltalake( large_dtypes: bool = False, engine: Literal["pyarrow", "rust"] = "rust", writer_properties: Optional[WriterProperties] = None, - custom_metadata: Optional[Dict[str, str]] = None, + commit_properties: Optional[CommitProperties] = None, post_commithook_properties: Optional[PostCommitHookProperties] = None, - max_commit_retries: Optional[int] = None, ) -> None: """Write to a Delta Lake table @@ -279,9 +276,8 @@ def write_deltalake( large_dtypes: Only used for pyarrow engine engine: writer engine to write the delta table. PyArrow engine is deprecated, and will be removed in v1.0. writer_properties: Pass writer properties to the Rust parquet writer. - custom_metadata: Custom metadata to add to the commitInfo. + commit_properties: properties of the transaction commit. If None, default values are used. post_commithook_properties: properties for the post commit hook. If None, default values are used. - max_commit_retries: maximum number of times to retry the transaction commit. """ table, table_uri = try_get_table_and_table_uri(table_or_uri, storage_options) if table is not None: @@ -322,9 +318,13 @@ def write_deltalake( configuration=configuration, storage_options=storage_options, writer_properties=writer_properties, - custom_metadata=custom_metadata, + custom_metadata=commit_properties.custom_metadata + if commit_properties + else None, post_commithook_properties=post_commithook_properties, - max_commit_retries=max_commit_retries, + max_commit_retries=commit_properties.max_commit_retries + if commit_properties + else None, ) if table: table.update_incremental() @@ -547,7 +547,7 @@ def validate_batch(batch: pa.RecordBatch) -> pa.RecordBatch: description, configuration, storage_options, - custom_metadata, + commit_properties.custom_metadata if commit_properties else None, ) else: table._table.create_write_transaction( @@ -556,9 +556,13 @@ def validate_batch(batch: pa.RecordBatch) -> pa.RecordBatch: partition_by or [], schema, partition_filters, - custom_metadata, + custom_metadata=commit_properties.custom_metadata + if commit_properties + else None, post_commithook_properties=post_commithook_properties, - max_commit_retries=max_commit_retries, + max_commit_retries=commit_properties.max_commit_retries + if commit_properties + else None, ) table.update_incremental() else: diff --git a/python/tests/test_alter.py b/python/tests/test_alter.py index b931939348..65ac7e07ac 100644 --- a/python/tests/test_alter.py +++ b/python/tests/test_alter.py @@ -7,6 +7,7 @@ from deltalake import DeltaTable, write_deltalake from deltalake.exceptions import DeltaError, DeltaProtocolError from deltalake.schema import Field, PrimitiveType, StructType +from deltalake.table import CommitProperties def test_add_constraint(tmp_path: pathlib.Path, sample_table: pa.Table): @@ -58,8 +59,9 @@ def test_add_constraint_roundtrip_metadata( dt = DeltaTable(tmp_path) + commit_properties = CommitProperties(custom_metadata={"userName": "John Doe"}) dt.alter.add_constraint( - {"check_price2": "price >= 0"}, custom_metadata={"userName": "John Doe"} + {"check_price2": "price >= 0"}, commit_properties=commit_properties ) assert dt.history(1)[0]["userName"] == "John Doe" @@ -112,7 +114,8 @@ def test_drop_constraint_roundtrip_metadata( dt = DeltaTable(tmp_path) dt.alter.add_constraint({"check_price2": "price >= 0"}) - dt.alter.drop_constraint("check_price2", custom_metadata={"userName": "John Doe"}) + commit_properties = CommitProperties(custom_metadata={"userName": "John Doe"}) + dt.alter.drop_constraint("check_price2", commit_properties=commit_properties) assert dt.history(1)[0]["userName"] == "John Doe" diff --git a/python/tests/test_delete.py b/python/tests/test_delete.py index 65b5ebdec3..9d93b9f95f 100644 --- a/python/tests/test_delete.py +++ b/python/tests/test_delete.py @@ -4,14 +4,15 @@ import pyarrow.compute as pc import pytest -from deltalake.table import DeltaTable +from deltalake.table import CommitProperties, DeltaTable from deltalake.writer import write_deltalake def test_delete_no_predicates(existing_table: DeltaTable): old_version = existing_table.version() - existing_table.delete(custom_metadata={"userName": "John Doe"}) + commit_properties = CommitProperties(custom_metadata={"userName": "John Doe"}) + existing_table.delete(commit_properties=commit_properties) last_action = existing_table.history(1)[0] assert last_action["operation"] == "DELETE" diff --git a/python/tests/test_merge.py b/python/tests/test_merge.py index ea13adf85b..54c2726fd3 100644 --- a/python/tests/test_merge.py +++ b/python/tests/test_merge.py @@ -4,6 +4,7 @@ import pytest from deltalake import DeltaTable, write_deltalake +from deltalake.table import CommitProperties def test_merge_when_matched_delete_wo_predicate( @@ -20,12 +21,13 @@ def test_merge_when_matched_delete_wo_predicate( } ) + commit_properties = CommitProperties(custom_metadata={"userName": "John Doe"}) dt.merge( source=source_table, predicate="t.id = s.id", source_alias="s", target_alias="t", - custom_metadata={"userName": "John Doe"}, + commit_properties=commit_properties, ).when_matched_delete().execute() nrows = 4 diff --git a/python/tests/test_optimize.py b/python/tests/test_optimize.py index 8cb0902dae..2c9685e116 100644 --- a/python/tests/test_optimize.py +++ b/python/tests/test_optimize.py @@ -5,6 +5,7 @@ import pytest from deltalake import DeltaTable, write_deltalake +from deltalake.table import CommitProperties @pytest.mark.parametrize("engine", ["pyarrow", "rust"]) @@ -39,7 +40,8 @@ def test_optimize_run_table( old_data = dt.to_pyarrow_table() old_version = dt.version() - dt.optimize.compact(custom_metadata={"userName": "John Doe"}) + commit_properties = CommitProperties(custom_metadata={"userName": "John Doe"}) + dt.optimize.compact(commit_properties=commit_properties) new_data = dt.to_pyarrow_table() last_action = dt.history(1)[0] @@ -70,9 +72,8 @@ def test_z_order_optimize( dt = DeltaTable(tmp_path) old_version = dt.version() - dt.optimize.z_order( - ["date32", "timestamp"], custom_metadata={"userName": "John Doe"} - ) + commit_properties = CommitProperties(custom_metadata={"userName": "John Doe"}) + dt.optimize.z_order(["date32", "timestamp"], commit_properties=commit_properties) last_action = dt.history(1)[0] assert last_action["operation"] == "OPTIMIZE" assert last_action["userName"] == "John Doe" diff --git a/python/tests/test_repair.py b/python/tests/test_repair.py index 1d4a6adfa8..634bcbd441 100644 --- a/python/tests/test_repair.py +++ b/python/tests/test_repair.py @@ -1,6 +1,7 @@ import os from deltalake import DeltaTable, write_deltalake +from deltalake.table import CommitProperties def test_repair_with_dry_run(tmp_path, sample_data): @@ -23,7 +24,8 @@ def test_repair_wo_dry_run(tmp_path, sample_data): dt = DeltaTable(tmp_path) os.remove(dt.file_uris()[0]) - metrics = dt.repair(dry_run=False, custom_metadata={"userName": "John Doe"}) + commit_properties = CommitProperties(custom_metadata={"userName": "John Doe"}) + metrics = dt.repair(dry_run=False, commit_properties=commit_properties) last_action = dt.history(1)[0] assert len(metrics["files_removed"]) == 1 diff --git a/python/tests/test_restore.py b/python/tests/test_restore.py index 099b887726..47fd5c21de 100644 --- a/python/tests/test_restore.py +++ b/python/tests/test_restore.py @@ -5,6 +5,7 @@ import pytest from deltalake import DeltaTable, write_deltalake +from deltalake.table import CommitProperties @pytest.mark.parametrize("use_relative", [True, False]) @@ -24,7 +25,8 @@ def test_restore_with_version( dt = DeltaTable(table_path) old_version = dt.version() - dt.restore(1, custom_metadata={"userName": "John Doe"}) + commit_properties = CommitProperties(custom_metadata={"userName": "John Doe"}) + dt.restore(1, commit_properties=commit_properties) last_action = dt.history(1)[0] assert last_action["operation"] == "RESTORE" assert last_action["userName"] == "John Doe" diff --git a/python/tests/test_update.py b/python/tests/test_update.py index 554cc276b5..3ae39dadae 100644 --- a/python/tests/test_update.py +++ b/python/tests/test_update.py @@ -4,6 +4,7 @@ import pytest from deltalake import DeltaTable, write_deltalake +from deltalake.table import CommitProperties @pytest.fixture() @@ -38,10 +39,11 @@ def test_update_with_predicate(tmp_path: pathlib.Path, sample_table: pa.Table): } ) + commit_properties = CommitProperties(custom_metadata={"userName": "John Doe"}) dt.update( updates={"deleted": "True"}, predicate="price > 3", - custom_metadata={"userName": "John Doe"}, + commit_properties=commit_properties, ) result = dt.to_pyarrow_table() diff --git a/python/tests/test_vacuum.py b/python/tests/test_vacuum.py index 44c2195e17..6f1b1dd7c5 100644 --- a/python/tests/test_vacuum.py +++ b/python/tests/test_vacuum.py @@ -5,6 +5,7 @@ import pytest from deltalake import DeltaTable, write_deltalake +from deltalake.table import CommitProperties def test_vacuum_dry_run_simple_table(): @@ -72,11 +73,12 @@ def test_vacuum_transaction_log(tmp_path: pathlib.Path, sample_data: pa.Table): dt = DeltaTable(tmp_path) + commit_properties = CommitProperties(custom_metadata={"userName": "John Doe"}) dt.vacuum( retention_hours=0, dry_run=False, enforce_retention_duration=False, - custom_metadata={"userName": "John Doe"}, + commit_properties=commit_properties, ) dt = DeltaTable(tmp_path)