diff --git a/python/deltalake/table.py b/python/deltalake/table.py index 55998c19a1..689195d08c 100644 --- a/python/deltalake/table.py +++ b/python/deltalake/table.py @@ -761,8 +761,9 @@ def vacuum( retention_hours: Optional[int] = None, dry_run: bool = True, enforce_retention_duration: bool = True, - commit_properties: Optional[CommitProperties] = None, + custom_metadata: Optional[Dict[str, str]] = None, post_commithook_properties: Optional[PostCommitHookProperties] = None, + commit_properties: Optional[CommitProperties] = None, ) -> List[str]: """ Run the Vacuum command on the Delta Table: list and delete files no longer referenced by the Delta table and are older than the retention threshold. @@ -771,11 +772,19 @@ def vacuum( retention_hours: the retention threshold in hours, if none then the value from `delta.deletedFileRetentionDuration` is used or default of 1 week otherwise. dry_run: when activated, list only the files, delete otherwise enforce_retention_duration: when disabled, accepts retention hours smaller than the value from `delta.deletedFileRetentionDuration`. - commit_properties: properties of the transaction commit. If None, default values are used. + custom_metadata: Deprecated and will be removed in future versions. Use commit_properties instead. post_commithook_properties: properties for the post commit hook. If None, default values are used. + commit_properties: properties of the transaction commit. If None, default values are used. Returns: the list of files no longer referenced by the Delta Table and are older than the retention threshold. """ + if custom_metadata: + warnings.warn( + "custom_metadata is deprecated, please use commit_properties instead.", + category=DeprecationWarning, + stacklevel=2, + ) + if retention_hours: if retention_hours < 0: raise ValueError("The retention periods should be positive.") @@ -798,8 +807,9 @@ def update( predicate: Optional[str] = None, writer_properties: Optional[WriterProperties] = None, error_on_type_mismatch: bool = True, - commit_properties: Optional[CommitProperties] = None, + custom_metadata: Optional[Dict[str, str]] = None, post_commithook_properties: Optional[PostCommitHookProperties] = None, + commit_properties: Optional[CommitProperties] = None, ) -> Dict[str, Any]: """`UPDATE` records in the Delta Table that matches an optional predicate. Either updates or new_values needs to be passed for it to execute. @@ -810,8 +820,9 @@ def update( predicate: a logical expression. writer_properties: Pass writer properties to the Rust parquet writer. error_on_type_mismatch: specify if update will return error if data types are mismatching :default = True - commit_properties: properties of the transaction commit. If None, default values are used. + custom_metadata: Deprecated and will be removed in future versions. Use commit_properties instead. post_commithook_properties: properties for the post commit hook. If None, default values are used. + commit_properties: properties of the transaction commit. If None, default values are used. Returns: the metrics from update @@ -853,6 +864,13 @@ def update( {'num_added_files': 1, 'num_removed_files': 1, 'num_updated_rows': 1, 'num_copied_rows': 2, 'execution_time_ms': ..., 'scan_time_ms': ...} ``` """ + if custom_metadata: + warnings.warn( + "custom_metadata is deprecated, please use commit_properties instead.", + category=DeprecationWarning, + stacklevel=2, + ) + if updates is None and new_values is not None: updates = {} for key, value in new_values.items(): @@ -936,8 +954,9 @@ def merge( error_on_type_mismatch: bool = True, writer_properties: Optional[WriterProperties] = None, large_dtypes: Optional[bool] = None, - commit_properties: Optional[CommitProperties] = None, + custom_metadata: Optional[Dict[str, str]] = None, post_commithook_properties: Optional[PostCommitHookProperties] = None, + commit_properties: Optional[CommitProperties] = None, ) -> "TableMerger": """Pass the source data which you want to merge on the target delta table, providing a predicate in SQL query like format. You can also specify on what to do when the underlying data types do not @@ -952,13 +971,20 @@ def merge( writer_properties: Pass writer properties to the Rust parquet writer large_dtypes: Deprecated, will be removed in 1.0 arrow_schema_conversion_mode: Large converts all types of data schema into Large Arrow types, passthrough keeps string/binary/list types untouched - custom_metadata: properties for the commit. If None, default values are used. + custom_metadata: Deprecated and will be removed in future versions. Use commit_properties instead. post_commithook_properties: properties for the post commit hook. If None, default values are used. - + commit_properties: properties for the commit. If None, default values are used. Returns: TableMerger: TableMerger Object """ + if custom_metadata: + warnings.warn( + "custom_metadata is deprecated, please use commit_properties instead.", + category=DeprecationWarning, + stacklevel=2, + ) + if large_dtypes: warnings.warn( "large_dtypes is deprecated", @@ -1018,6 +1044,7 @@ def restore( *, ignore_missing_files: bool = False, protocol_downgrade_allowed: bool = False, + custom_metadata: Optional[Dict[str, str]] = None, commit_properties: Optional[CommitProperties] = None, ) -> Dict[str, Any]: """ @@ -1027,11 +1054,19 @@ def restore( target: the expected version will restore, which represented by int, date str or datetime. ignore_missing_files: whether the operation carry on when some data files missing. protocol_downgrade_allowed: whether the operation when protocol version upgraded. + custom_metadata: Deprecated and will be removed in future versions. Use commit_properties instead. commit_properties: properties of the transaction commit. If None, default values are used. Returns: the metrics from restore. """ + if custom_metadata: + warnings.warn( + "custom_metadata is deprecated, please use commit_properties instead.", + category=DeprecationWarning, + stacklevel=2, + ) + if isinstance(target, datetime): metrics = self._table.restore( target.isoformat(), @@ -1281,8 +1316,9 @@ def delete( self, predicate: Optional[str] = None, writer_properties: Optional[WriterProperties] = None, - commit_properties: Optional[CommitProperties] = None, + custom_metadata: Optional[Dict[str, str]] = None, post_commithook_properties: Optional[PostCommitHookProperties] = None, + commit_properties: Optional[CommitProperties] = None, ) -> Dict[str, Any]: """Delete records from a Delta Table that statisfy a predicate. @@ -1294,12 +1330,20 @@ def delete( Args: predicate: a SQL where clause. If not passed, will delete all rows. writer_properties: Pass writer properties to the Rust parquet writer. - commit_properties: properties of the transaction commit. If None, default values are used. + custom_metadata: Deprecated and will be removed in future versions. Use commit_properties instead. post_commithook_properties: properties for the post commit hook. If None, default values are used. + commit_properties: properties of the transaction commit. If None, default values are used. Returns: the metrics from delete. """ + if custom_metadata: + warnings.warn( + "custom_metadata is deprecated, please use commit_properties instead.", + category=DeprecationWarning, + stacklevel=2, + ) + metrics = self._table.delete( predicate, writer_properties, @@ -1312,8 +1356,9 @@ def delete( def repair( self, dry_run: bool = False, - commit_properties: Optional[CommitProperties] = None, + custom_metadata: Optional[Dict[str, str]] = None, post_commithook_properties: Optional[PostCommitHookProperties] = None, + commit_properties: Optional[CommitProperties] = None, ) -> Dict[str, Any]: """Repair the Delta Table by auditing active files that do not exist in the underlying filesystem and removes them. This can be useful when there are accidental deletions or corrupted files. @@ -1324,8 +1369,9 @@ def repair( Args: dry_run: when activated, list only the files, otherwise add remove actions to transaction log. Defaults to False. - commit_properties: properties of the transaction commit. If None, default values are used. + custom_metadata: Deprecated and will be removed in future versions. Use commit_properties instead. post_commithook_properties: properties for the post commit hook. If None, default values are used. + commit_properties: properties of the transaction commit. If None, default values are used. Returns: The metrics from repair (FSCK) action. @@ -1341,6 +1387,13 @@ def repair( {'dry_run': False, 'files_removed': ['6-0d084325-6885-4847-b008-82c1cf30674c-0.parquet', 5-4fba1d3e-3e20-4de1-933d-a8e13ac59f53-0.parquet']} ``` """ + if custom_metadata: + warnings.warn( + "custom_metadata is deprecated, please use commit_properties instead.", + category=DeprecationWarning, + stacklevel=2, + ) + metrics = self._table.repair( dry_run, commit_properties.custom_metadata if commit_properties else None, @@ -1735,15 +1788,17 @@ def __init__(self, table: DeltaTable) -> None: def add_columns( self, fields: Union[DeltaField, List[DeltaField]], - commit_properties: Optional[CommitProperties] = None, + custom_metadata: Optional[Dict[str, str]] = None, post_commithook_properties: Optional[PostCommitHookProperties] = None, + commit_properties: Optional[CommitProperties] = None, ) -> None: """Add new columns and/or update the fields of a stuctcolumn Args: fields: fields to merge into schema - commit_properties: properties of the transaction commit. If None, default values are used. + custom_metadata: Deprecated and will be removed in future versions. Use commit_properties instead. post_commithook_properties: properties for the post commit hook. If None, default values are used. + commit_properties: properties of the transaction commit. If None, default values are used. Example: ```python @@ -1759,6 +1814,13 @@ def add_columns( ) ``` """ + if custom_metadata: + warnings.warn( + "custom_metadata is deprecated, please use commit_properties instead.", + category=DeprecationWarning, + stacklevel=2, + ) + if isinstance(fields, DeltaField): fields = [fields] @@ -1772,16 +1834,18 @@ def add_columns( def add_constraint( self, constraints: Dict[str, str], - commit_properties: Optional[CommitProperties] = None, + custom_metadata: Optional[Dict[str, str]] = None, post_commithook_properties: Optional[PostCommitHookProperties] = None, + commit_properties: Optional[CommitProperties] = None, ) -> None: """ Add constraints to the table. Limited to `single constraint` at once. Args: constraints: mapping of constraint name to SQL-expression to evaluate on write - commit_properties: properties of the transaction commit. If None, default values are used. + custom_metadata: Deprecated and will be removed in future versions. Use commit_properties instead. post_commithook_properties: properties for the post commit hook. If None, default values are used. + commit_properties: properties of the transaction commit. If None, default values are used. Example: ```python @@ -1798,6 +1862,13 @@ def add_constraint( {'delta.constraints.value_gt_5': 'value > 5'} ``` """ + if custom_metadata: + warnings.warn( + "custom_metadata is deprecated, please use commit_properties instead.", + category=DeprecationWarning, + stacklevel=2, + ) + if len(constraints.keys()) > 1: raise ValueError( """add_constraints is limited to a single constraint addition at once for now. @@ -1815,8 +1886,9 @@ def drop_constraint( self, name: str, raise_if_not_exists: bool = True, - commit_properties: Optional[CommitProperties] = None, + custom_metadata: Optional[Dict[str, str]] = None, post_commithook_properties: Optional[PostCommitHookProperties] = None, + commit_properties: Optional[CommitProperties] = None, ) -> None: """ Drop constraints from a table. Limited to `single constraint` at once. @@ -1824,8 +1896,9 @@ def drop_constraint( Args: name: constraint name which to drop. raise_if_not_exists: set if should raise if not exists. - commit_properties: properties of the transaction commit. If None, default values are used. + custom_metadata: Deprecated and will be removed in future versions. Use commit_properties instead. post_commithook_properties: properties for the post commit hook. If None, default values are used. + commit_properties: properties of the transaction commit. If None, default values are used. Example: ```python @@ -1846,6 +1919,13 @@ def drop_constraint( {} ``` """ + if custom_metadata: + warnings.warn( + "custom_metadata is deprecated, please use commit_properties instead.", + category=DeprecationWarning, + stacklevel=2, + ) + self.table._table.drop_constraints( name, raise_if_not_exists, @@ -1858,6 +1938,7 @@ def set_table_properties( self, properties: Dict[str, str], raise_if_not_exists: bool = True, + custom_metadata: Optional[Dict[str, str]] = None, commit_properties: Optional[CommitProperties] = None, ) -> None: """ @@ -1866,6 +1947,7 @@ def set_table_properties( Args: properties: properties which set raise_if_not_exists: set if should raise if not exists. + custom_metadata: Deprecated and will be removed in future versions. Use commit_properties instead. commit_properties: properties of the transaction commit. If None, default values are used. Example: @@ -1883,6 +1965,13 @@ def set_table_properties( dt.alter.set_table_properties({"delta.enableChangeDataFeed": "true"}) ``` """ + if custom_metadata: + warnings.warn( + "custom_metadata is deprecated, please use commit_properties instead.", + category=DeprecationWarning, + stacklevel=2, + ) + self.table._table.set_table_properties( properties, raise_if_not_exists, @@ -1904,8 +1993,9 @@ def compact( max_concurrent_tasks: Optional[int] = None, min_commit_interval: Optional[Union[int, timedelta]] = None, writer_properties: Optional[WriterProperties] = None, - commit_properties: Optional[CommitProperties] = None, + custom_metadata: Optional[Dict[str, str]] = None, post_commithook_properties: Optional[PostCommitHookProperties] = None, + commit_properties: Optional[CommitProperties] = None, ) -> Dict[str, Any]: """ Compacts small files to reduce the total number of files in the table. @@ -1928,8 +2018,9 @@ def compact( created. Interval is useful for long running executions. Set to 0 or timedelta(0), if you want a commit per partition. writer_properties: Pass writer properties to the Rust parquet writer. - commit_properties: properties of the transaction commit. If None, default values are used. + custom_metadata: Deprecated and will be removed in future versions. Use commit_properties instead. post_commithook_properties: properties for the post commit hook. If None, default values are used. + commit_properties: properties of the transaction commit. If None, default values are used. Returns: the metrics from optimize @@ -1950,6 +2041,13 @@ def compact( {'numFilesAdded': 1, 'numFilesRemoved': 2, 'filesAdded': ..., 'filesRemoved': ..., 'partitionsOptimized': 1, 'numBatches': 2, 'totalConsideredFiles': 2, 'totalFilesSkipped': 0, 'preserveInsertionOrder': True} ``` """ + if custom_metadata: + warnings.warn( + "custom_metadata is deprecated, please use commit_properties instead.", + category=DeprecationWarning, + stacklevel=2, + ) + if isinstance(min_commit_interval, timedelta): min_commit_interval = int(min_commit_interval.total_seconds()) @@ -1975,8 +2073,9 @@ def z_order( max_spill_size: int = 20 * 1024 * 1024 * 1024, min_commit_interval: Optional[Union[int, timedelta]] = None, writer_properties: Optional[WriterProperties] = None, - commit_properties: Optional[CommitProperties] = None, + custom_metadata: Optional[Dict[str, str]] = None, post_commithook_properties: Optional[PostCommitHookProperties] = None, + commit_properties: Optional[CommitProperties] = None, ) -> Dict[str, Any]: """ Reorders the data using a Z-order curve to improve data skipping. @@ -1997,8 +2096,9 @@ def z_order( created. Interval is useful for long running executions. Set to 0 or timedelta(0), if you want a commit per partition. writer_properties: Pass writer properties to the Rust parquet writer. - commit_properties: properties of the transaction commit. If None, default values are used. + custom_metadata: Deprecated and will be removed in future versions. Use commit_properties instead. post_commithook_properties: properties for the post commit hook. If None, default values are used. + commit_properties: properties of the transaction commit. If None, default values are used. Returns: the metrics from optimize @@ -2019,6 +2119,13 @@ def z_order( {'numFilesAdded': 1, 'numFilesRemoved': 2, 'filesAdded': ..., 'filesRemoved': ..., 'partitionsOptimized': 0, 'numBatches': 1, 'totalConsideredFiles': 2, 'totalFilesSkipped': 0, 'preserveInsertionOrder': True} ``` """ + if custom_metadata: + warnings.warn( + "custom_metadata is deprecated, please use commit_properties instead.", + category=DeprecationWarning, + stacklevel=2, + ) + if isinstance(min_commit_interval, timedelta): min_commit_interval = int(min_commit_interval.total_seconds()) diff --git a/python/deltalake/writer.py b/python/deltalake/writer.py index 3ae5c5084e..015d2f22af 100644 --- a/python/deltalake/writer.py +++ b/python/deltalake/writer.py @@ -123,8 +123,9 @@ def write_deltalake( partition_filters: Optional[List[Tuple[str, str, Any]]] = ..., large_dtypes: bool = ..., engine: Literal["pyarrow"] = ..., - commit_properties: Optional[CommitProperties] = ..., + custom_metadata: Optional[Dict[str, str]] = ..., post_commithook_properties: Optional[PostCommitHookProperties] = ..., + commit_properties: Optional[CommitProperties] = ..., ) -> None: ... @@ -152,8 +153,9 @@ def write_deltalake( large_dtypes: bool = ..., engine: Literal["rust"] = ..., writer_properties: WriterProperties = ..., - commit_properties: Optional[CommitProperties] = ..., + custom_metadata: Optional[Dict[str, str]] = ..., post_commithook_properties: Optional[PostCommitHookProperties] = ..., + commit_properties: Optional[CommitProperties] = ..., ) -> None: ... @@ -183,8 +185,9 @@ def write_deltalake( large_dtypes: bool = ..., engine: Literal["rust"] = ..., writer_properties: WriterProperties = ..., - commit_properties: Optional[CommitProperties] = ..., + custom_metadata: Optional[Dict[str, str]] = ..., post_commithook_properties: Optional[PostCommitHookProperties] = ..., + commit_properties: Optional[CommitProperties] = ..., ) -> None: ... @@ -220,8 +223,9 @@ def write_deltalake( large_dtypes: bool = False, engine: Literal["pyarrow", "rust"] = "rust", writer_properties: Optional[WriterProperties] = None, - commit_properties: Optional[CommitProperties] = None, + custom_metadata: Optional[Dict[str, str]] = None, post_commithook_properties: Optional[PostCommitHookProperties] = None, + commit_properties: Optional[CommitProperties] = None, ) -> None: """Write to a Delta Lake table @@ -276,9 +280,17 @@ def write_deltalake( large_dtypes: Only used for pyarrow engine engine: writer engine to write the delta table. PyArrow engine is deprecated, and will be removed in v1.0. writer_properties: Pass writer properties to the Rust parquet writer. - commit_properties: properties of the transaction commit. If None, default values are used. + custom_metadata: Deprecated and will be removed in future versions. Use commit_properties instead. post_commithook_properties: properties for the post commit hook. If None, default values are used. + commit_properties: properties of the transaction commit. If None, default values are used. """ + if custom_metadata: + warnings.warn( + "custom_metadata is deprecated, please use commit_properties instead.", + category=DeprecationWarning, + stacklevel=2, + ) + table, table_uri = try_get_table_and_table_uri(table_or_uri, storage_options) if table is not None: storage_options = table._storage_options or {}