Skip to content

Commit

Permalink
expose in python
Browse files (browse the repository at this point in the history)
  • Loading branch information
wjones127 committed Jun 18, 2023
1 parent a96af85 commit 51d6ba4
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 2 deletions.
8 changes: 7 additions & 1 deletion python/deltalake/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -678,6 +678,7 @@ def z_order(
partition_filters: Optional[FilterType] = None,
target_size: Optional[int] = None,
max_concurrent_tasks: Optional[int] = None,
max_spill_size: int = 20 * 1024 * 1024 * 1024,
) -> Dict[str, Any]:
"""
Reorders the data using a Z-order curve to improve data skipping.
Expand All @@ -692,10 +693,15 @@ def z_order(
:param max_concurrent_tasks: the maximum number of concurrent tasks to use for
file compaction. Defaults to number of CPUs. More concurrent tasks can make compaction
faster, but will also use more memory.
:param max_spill_size: the maximum number of bytes to spill to disk. Defaults to 20 GiB (20 * 1024**3 bytes).
:return: the metrics from optimize
"""
metrics = self.table._table.z_order_optimize(
list(columns), partition_filters, target_size, max_concurrent_tasks
list(columns),
partition_filters,
target_size,
max_concurrent_tasks,
max_spill_size,
)
self.table.update_incremental()
return json.loads(metrics)
4 changes: 3 additions & 1 deletion python/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -291,16 +291,18 @@ impl RawDeltaTable {
}

/// Run z-order variation of optimize
#[pyo3(signature = (z_order_columns, partition_filters = None, target_size = None, max_concurrent_tasks = None))]
#[pyo3(signature = (z_order_columns, partition_filters = None, target_size = None, max_concurrent_tasks = None, max_spill_size = 20 * 1024 * 1024 * 1024))]
pub fn z_order_optimize(
&mut self,
z_order_columns: Vec<String>,
partition_filters: Option<Vec<(&str, &str, PartitionFilterValue)>>,
target_size: Option<i64>,
max_concurrent_tasks: Option<usize>,
max_spill_size: usize,
) -> PyResult<String> {
let mut cmd = OptimizeBuilder::new(self._table.object_store(), self._table.state.clone())
.with_max_concurrent_tasks(max_concurrent_tasks.unwrap_or_else(num_cpus::get))
.with_max_spill_size(max_spill_size)
.with_type(OptimizeType::ZOrder(z_order_columns));
if let Some(size) = target_size {
cmd = cmd.with_target_size(size);
Expand Down

0 comments on commit 51d6ba4

Please sign in to comment.