From 54f1d6ac16604238fd00358beca1e04022d360b3 Mon Sep 17 00:00:00 2001 From: Felix Cremer Date: Thu, 27 Feb 2025 22:43:57 +0100 Subject: [PATCH 1/2] Add rechunk function This function could then be overloaded by wrapping packages. --- src/rechunk.jl | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/rechunk.jl b/src/rechunk.jl index f275347..fd3a97e 100644 --- a/src/rechunk.jl +++ b/src/rechunk.jl @@ -14,6 +14,13 @@ struct RechunkedDiskArray{T,N,A<:AbstractArray{T,N},C<:GridChunks} <: AbstractDi chunks::C end +""" + rechunk(A::AbstractArray, chunks) +Rechunk the underlying data of A into the given `chunks`. + +""" +rechunk(data::AbstractDiskArray, chunks::GridChunks) = RechunkedDiskArray(data , chunks) + Base.parent(A::RechunkedDiskArray) = A.parent Base.size(A::RechunkedDiskArray) = size(parent(A)) From 4e1bda7e868321325038ceaf4028b8ed61791ddf Mon Sep 17 00:00:00 2001 From: Felix Cremer Date: Mon, 3 Mar 2025 10:01:27 +0100 Subject: [PATCH 2/2] Improve docstring of rechunk --- src/rechunk.jl | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/src/rechunk.jl b/src/rechunk.jl index fd3a97e..1572bf4 100644 --- a/src/rechunk.jl +++ b/src/rechunk.jl @@ -4,7 +4,7 @@ RechunkedDiskArray(parent::AbstractArray, chunks::GridChunks) A disk array that forces a specific chunk pattern, -regardless of the true chunk pattern of the parnet array. +regardless of the true chunk pattern of the parent array. This is useful in `zip` and other operations that can iterate over multiple arrays with different patterns. @@ -15,11 +15,18 @@ struct RechunkedDiskArray{T,N,A<:AbstractArray{T,N},C<:GridChunks} <: AbstractDi end """ - rechunk(A::AbstractArray, chunks) -Rechunk the underlying data of A into the given `chunks`. + rechunk(data::AbstractArray,chunks) +Change the chunks of the underlying DiskArray. Note that this will not change the chunking of the underlying data itself, it will just make the data +"look" like it had a different chunking. 
If you need a persistent on-disk representation of this chunking, save the resulting array. + +The chunks argument can take one of the following forms: + +• a DiskArrays.GridChunks object + +• a tuple specifying the chunk size along each dimension """ -rechunk(data::AbstractDiskArray, chunks::GridChunks) = RechunkedDiskArray(data , chunks) +rechunk(data::AbstractDiskArray, chunks::GridChunks) = RechunkedDiskArray(data, chunks) Base.parent(A::RechunkedDiskArray) = A.parent Base.size(A::RechunkedDiskArray) = size(parent(A))