Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use _ARRAY_DIMENSIONS to create shared, named dimensions in ZarrHeader #1325

Merged
merged 4 commits into from
Apr 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
160 changes: 145 additions & 15 deletions cdm/zarr/src/main/java/ucar/nc2/iosp/zarr/ZarrHeader.java
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,23 @@
package ucar.nc2.iosp.zarr;

import com.fasterxml.jackson.databind.ObjectMapper;

import ucar.ma2.Array;
import ucar.ma2.ArrayObject;
import ucar.ma2.ArrayString;
import ucar.nc2.Attribute;
import ucar.nc2.Dimension;
import ucar.nc2.Group;
import ucar.ma2.Index;
import ucar.nc2.Variable;
import ucar.nc2.filter.Filter;
import ucar.unidata.io.RandomAccessFile;
import ucar.unidata.io.zarr.RandomAccessDirectory;
import ucar.unidata.io.zarr.RandomAccessDirectoryItem;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.nio.ByteOrder;
import java.util.*;
Expand All @@ -24,11 +32,16 @@
*/
public class ZarrHeader {

private static final Logger logger = LoggerFactory.getLogger(ZarrHeader.class);

private final RandomAccessDirectory rootRaf;
private final Group.Builder rootGroup;
private final String rootLocation;
private static ObjectMapper objectMapper = new ObjectMapper();

/*
*
*/
public ZarrHeader(RandomAccessDirectory raf, Group.Builder rootGroup) {
this.rootRaf = raf;
this.rootGroup = rootGroup;
Expand All @@ -47,10 +60,16 @@ private class DelayedVarMaker {
private List<Attribute> attrs; // list of variable attributes
private long dataOffset; // byte position where data starts

/*
* Store the list of variable attributes for the variable in progress.
* makeVar() later hands this list to makeVariable().
*/
void setAttrs(List<Attribute> attrs) {
this.attrs = attrs;
}

/*
*
*/
void setVar(RandomAccessDirectoryItem var) {
this.var = var;
this.attrs = null;
Expand All @@ -63,14 +82,16 @@ void setVar(RandomAccessDirectoryItem var) {
raf.seek(0); // reset in case file has previously been opened by another iosp
this.zarray = objectMapper.readValue(raf, ZArray.class);
} catch (IOException | ClassCastException ex) {
ZarrIosp.logger.error(new ZarrFormatException(ex.getMessage()).getMessage());
logger.error(new ZarrFormatException(ex.getMessage()).getMessage());
// skip var if metadata invalid
this.var = null;
}
}
}

// check if attribute file belongs to current variable
/*
* check if attribute file belongs to current variable
*/
boolean myAttrs(RandomAccessDirectoryItem attrs) {
if (var == null || attrs == null) {
return false;
Expand All @@ -81,14 +102,17 @@ boolean myAttrs(RandomAccessDirectoryItem attrs) {
return ZarrUtils.getObjectNameFromPath(attrPath).equals(ZarrUtils.getObjectNameFromPath(varPath));
}

/*
*
*/
void processItem(RandomAccessDirectoryItem item) {
if (var == null) {
return;
}
// get index of chunks
int index = getChunkIndex(item, this.zarray);
if (index < 0) { // not data files, skip rest of var
ZarrIosp.logger.error(new ZarrFormatException().getMessage());
logger.error(new ZarrFormatException().getMessage());
this.var = null; // skip rest of var is unrecognized files found
}
this.initializedChunks.put(index, item.length());
Expand All @@ -98,22 +122,25 @@ void processItem(RandomAccessDirectoryItem item) {
}
}

/*
* Build the variable currently in progress, if any, then clear it.
* Delegates to makeVariable(); a ZarrFormatException is logged rather than rethrown,
* so an invalid variable is dropped without aborting the caller.
*/
void makeVar() {
if (var == null) {
return; // do nothing if no variable is in progress
}
try {
makeVariable(var, dataOffset, zarray, initializedChunks, attrs);
} catch (ZarrFormatException ex) {
ZarrIosp.logger.error(ex.getMessage());
logger.error(ex.getMessage());
}
var = null; // reset var
}
}

/**
* Create CDM object on 'rootGroup' from RandomAccessFile
*
*
* @throws IOException
*/
public void read() throws IOException {
Expand All @@ -124,8 +151,10 @@ public void read() throws IOException {

for (RandomAccessDirectoryItem item : items) {
String filepath = ZarrUtils.trimLocation(item.getLocation());

if (filepath.endsWith(ZarrKeys.ZATTRS)) { // attributes
List<Attribute> attrs = makeAttributes(item);

// assign attrs to either variable or group
if (delayedVarMaker.myAttrs(item)) {
delayedVarMaker.setAttrs(attrs);
Expand All @@ -134,16 +163,22 @@ public void read() throws IOException {
delayedVarMaker.makeVar();
grp_attrs = attrs;
}

} else if (filepath.endsWith(ZarrKeys.ZMETADATA)) { // possible consolidated metadata in root group
logger.trace("encountered .zmetadata; not yet coded for");

} else if (filepath.endsWith(ZarrKeys.ZGROUP)) { // groups
// build any vars in progress
delayedVarMaker.makeVar();
makeGroup(item, grp_attrs); // .zattrs will always be processed before .zgroup, so we can make group immediately
grp_attrs = null; // reset

} else if (filepath.endsWith(ZarrKeys.ZARRAY)) { // variables
// build any vars in progress
delayedVarMaker.makeVar();
// set up variable to be created after processing the rest of the files in the folder
delayedVarMaker.setVar(item);

} else {
delayedVarMaker.processItem(item);
}
Expand All @@ -152,6 +187,9 @@ public void read() throws IOException {
delayedVarMaker.makeVar();
}

/*
*
*/
private void makeGroup(RandomAccessDirectoryItem item, List<Attribute> attrs) {
// make new Group
Group.Builder group = Group.builder();
Expand All @@ -174,33 +212,111 @@ private void makeGroup(RandomAccessDirectoryItem item, List<Attribute> attrs) {
group.setParentGroup(parentGroup);
parentGroup.addGroup(group);
} catch (ZarrFormatException ex) {
ZarrIosp.logger.error(ex.getMessage());
logger.error(ex.getMessage());
}
}
}

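/*
* Build a Variable from a .zarray item and add it to its parent group.
* Dimension names come from the _ARRAY_DIMENSIONS attribute when present, otherwise "dim<N>".
* Throws ZarrFormatException if the parent group is missing, if the number of dimension names
* does not match the array rank, or if a named dimension is seen with inconsistent lengths.
*/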
private void makeVariable(RandomAccessDirectoryItem item, long dataOffset, ZArray zarray,
Map<Integer, Long> initializedChunks, List<Attribute> attrs) throws ZarrFormatException {
// make new Variable
Variable.Builder var = Variable.builder();
String location = ZarrUtils.trimLocation(item.getLocation());

// set var name
var.setName(ZarrUtils.getObjectNameFromPath(location));
String vname = ZarrUtils.getObjectNameFromPath(location);
var.setName(vname);
logger.trace("evaluating {}", vname);

// Check if var has named dimensions by looking for _ARRAY_DIMENSIONS attribute.
// This is the convention followed by xarray and geozarr.
// NOTE: The Nczarr spec allows for honoring or ignoring this attribute by specifying a mode.
// See under "Client Parameters" on https://docs.unidata.ucar.edu/nug/current/nczarr_head.html
// We do nothing to check how that's set.
String[] dimNames = null;
boolean hasNamedDimensions = false;

if (attrs != null) {

for (Attribute attr : attrs) {
final String attrName = attr.getName();
if ("_ARRAY_DIMENSIONS".equals(attrName)) {
try {
final ArrayObject.D1 aod1 = (ArrayObject.D1) attr.getValues();

// getSize returns a long
final int aodSize = (int) aod1.getSize();
dimNames = new String[aodSize];

for (int i = 0; i < aodSize; ++i) {
dimNames[i] = (String) aod1.get(i);
}
hasNamedDimensions = true;
// logger.trace(" found _ARRAY_DIMENSIONS array {}", aod1);
} catch (final Exception exc) {
logger.debug(" Could not extract _ARRAY_DIMENSIONS for {}, {}", vname, exc.getMessage());
}

//// Informational logging
// } else if ("coordinates".equals(attrName) || "standard_name".equals(attrName) || "units".equals(attrName))
//// {
// try {
// ArrayObject.D1 aod1 = (ArrayObject.D1) attr.getValues();
// String coordsStr = (String) aod1.get(0);
// logger.trace(" var {} has {} attr '{}'", vname, attrName, coordsStr);
// } catch (final Exception exc) {
// logger.debug(" Exception extracting {} attr value, {}", attrName, exc.getMessage());
// }

}
}
}

// set variable datatype
var.setDataType(zarray.getDataType());

// find variable's group or throw if non-existent.
final Group.Builder parentGroup = findGroup(location);

// create and set dimensions
// If hasNamedDimensions set above, we will want to share var's dimensions with the group.
int[] shape = zarray.getShape();
List<Dimension> dims = new ArrayList<>();
for (int d = 0; d < shape.length; d++) {
// TODO: revisit dimension props and names (especially for nczarr)
Dimension.Builder dim = Dimension.builder(String.format("dim%d", d), shape[d]);

if (hasNamedDimensions && shape.length != dimNames.length) {
throw new ZarrFormatException("Array " + vname + " has dimensions attribute count that does not match its rank.");
}

final List<Dimension> dims = new ArrayList<>();
for (int i = 0; i < shape.length; i++) {

final String dname = (hasNamedDimensions) ? dimNames[i] : String.format("dim%d", i);

final Dimension.Builder dim = Dimension.builder(dname, shape[i]);
dim.setIsVariableLength(false);
dim.setIsUnlimited(false);
dim.setIsShared(false);
dims.add(dim.build());

final Dimension dd = dim.build();

dims.add(dd);

if (hasNamedDimensions) {
Optional<Dimension> optd = parentGroup.findDimensionLocal(dname);

if (optd.isPresent()) {
final Dimension prevd = optd.get();

if (dd.getLength() != prevd.getLength()) {
throw new ZarrFormatException("Named dimension " + dname + " seen with inconsistent lengths.");
}
} else {
logger.trace("adding {} to group as a shared dimension", dname);
parentGroup.addDimension(dd);
}
}
}
var.addDimensions(dims);

Expand All @@ -215,16 +331,30 @@ private void makeVariable(RandomAccessDirectoryItem item, long dataOffset, ZArra
zarray.getOrder(), zarray.getSeparator(), zarray.getFilters(), dataOffset, initializedChunks);
var.setSPobject(vinfo);

// Include some info from .zarray file in attributes for display when showing variable detail.
// Possibly add to this fill_value if in .zarray but not .zattrs?
if (attrs == null) {
attrs = new ArrayList<Attribute>();
}
final Filter compressor = zarray.getCompressor();
if (compressor == null) {
attrs.add(new Attribute("_Compressor", "none"));
} else {
attrs.add(new Attribute("_Compressor", zarray.getCompressor().getName()));
}

// add current attributes, if any exist
if (attrs != null) {
var.addAttributes(attrs);
}

// find variable's group or throw if non-existent
Group.Builder parentGroup = findGroup(location);
// Add var to parent.
parentGroup.addVariable(var);
}

/*
*
*/
private List<Attribute> makeAttributes(RandomAccessDirectoryItem item) {
// get RandomAccessFile for JSON parsing
try {
Expand Down Expand Up @@ -287,7 +417,7 @@ private static int getChunkIndex(RandomAccessDirectoryItem item, ZArray zarray)

/**
* Find Group builder matching provided name
*
*
* @throws ZarrFormatException if group is not found
*/
private Group.Builder findGroup(String location) throws ZarrFormatException {
Expand Down
1 change: 1 addition & 0 deletions cdm/zarr/src/main/java/ucar/nc2/iosp/zarr/ZarrKeys.java
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ public final class ZarrKeys {
public static final String ZARRAY = ".zarray";
public static final String ZATTRS = ".zattrs";
public static final String ZGROUP = ".zgroup";
public static final String ZMETADATA = ".zmetadata";

// key names
public static final String SHAPE = "shape";
Expand Down
Loading