Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support Writing Dynamic Filters with DataPipe #428

Merged
merged 29 commits into from
Jul 22, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
6b86d41
add registered filters enum
lawrence-mbf May 5, 2022
bbd5e24
add DynamicFilter property object
lawrence-mbf May 17, 2022
558d50e
Move DynamicFilter parameters to property
lawrence-mbf May 31, 2022
b74cca4
Tighten Dynamic Filter security and fix bugs
lawrence-mbf Jun 3, 2022
3517e68
add basic tutorial
lawrence-mbf Jun 3, 2022
0c8d03c
Add externalFilter keyword argument for datapipe
lawrence-mbf Jun 3, 2022
d7c999f
Add external filter tests
lawrence-mbf Jun 3, 2022
1bf09aa
test azure environment path fix
lawrence-mbf Jun 3, 2022
9c9adc2
Update azure-pipelines.yml
lawrence-mbf Jun 3, 2022
82dd978
Update azure-pipelines.yml
lawrence-mbf Jun 3, 2022
7d5c944
Clarify and clean up generated docs
lawrence-mbf Jun 20, 2022
ab287e2
Fix DynamicFilter equality check.
lawrence-mbf Jun 20, 2022
1b55751
Allow ignoring default datapipe compression
lawrence-mbf Jun 20, 2022
c40ebf8
Update dynamic filter write tutorials
lawrence-mbf Jun 20, 2022
76d3590
Try setting env directly in pipeline
lawrence-mbf Jul 14, 2022
0832e68
Distinguish extra filters and "regular" filters
lawrence-mbf Jul 14, 2022
0070dd8
test tests
lawrence-mbf Jul 14, 2022
ce748fb
disable filter in tests
lawrence-mbf Jul 14, 2022
84979ad
fix incorrect environment variable
lawrence-mbf Jul 14, 2022
2b70e9d
remove debug error in tests
lawrence-mbf Jul 14, 2022
7913f0e
Change external filter test to use an included one
lawrence-mbf Jul 14, 2022
5ae3598
DataPipe properties made heterogeneous
lawrence-mbf Jul 14, 2022
9f83aa0
Fix DataPipe logic error
lawrence-mbf Jul 14, 2022
875326f
Update external filters test
lawrence-mbf Jul 14, 2022
d456119
Fix getters/setters with external filters
lawrence-mbf Jul 14, 2022
4d227f8
Allow datapipe `filters` to gain precedence
lawrence-mbf Jul 22, 2022
c813674
Add warning for malformed datapipe keyword use.
lawrence-mbf Jul 22, 2022
dd1dac5
Fix dataPipe keyword argument warning
lawrence-mbf Jul 22, 2022
1d17bcc
use shuffle instead of bitshuffle. Clean up docs and use imports
bendichter Jul 22, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 3 additions & 5 deletions +file/fillProps.m
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
if p.Results.IsRequired
requiredStr = 'REQUIRED';
else
requiredStr = 'OPTIONAL';
requiredStr = '';
end

proplines = cell(size(names));
Expand Down Expand Up @@ -80,13 +80,11 @@
end

if isa(prop, 'file.Dataset') || isa(prop, 'file.Attribute') || isa(prop, 'file.Group')
docStr = prop.doc;
propStr = sprintf('(%s) %s', typeStr, prop.doc);
else
docStr = '';
propStr = typeStr;
end

propStr = sprintf('(%s) %s', typeStr, docStr);

if nargin >= 2
propStr = [propName ' = ' propStr];
end
Expand Down
53 changes: 53 additions & 0 deletions +tests/+unit/dataPipeTest.m
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,59 @@ function testAppend(testCase)
testCase.verifyEqual(readData(1:3), [7, 8, 9] .');
end

function testExternalFilters(testCase)
% Write, append to, and read back DataPipes where one pipe is configured
% with a Shuffle property followed by the dynamically loaded LZ4 filter.
% The test is filtered out (assumption failure) when H5Z reports that the
% LZ4 filter is not available.
import types.untyped.datapipe.dynamic.Filter;
import types.untyped.datapipe.properties.DynamicFilter;
import types.untyped.datapipe.properties.Shuffle;

lz4Available = logical(H5Z.filter_avail(uint32(Filter.LZ4)));
testCase.assumeTrue(lz4Available);

filename = 'testExternalWrite.h5';

% Pipe under test: chunked along the third axis, shuffle + LZ4.
FilteredPipe = types.untyped.DataPipe( ...
    'maxSize', [10 13 15], ...
    'axis', 3, ...
    'chunkSize', [10 13 1], ...
    'dataType', 'uint8', ...
    'filters', [Shuffle() DynamicFilter(Filter.LZ4)]);

% Companion pipe: unbounded vector seeded with three values.
VectorPipe = types.untyped.DataPipe('maxSize', Inf, 'data', [7, 8, 9]);

%% write both pipes into a fresh file
fid = H5F.create(filename);

initialData = createData(FilteredPipe.dataType, [10 13 10]);
FilteredPipe.internal.data = initialData;
FilteredPipe.export(fid, '/test_data', {}); % export binds the pipe to the file
VectorPipe.export(fid, '/test_one_dim_data', {});

H5F.close(fid);

%% append one slab at a time to the filtered pipe
numAppends = 3;
appendData = zeros([10 13 numAppends], FilteredPipe.dataType);
for iSlab = 1:numAppends
    slab = createData(FilteredPipe.dataType, FilteredPipe.chunkSize);
    appendData(:,:,iSlab) = slab;
    FilteredPipe.append(appendData(:,:,iSlab));
end

%% append scalars to the vector pipe
for iValue = 1:numAppends
    VectorPipe.append(rand());
end

%% read back and compare
FilteredPipe = types.untyped.DataPipe('filename', filename, 'path', '/test_data');
readData = FilteredPipe.load();
testCase.verifyEqual(readData(:,:,1:10), initialData);
testCase.verifyEqual(readData(:,:,11:end), appendData);

VectorPipe = types.untyped.DataPipe('filename', filename, 'path', '/test_one_dim_data');
readData = VectorPipe.load();
testCase.verifyTrue(isvector(readData));
testCase.verifyEqual(length(readData), 6);
testCase.verifyEqual(readData(1:3), [7, 8, 9] .');
end

function data = createData(dataType, size)
% Return random integers of class DATATYPE with dimensions SIZE, drawn by
% randi with the largest value of that integer class as the upper bound.
upperBound = intmax(dataType);
data = randi(upperBound, size, dataType);
end
36 changes: 36 additions & 0 deletions +types/+untyped/+datapipe/+dynamic/Filter.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
classdef Filter < uint64
%FILTER Compression filter registered to HDF5
% as defined by (https://portal.hdfgroup.org/display/support/Filters)
% Submit an issue if we're missing one you wish to use!
%
% This is an enumeration class derived from uint64, so each member can be
% converted to a numeric value, e.g. uint32(Filter.LZ4).

enumeration
% Member name followed by its numeric value; the value is presumably the
% filter identifier listed in the registry linked above.
SZ3 (32024)
CBF (32006)
SZ (32017)
BLOSC (32001)
BZIP2 (307)
JPEG_LS (32012)
VBZ (32020)
JPEG_XR (32007)
CCSDS_123 (32011)
FCIDECOMP (32018)
BitShuffle (32008)
FPZip (32014)
B3D (32016)
JPEG (32019)
LPC_Rice (32010)
LZ4 (32004)
LZF (32000)
LZO (305)
MAFISC (32002)
ZFP (32013)
ZStandard (32015)
APAX (32005)
Snappy (32003)
SPDP (32009)
BitGroom (32022)
GBR (32023) % Granular BitRound
FAPEC (32021)
end
end

59 changes: 59 additions & 0 deletions +types/+untyped/+datapipe/+properties/DynamicFilter.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
classdef DynamicFilter < types.untyped.datapipe.Property
    %DYNAMICFILTER DataPipe property for a dynamically loaded HDF5 filter.
    %   Pairs one member of types.untyped.datapipe.dynamic.Filter with an
    %   optional parameter array and adds it to a dataset creation property
    %   list (dcpl) through H5P.set_filter.
    %
    %   obj = DynamicFilter(filter) uses an empty parameter array.
    %   obj = DynamicFilter(filter, parameters) stores PARAMETERS, which are
    %   forwarded unchanged to H5P.set_filter by addTo.
    %
    %   Construction errors if FILTER is not a scalar Filter enumeration
    %   member, if MATLAB is older than version 9.12, or if
    %   H5Z.filter_avail reports that the filter is not available.

    properties (SetAccess = private)
        dynamicFilter; % scalar types.untyped.datapipe.dynamic.Filter member
    end

    properties
        parameters; % last argument handed to H5P.set_filter; [] when omitted
    end

    methods
        function obj = DynamicFilter(filter, parameters)
            %DYNAMICFILTER Validate the filter and store it with its parameters.
            validateattributes(filter, ...
                {'types.untyped.datapipe.dynamic.Filter'}, ...
                {'scalar'}, ...
                'DynamicFilter', 'filter');
            assert(~verLessThan('matlab', '9.12'), ...
                ['Your MATLAB version `%s` does not support writing with ' ...
                'dynamically loaded filters. Please upgrade to version R2022a ' ...
                'or higher in order to use this feature.'], version);
            % char(filter) yields the enumeration member name; passing the
            % enumeration object itself to `%s` would format its numeric
            % value instead of a readable name.
            assert(H5Z.filter_avail(uint32(filter)), ...
                ['Filter `%s` does not appear to be installed on this system. ' ...
                'Please doublecheck `%s` for more information about writing ' ...
                'with third-party filters.'], ...
                char(filter), ...
                'https://www.mathworks.com/help/matlab/import_export/read-and-write-hdf5-datasets-using-dynamically-loaded-filters.html');

            obj.dynamicFilter = filter;

            if (1 < nargin)
                obj.parameters = parameters;
            else
                obj.parameters = [];
            end
        end

        function tf = isInDcpl(obj, dcpl)
            %ISINDCPL True if DCPL already contains this object's filter.
            %   Scans every filter in the property list DCPL and compares
            %   its identifier with uint32(obj.dynamicFilter).
            %
            %   The object is now an explicit first parameter: the body
            %   reads obj.dynamicFilter, which was undefined when the method
            %   only declared `dcpl`.
            %   NOTE(review): the superclass has a `methods (Static, Abstract)`
            %   block whose contents are not visible here; if isInDcpl is
            %   declared there, confirm this instance form is acceptable.
            tf = false;

            % H5P filter indices are zero-based.
            for i = 0:(H5P.get_nfilters(dcpl) - 1)
                [id, ~, ~, ~, ~] = H5P.get_filter(dcpl, i);
                if id == uint32(obj.dynamicFilter)
                    tf = true;
                    return;
                end
            end
        end

        function addTo(obj, dcpl)
            %ADDTO Add this filter to DCPL as a mandatory filter.
            H5P.set_filter( ...
                dcpl, ...
                uint32(obj.dynamicFilter), ...
                'H5Z_FLAG_MANDATORY', ...
                obj.parameters);
        end
    end
end
17 changes: 15 additions & 2 deletions +types/+untyped/+datapipe/BlueprintPipe.m
Original file line number Diff line number Diff line change
Expand Up @@ -95,12 +95,25 @@ function append(~, ~)
'Export the DataPipe to append.']);
end

function setPipeProperty(obj, prop)
function setPipeProperty(obj, prop)
assert(isa(prop, 'types.untyped.datapipe.Property'),...
'Can only add filters.');

isDynamicFilter = isa(prop, ...
'types.untyped.datapipe.properties.DynamicFilter');

% dedup pipe properties if it already exists with special case
% behavior for dynamic filters.
for i = 1:length(obj.pipeProperties)
if isa(prop, class(obj.pipeProperties{i}))
pipeProp = obj.pipeProperties{i};

isSameClass = isa(prop, class(pipeProp));

if isSameClass
if isDynamicFilter && prop.dynamicFilter ~= pipeProp.dynamicFilter
continue;
end

obj.pipeProperties{i} = prop;
return;
end
Expand Down
2 changes: 1 addition & 1 deletion +types/+untyped/+datapipe/Property.m
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
classdef Property < handle
classdef Property < handle & matlab.mixin.Heterogeneous
%PROPERTY used in datapipe creation

methods (Static, Abstract)
Expand Down
54 changes: 45 additions & 9 deletions +types/+untyped/DataPipe.m
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@

properties (SetAccess = private)
internal;
filters;
end

properties (Dependent)
Expand All @@ -69,14 +70,20 @@
p.addParameter('axis', 1, @(x) isnumeric(x) && isscalar(x) && x > 0);
p.addParameter('offset', 0, @(x) isnumeric(x) && isscalar(x) && x >= 0);
p.addParameter('chunkSize', []);
% note that compression level is defaulted to ON
% This is primarily for legacy support as we move into other
% filters.
p.addParameter('compressionLevel', 3, @(x) isnumeric(x)...
&& isscalar(x)...
&& x >= -1);
p.addParameter('dataType', '');
p.addParameter('data', []);
p.addParameter('filename', '');
p.addParameter('path', '');
p.addParameter('hasShuffle', false);
p.addParameter('hasShuffle', false, ...
@(b) isscalar(b) && (islogical(b) || isnumeric(b)));
p.addParameter('filters', DynamicFilter.empty(), ...
@(x) isa(x, 'types.untyped.datapipe.Property'));
p.KeepUnmatched = true;
p.parse(varargin{:});

Expand Down Expand Up @@ -149,13 +156,31 @@
end
obj.internal.setPipeProperties(Chunking(chunkSize));

if ~isempty(p.Results.compressionLevel)
obj.internal.setPipeProperties(Compression(...
p.Results.compressionLevel));
hasFilters = ~isempty(p.Results.filters);
usingHasCompressionLevel = ~any(strcmp(p.UsingDefaults, 'compressionLevel'));
usingHasShuffle = ~any(strcmp(p.UsingDefaults, 'hasShuffle'));
if hasFilters && (usingHasCompressionLevel || usingHasShuffle)
warning(['`filters` keyword argument detected. This will ' ...
'override `compressionLevel` and `hasShuffle` keyword ' ...
'arguments. If you wish to use either `compressionLevel` ' ...
'or `hasShuffle`, please add their respective filter ' ...
'properties `types.untyped.datapipe.properties.Compression` ' ...
'and `types.untyped.datapipe.properties.Shuffle` to the ' ...
'`filters` properties array.']);
end

if p.Results.hasShuffle
obj.internal.setPipeProperties(Shuffle());
if hasFilters
filterCell = num2cell(p.Results.filters);
obj.internal.setPipeProperties(filterCell{:});
else
if -1 < p.Results.compressionLevel
obj.internal.setPipeProperties(Compression(...
p.Results.compressionLevel));
end

if logical(p.Results.hasShuffle)
obj.internal.setPipeProperties(Shuffle());
end
end

obj.internal.data = p.Results.data;
Expand Down Expand Up @@ -197,13 +222,24 @@
end

function val = get.compressionLevel(obj)
val = obj.internal.getPipeProperty(...
'types.untyped.datapipe.properties.Compression').level;
compressionClass = 'types.untyped.datapipe.properties.Compression';
val = -1;
if obj.internal.hasPipeProperty(compressionClass)
val = obj.internal.getPipeProperty(compressionClass).level;
end
end

function set.compressionLevel(obj, val)
import types.untyped.datapipe.properties.Compression;
obj.internal.setPipeProperty(Compression(val));
validateattributes(val, {'numeric'}, {'scalar'}, 1);
assert(-1 <= val, 'NWB:SetCompressionLevel:InvalidValue', ...
'Compression Level cannot be less than -1.');
compressionClass = 'types.untyped.datapipe.properties.Compression';
if -1 == val
obj.internal.removePipeProperty(compressionClass);
else
obj.internal.setPipeProperty(Compression(val));
end
end

function tf = get.hasShuffle(obj)
Expand Down
16 changes: 11 additions & 5 deletions azure-pipelines.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,31 +3,37 @@ trigger:

pool:
vmImage: 'ubuntu-latest'

steps:
- task: InstallMATLAB@0
# default to latest version of Matlab installed
#inputs:
# release: R2020a
# release: R2017a
- checkout: self

- task: UsePythonVersion@0
inputs:
versionSpec: '3.8'
inputs:
versionSpec: '3.8'
architecture: 'x64'

- script: |
- bash: |
python -m pip install --upgrade pip
pip install pynwb
pip install hdf5plugin
echo "##vso[task.setvariable variable=plugin_path]$(python -c "import hdf5plugin; print(hdf5plugin.PLUGINS_PATH)")"
displayName: 'Install PyNWB'

- task: RunMATLABCommand@0
inputs:
command: "results = assertSuccess(nwbtest); assert(~isempty(results), 'No tests ran');"
env:
HDF5_PLUGIN_PATH: $(plugin_path)

- task: PublishTestResults@2
condition: succeededOrFailed()
inputs:
testResultsFiles: testResults.xml

- task: PublishCodeCoverageResults@1
inputs:
codeCoverageTool: Cobertura
Expand Down
Binary file added tutorials/dynamically_loaded_filter_writes.mlx
Binary file not shown.
72 changes: 72 additions & 0 deletions tutorials/html/dynamically_loaded_filter_writes.html

Large diffs are not rendered by default.