From 12c49d23567bb2d9d5bdcb68a2d388c34f7f15d4 Mon Sep 17 00:00:00 2001 From: Stefaan Lippens Date: Tue, 28 Nov 2023 19:16:25 +0100 Subject: [PATCH] Issue #424 Update `openeo.processes` to openeo-processes 2.0.0 rc1 --- CHANGELOG.md | 5 + openeo/internal/processes/generator.py | 13 +- openeo/internal/processes/parse.py | 19 +- openeo/processes.py | 1231 ++++++++++------- specs/openeo-processes-legacy/README.md | 3 + .../load_ml_model.json | 53 + .../openeo-processes-legacy/load_result.json | 212 +++ .../predict_random_forest.json | 42 + specs/update-subrepos.py | 3 +- tests/internal/processes/test_generator.py | 12 +- 10 files changed, 1092 insertions(+), 501 deletions(-) create mode 100644 specs/openeo-processes-legacy/README.md create mode 100644 specs/openeo-processes-legacy/load_ml_model.json create mode 100644 specs/openeo-processes-legacy/load_result.json create mode 100644 specs/openeo-processes-legacy/predict_random_forest.json diff --git a/CHANGELOG.md b/CHANGELOG.md index 31b3bcaa9..955b97a64 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Changed +- Update autogenerated functions/methods in `openeo.processes` to definitions from openeo-processes project version 2.0.0-rc1. + This removes `create_raster_cube`, `fit_class_random_forest`, `fit_regr_random_forest` and `save_ml_model`. + Although removed from openeo-processes 2.0.0-rc1, support for `load_result`, `predict_random_forest` and `load_ml_model` + is preserved but deprecated. 
([#424](https://github.com/Open-EO/openeo-python-client/issues/424)) + ### Removed ### Fixed diff --git a/openeo/internal/processes/generator.py b/openeo/internal/processes/generator.py index 314b244c0..4f3f9edfc 100644 --- a/openeo/internal/processes/generator.py +++ b/openeo/internal/processes/generator.py @@ -112,14 +112,17 @@ def _hanging_indent(self, paragraph: str, indent=" ", width: int = DEFAULT_WI def collect_processes(sources: List[Union[Path, str]]) -> List[Process]: - processes = [] + processes = {} for src in [Path(s) for s in sources]: if src.is_dir(): - processes.extend(parse_all_from_dir(src)) + to_add = parse_all_from_dir(src) else: - processes.append(Process.from_json_file(src)) - processes.sort(key=lambda p: p.id) - return processes + to_add = [Process.from_json_file(src)] + for p in to_add: + if p.id in processes: + raise Exception(f"Duplicate source for process {p.id!r}") + processes[p.id] = p + return sorted(processes.values(), key=lambda p: p.id) def generate_process_py(processes: List[Process], output=sys.stdout, argv=None): diff --git a/openeo/internal/processes/parse.py b/openeo/internal/processes/parse.py index c4d2ba134..f18e217ff 100644 --- a/openeo/internal/processes/parse.py +++ b/openeo/internal/processes/parse.py @@ -5,6 +5,7 @@ from __future__ import annotations import json +import typing from pathlib import Path from typing import Iterator, List, Union @@ -66,19 +67,15 @@ def from_dict(cls, data: dict) -> Returns: return cls(description=data["description"], schema=Schema.from_dict(data["schema"])) -class Process: +class Process(typing.NamedTuple): """An openEO process""" - def __init__( - self, id: str, parameters: List[Parameter], returns: Returns, - description: str = "", summary: str = "" - ): - self.id = id - self.description = description - self.parameters = parameters - self.returns = returns - self.summary = summary - # TODO: more properties? 
+ id: str + parameters: List[Parameter] + returns: Returns + description: str = "" + summary: str = "" + # TODO: more properties? @classmethod def from_dict(cls, data: dict) -> Process: diff --git a/openeo/processes.py b/openeo/processes.py index b184fffa7..3fe2d360c 100644 --- a/openeo/processes.py +++ b/openeo/processes.py @@ -2,8 +2,8 @@ # Do not edit this file directly. # It is automatically generated. # Used command line arguments: -# openeo/internal/processes/generator.py specs/openeo-processes specs/openeo-processes/proposals --output openeo/processes.py -# Generated on 2023-09-08 +# openeo/internal/processes/generator.py specs/openeo-processes specs/openeo-processes/proposals specs/openeo-processes-legacy --output openeo/processes.py +# Generated on 2023-11-28 from __future__ import annotations @@ -141,39 +141,35 @@ def aggregate_spatial(self, geometries, reducer, target_dimension=UNSET, context """ Zonal statistics for geometries - :param self: A raster data cube. The data cube must have been reduced to only contain two spatial - dimensions and a third dimension the values are aggregated for, for example the temporal dimension to - get a time series. Otherwise, this process fails with the `TooManyDimensions` exception. The data cube - implicitly gets restricted to the bounds of the geometries as if ``filter_spatial()`` would have been - used with the same values for the corresponding parameters immediately before this process. - :param geometries: Geometries as GeoJSON on which the aggregation will be based. Vector properties are - preserved for vector data cubes and all GeoJSON Features. One value will be computed per GeoJSON - `Feature`, `Geometry` or `GeometryCollection`. For a `FeatureCollection` multiple values will be - computed, one value per contained `Feature`. For example, a single value will be computed for a - `MultiPolygon`, but two values will be computed for a `FeatureCollection` containing two polygons. 
- - For **polygons**, the process considers all pixels for which the point at the pixel center intersects - with the corresponding polygon (as defined in the Simple Features standard by the OGC). - For - **points**, the process considers the closest pixel center. - For **lines** (line strings), the process - considers all the pixels whose centers are closest to at least one point on the line. Thus, pixels may - be part of multiple geometries and be part of multiple aggregations. To maximize interoperability, a - nested `GeometryCollection` should be avoided. Furthermore, a `GeometryCollection` composed of a single - type of geometries should be avoided in favour of the corresponding multi-part type (e.g. - `MultiPolygon`). + :param self: A raster data cube with at least two spatial dimensions. The data cube implicitly gets + restricted to the bounds of the geometries as if ``filter_spatial()`` would have been used with the + same values for the corresponding parameters immediately before this process. + :param geometries: Geometries for which the aggregation will be computed. Feature properties are + preserved for vector data cubes and all GeoJSON Features. One value will be computed per label in the + dimension of type `geometries`, GeoJSON `Feature` or `Geometry`. For a `FeatureCollection` multiple + values will be computed, one value per contained `Feature`. No values will be computed for empty + geometries. For example, a single value will be computed for a `MultiPolygon`, but two values will be + computed for a `FeatureCollection` containing two polygons. - For **polygons**, the process considers + all pixels for which the point at the pixel center intersects with the corresponding polygon (as + defined in the Simple Features standard by the OGC). - For **points**, the process considers the + closest pixel center. - For **lines** (line strings), the process considers all the pixels whose + centers are closest to at least one point on the line. 
Thus, pixels may be part of multiple geometries + and be part of multiple aggregations. No operation is applied to geometries that are outside of the + bounds of the data. :param reducer: A reducer to be applied on all values of each geometry. A reducer is a single process such as ``mean()`` or a set of processes, which computes a single value for a list of values, see the category 'reducer' for such processes. - :param target_dimension: The name of a new dimensions that is used to store the results. A new - dimension will be created with the given name and type `other` (see ``add_dimension()``). Defaults to - the dimension name `result`. Fails with a `TargetDimensionExists` exception if a dimension with the - specified name exists. + :param target_dimension: By default (which is `null`), the process only computes the results and + doesn't add a new dimension. If this parameter contains a new dimension name, the computation also + stores information about the total count of pixels (valid + invalid pixels) and the number of valid + pixels (see ``is_valid()``) for each computed value. These values are added as a new dimension. The new + dimension of type `other` has the dimension labels `value`, `total_count` and `valid_count`. Fails + with a `TargetDimensionExists` exception if a dimension with the specified name exists. :param context: Additional data to be passed to the reducer. - :return: A vector data cube with the computed results and restricted to the bounds of the geometries. - The computed value is used for the dimension with the name that was specified in the parameter - `target_dimension`. The computation also stores information about the total count of pixels (valid + - invalid pixels) and the number of valid pixels (see ``is_valid()``) for each geometry. These values are - added as a new dimension with a dimension name derived from `target_dimension` by adding the suffix - `_meta`. 
The new dimension has the dimension labels `total_count` and `valid_count`. + :return: A vector data cube with the computed results. Empty geometries still exist but without any + aggregated values (i.e. no-data). The spatial dimensions are replaced by a dimension of type + 'geometries' and if `target_dimension` is not `null`, a new dimension is added. """ return aggregate_spatial( data=self, @@ -204,10 +200,10 @@ def aggregate_spatial_window(self, reducer, size, boundary=UNSET, align=UNSET, c left, the process pads/trims at the lower-right. :param context: Additional data to be passed to the reducer. - :return: A data cube with the newly computed values and the same dimensions. The resolution will - change depending on the chosen values for the `size` and `boundary` parameter. It usually decreases for - the dimensions which have the corresponding parameter `size` set to values greater than 1. The - dimension labels will be set to the coordinate at the center of the window. The other dimension + :return: A raster data cube with the newly computed values and the same dimensions. The resolution + will change depending on the chosen values for the `size` and `boundary` parameter. It usually + decreases for the dimensions which have the corresponding parameter `size` set to values greater than + 1. The dimension labels will be set to the coordinate at the center of the window. The other dimension properties (name, type and reference system) remain unchanged. """ return aggregate_spatial_window( @@ -227,12 +223,10 @@ def aggregate_temporal(self, intervals, reducer, labels=UNSET, dimension=UNSET, :param self: A data cube. :param intervals: Left-closed temporal intervals, which are allowed to overlap. Each temporal interval in the array has exactly two elements: 1. The first element is the start of the temporal interval. The - specified instance in time is **included** in the interval. 2. The second element is the end of the - temporal interval. 
The specified instance in time is **excluded** from the interval. The specified - temporal strings follow [RFC 3339](https://www.rfc-editor.org/rfc/rfc3339.html). Although [RFC 3339 - prohibits the hour to be '24'](https://www.rfc-editor.org/rfc/rfc3339.html#section-5.7), **this process - allows the value '24' for the hour** of an end time in order to make it possible that left-closed time - intervals can fully cover the day. + specified time instant is **included** in the interval. 2. The second element is the end of the + temporal interval. The specified time instant is **excluded** from the interval. The second element + must always be greater/later than the first element, except when using time without date. Otherwise, a + `TemporalExtentEmpty` exception is thrown. :param reducer: A reducer to be applied for the values contained in each interval. A reducer is a single process such as ``mean()`` or a set of processes, which computes a single value for a list of values, see the category 'reducer' for such processes. Intervals may not contain any values, which for @@ -269,12 +263,14 @@ def aggregate_temporal_period(self, period, reducer, dimension=UNSET, context=UN :param period: The time intervals to aggregate. The following pre-defined values are available: * `hour`: Hour of the day * `day`: Day of the year * `week`: Week of the year * `dekad`: Ten day periods, counted per year with three periods per month (day 1 - 10, 11 - 20 and 21 - end of month). The third - dekad of the month can range from 8 to 11 days. For example, the fourth dekad is Feb, 1 - Feb, 10 each - year. * `month`: Month of the year * `season`: Three month periods of the calendar seasons (December - - February, March - May, June - August, September - November). * `tropical-season`: Six month periods of - the tropical seasons (November - April, May - October). 
* `year`: Proleptic years * `decade`: Ten year - periods ([0-to-9 decade](https://en.wikipedia.org/wiki/Decade#0-to-9_decade)), from a year ending in a - 0 to the next year ending in a 9. * `decade-ad`: Ten year periods ([1-to-0 + dekad of the month can range from 8 to 11 days. For example, the third dekad of a year spans from + January 21 till January 31 (11 days), the fourth dekad spans from February 1 till February 10 (10 days) + and the sixth dekad spans from February 21 till February 28 or February 29 in a leap year (8 or 9 days + respectively). * `month`: Month of the year * `season`: Three month periods of the calendar seasons + (December - February, March - May, June - August, September - November). * `tropical-season`: Six month + periods of the tropical seasons (November - April, May - October). * `year`: Proleptic years * + `decade`: Ten year periods ([0-to-9 decade](https://en.wikipedia.org/wiki/Decade#0-to-9_decade)), from + a year ending in a 0 to the next year ending in a 9. * `decade-ad`: Ten year periods ([1-to-0 decade](https://en.wikipedia.org/wiki/Decade#1-to-0_decade)) better aligned with the anno Domini (AD) calendar era, from a year ending in a 1 to the next year ending in a 0. :param reducer: A reducer to be applied for the values contained in each period. A reducer is a single @@ -390,7 +386,7 @@ def any(self, ignore_nodata=UNSET) -> ProcessBuilder: @openeo_process def apply(self, process, context=UNSET) -> ProcessBuilder: """ - Apply a process to each pixel + Apply a process to each value :param self: A data cube. :param process: A process that accepts and returns a single value and is applied on each individual @@ -406,12 +402,12 @@ def apply(self, process, context=UNSET) -> ProcessBuilder: @openeo_process def apply_dimension(self, process, dimension, target_dimension=UNSET, context=UNSET) -> ProcessBuilder: """ - Apply a process to pixels along a dimension + Apply a process to all values along a dimension :param self: A data cube. 
- :param process: Process to be applied on all pixel values. The specified process needs to accept an - array and must return an array with at least one element. A process may consist of multiple sub- - processes. + :param process: Process to be applied on all values along the given dimension. The specified process + needs to accept an array and must return an array with at least one element. A process may consist of + multiple sub-processes. :param dimension: The name of the source dimension to apply the process on. Fails with a `DimensionNotAvailable` exception if the specified dimension does not exist. :param target_dimension: The name of the target dimension or `null` (the default) to use the source @@ -425,18 +421,19 @@ def apply_dimension(self, process, dimension, target_dimension=UNSET, context=UN 1. The source dimension is the target dimension: - The (number of) dimensions remain unchanged as the source dimension is the target dimension. - The source dimension properties name and type remain unchanged. - The dimension labels, the reference system and the resolution are preserved only if the - number of pixel values in the source dimension is equal to the number of values computed by the - process. Otherwise, all other dimension properties change as defined in the list below. 2. The source - dimension is not the target dimension and the latter exists: - The number of dimensions decreases by - one as the source dimension is dropped. - The target dimension properties name and type remain - unchanged. All other dimension properties change as defined in the list below. 3. The source dimension - is not the target dimension and the latter does not exist: - The number of dimensions remain - unchanged, but the source dimension is replaced with the target dimension. - The target dimension - has the specified name and the type other. All other dimension properties are set as defined in the - list below. 
Unless otherwise stated above, for the given (target) dimension the following applies: - - the number of dimension labels is equal to the number of values computed by the process, - the - dimension labels are incrementing integers starting from zero, - the resolution changes, and - the - reference system is undefined. + number of values in the source dimension is equal to the number of values computed by the process. + Otherwise, all other dimension properties change as defined in the list below. 2. The source dimension + is not the target dimension. The target dimension exists with a single label only: - The number of + dimensions decreases by one as the source dimension is 'dropped' and the target dimension is filled + with the processed data that originates from the source dimension. - The target dimension properties + name and type remain unchanged. All other dimension properties change as defined in the list below. 3. + The source dimension is not the target dimension and the latter does not exist: - The number of + dimensions remain unchanged, but the source dimension is replaced with the target dimension. - The + target dimension has the specified name and the type other. All other dimension properties are set as + defined in the list below. Unless otherwise stated above, for the given (target) dimension the + following applies: - the number of dimension labels is equal to the number of values computed by the + process, - the dimension labels are incrementing integers starting from zero, - the resolution changes, + and - the reference system is undefined. """ return apply_dimension( data=self, @@ -451,7 +448,7 @@ def apply_kernel(self, kernel, factor=UNSET, border=UNSET, replace_invalid=UNSET """ Apply a spatial convolution with a kernel - :param self: A data cube. + :param self: A raster data cube. :param kernel: Kernel as a two-dimensional array of weights. 
The inner level of the nested array aligns with the `x` axis and the outer level aligns with the `y` axis. Each level of the kernel must have an uneven number of elements, otherwise the process throws a `KernelDimensionsUneven` exception. @@ -478,7 +475,7 @@ def apply_neighborhood(self, process, size, overlap=UNSET, context=UNSET) -> Pro """ Apply a process to pixels in a n-dimensional neighborhood - :param self: A data cube. + :param self: A raster data cube. :param process: Process to be applied on all neighborhoods. :param size: Neighborhood sizes along each dimension. This object maps dimension names to either a physical measure (e.g. 100 m, 10 days) or pixels (e.g. 32 pixels). For dimensions not specified, the @@ -492,8 +489,8 @@ def apply_neighborhood(self, process, size, overlap=UNSET, context=UNSET) -> Pro overlapping data in subsequent operations have no effect. :param context: Additional data to be passed to the process. - :return: A data cube with the newly computed values and the same dimensions. The dimension properties - (name, type, labels, reference system and resolution) remain unchanged. + :return: A raster data cube with the newly computed values and the same dimensions. The dimension + properties (name, type, labels, reference system and resolution) remain unchanged. """ return apply_neighborhood( data=self, @@ -503,6 +500,34 @@ def apply_neighborhood(self, process, size, overlap=UNSET, context=UNSET) -> Pro context=context ) + @openeo_process + def apply_polygon(self, polygons, process, mask_value=UNSET, context=UNSET) -> ProcessBuilder: + """ + Apply a process to segments of the data cube + + :param self: A data cube. + :param polygons: A vector data cube containing at least one polygon. 
The provided vector data can be + one of the following: * A `Polygon` or `MultiPolygon` geometry, * a `Feature` with a `Polygon` or + `MultiPolygon` geometry, or * a `FeatureCollection` containing at least one `Feature` with `Polygon` or + `MultiPolygon` geometries. * Empty geometries are ignored. + :param process: A process that accepts and returns a single data cube and is applied on each individual + sub data cube. The process may consist of multiple sub-processes. + :param mask_value: All pixels for which the point at the pixel center **does not** intersect with the + polygon are replaced with the given value, which defaults to `null` (no data). It can provide a + distinction between no data values within the polygon and masked pixels outside of it. + :param context: Additional data to be passed to the process. + + :return: A data cube with the newly computed values and the same dimensions. The dimension properties + (name, type, labels, reference system and resolution) remain unchanged. + """ + return apply_polygon( + data=self, + polygons=polygons, + process=build_child_callback(process, parent_parameters=['data', 'context']), + mask_value=mask_value, + context=context + ) + @openeo_process def arccos(self) -> ProcessBuilder: """ @@ -925,8 +950,10 @@ def climatological_normal(self, period, climatology_period=UNSET) -> ProcessBuil season`: Six month periods of the tropical seasons (November - April, May - October). :param climatology_period: The climatology period as a closed temporal interval. The first element of the array is the first year to be fully included in the temporal interval. The second element is the - last year to be fully included in the temporal interval. The default period is from 1981 until 2010 - (both inclusive). + last year to be fully included in the temporal interval. The default climatology period is from 1981 + until 2010 (both inclusive) right now, but this might be updated over time to what is commonly used in + climatology. 
If you want to keep your research reproducible, please explicitly specify a + period. :return: A data cube with the same dimensions. The dimension properties (name, type, labels, reference system and resolution) remain unchanged, except for the resolution and dimension labels of the temporal @@ -1014,7 +1041,7 @@ def count(self, condition=UNSET, context=UNSET) -> ProcessBuilder: :param condition: A condition consists of one or more processes, which in the end return a boolean value. It is evaluated against each element in the array. An element is counted only if the condition returns `true`. Defaults to count valid elements in a list (see ``is_valid()``). Setting this parameter - to boolean `true` counts all elements in the list. + to boolean `true` counts all elements in the list. `false` is not a valid value for this parameter. :param context: Additional data to be passed to the condition. :return: The counted number of elements. @@ -1022,13 +1049,13 @@ def count(self, condition=UNSET, context=UNSET) -> ProcessBuilder: return count(data=self, condition=condition, context=context) @openeo_process - def create_raster_cube(self) -> ProcessBuilder: + def create_data_cube(self) -> ProcessBuilder: """ - Create an empty raster data cube + Create an empty data cube - :return: An empty raster data cube with zero dimensions. + :return: An empty data cube with no dimensions. """ - return create_raster_cube() + return create_data_cube() @openeo_process def cummax(self, ignore_nodata=UNSET) -> ProcessBuilder: @@ -1086,6 +1113,37 @@ def cumsum(self, ignore_nodata=UNSET) -> ProcessBuilder: """ return cumsum(data=self, ignore_nodata=ignore_nodata) + @openeo_process + def date_between(self, min, max, exclude_max=UNSET) -> ProcessBuilder: + """ + Between comparison for dates and times + + :param self: The value to check. + :param min: Lower boundary (inclusive) to check against. + :param max: Upper boundary (inclusive) to check against. 
+ :param exclude_max: Exclude the upper boundary `max` if set to `true`. Defaults to `false`. + + :return: `true` if `x` is between the specified bounds, otherwise `false`. + """ + return date_between(x=self, min=min, max=max, exclude_max=exclude_max) + + @openeo_process + def date_difference(self, date2, unit=UNSET) -> ProcessBuilder: + """ + Computes the difference between two time instants + + :param self: The base date, optionally with a time component. + :param date2: The other date, optionally with a time component. + :param unit: The unit for the returned value. The following units are available: - millisecond - + second - leap seconds are ignored in computations. - minute - hour - day - month - year + + :return: Returns the difference between date1 and date2 in the given unit (seconds by default), + including a fractional part if required. For comparison purposes this means: - If `date1` < `date2`, + the returned value is positive. - If `date1` = `date2`, the returned value is 0. - If `date1` > + `date2`, the returned value is negative. + """ + return date_difference(date1=self, date2=date2, unit=unit) + @openeo_process def date_shift(self, value, unit) -> ProcessBuilder: """ @@ -1265,14 +1323,17 @@ def filter_labels(self, condition, dimension, context=UNSET) -> ProcessBuilder: @openeo_process def filter_spatial(self, geometries) -> ProcessBuilder: """ - Spatial filter using geometries + Spatial filter raster data cubes using geometries - :param self: A data cube. - :param geometries: One or more geometries used for filtering, specified as GeoJSON. + :param self: A raster data cube. + :param geometries: One or more geometries used for filtering, given as GeoJSON or vector data cube. If + multiple geometries are provided, the union of them is used. Empty geometries are ignored. Limits the + data cube to the bounding box of the given geometries. No implicit masking gets applied. To mask the + pixels of the data cube use ``mask_polygon()``. 
- :return: A data cube restricted to the specified geometries. The dimensions and dimension properties - (name, type, labels, reference system and resolution) remain unchanged, except that the spatial - dimensions have less (or the same) dimension labels. + :return: A raster data cube restricted to the specified geometries. The dimensions and dimension + properties (name, type, labels, reference system and resolution) remain unchanged, except that the + spatial dimensions have less (or the same) dimension labels. """ return filter_spatial(data=self, geometries=geometries) @@ -1283,14 +1344,14 @@ def filter_temporal(self, extent, dimension=UNSET) -> ProcessBuilder: :param self: A data cube. :param extent: Left-closed temporal interval, i.e. an array with exactly two elements: 1. The first - element is the start of the temporal interval. The specified instance in time is **included** in the - interval. 2. The second element is the end of the temporal interval. The specified instance in time is - **excluded** from the interval. The specified temporal strings follow [RFC 3339](https://www.rfc- - editor.org/rfc/rfc3339.html). Also supports open intervals by setting one of the boundaries to `null`, - but never both. + element is the start of the temporal interval. The specified time instant is **included** in the + interval. 2. The second element is the end of the temporal interval. The specified time instant is + **excluded** from the interval. The second element must always be greater/later than the first + element. Otherwise, a `TemporalExtentEmpty` exception is thrown. Also supports unbounded intervals by + setting one of the boundaries to `null`, but never both. :param dimension: The name of the temporal dimension to filter on. If no specific dimension is - specified or it is set to `null`, the filter applies to all temporal dimensions. Fails with a - `DimensionNotAvailable` exception if the specified dimension does not exist. 
+ specified, the filter applies to all temporal dimensions. Fails with a `DimensionNotAvailable` + exception if the specified dimension does not exist. :return: A data cube restricted to the specified temporal extent. The dimensions and dimension properties (name, type, labels, reference system and resolution) remain unchanged, except that the @@ -1298,6 +1359,23 @@ def filter_temporal(self, extent, dimension=UNSET) -> ProcessBuilder: """ return filter_temporal(data=self, extent=extent, dimension=dimension) + @openeo_process + def filter_vector(self, geometries, relation=UNSET) -> ProcessBuilder: + """ + Spatial vector filter using geometries + + :param self: A vector data cube with the candidate geometries. + :param geometries: One or more base geometries used for filtering, given as vector data cube. If + multiple base geometries are provided, the union of them is used. + :param relation: The spatial filter predicate for comparing the geometries provided through (a) + `geometries` (base geometries) and (b) `data` (candidate geometries). + + :return: A vector data cube restricted to the specified geometries. The dimensions and dimension + properties (name, type, labels, reference system and resolution) remain unchanged, except that the + geometries dimension has less (or the same) dimension labels. + """ + return filter_vector(data=self, geometries=geometries, relation=relation) + @openeo_process def first(self, ignore_nodata=UNSET) -> ProcessBuilder: """ @@ -1313,80 +1391,30 @@ def first(self, ignore_nodata=UNSET) -> ProcessBuilder: return first(data=self, ignore_nodata=ignore_nodata) @openeo_process - def fit_class_random_forest(self, target, max_variables, num_trees=UNSET, seed=UNSET) -> ProcessBuilder: - """ - Train a random forest classification model - - :param self: The predictors for the classification model as a vector data cube. Aggregated to the - features (vectors) of the target input variable. 
- :param target: The training sites for the classification model as a vector data cube. This is - associated with the target variable for the Random Forest model. The geometry has to associated with a - value to predict (e.g. fractional forest canopy cover). - :param max_variables: Specifies how many split variables will be used at a node. The following options - are available: - *integer*: The given number of variables are considered for each split. - `all`: All - variables are considered for each split. - `log2`: The logarithm with base 2 of the number of variables - are considered for each split. - `onethird`: A third of the number of variables are considered for each - split. - `sqrt`: The square root of the number of variables are considered for each split. This is - often the default for classification. - :param num_trees: The number of trees build within the Random Forest classification. - :param seed: A randomization seed to use for the random sampling in training. If not given or `null`, - no seed is used and results may differ on subsequent use. - - :return: A model object that can be saved with ``save_ml_model()`` and restored with - ``load_ml_model()``. - """ - return fit_class_random_forest(predictors=self, target=target, max_variables=max_variables, num_trees=num_trees, seed=seed) - - @openeo_process - def fit_curve(self, parameters, function, dimension) -> ProcessBuilder: + def fit_curve(self, parameters, function, ignore_nodata=UNSET) -> ProcessBuilder: """ Curve fitting - :param self: A data cube. + :param self: A labeled array, the labels correspond to the variable `y` and the values correspond to + the variable `x`. :param parameters: Defined the number of parameters for the model function and provides an initial guess for them. At least one parameter is required. :param function: The model function. It must take the parameters to fit as array through the first argument and the independent variable `x` as the second argument. 
It is recommended to store the model function as a user-defined process on the back-end to be able to re-use the model function with the computed optimal values for the parameters afterwards. - :param dimension: The name of the dimension for curve fitting. Must be a dimension with labels that - have a order (i.e. numerical labels or a temporal dimension). Fails with a `DimensionNotAvailable` - exception if the specified dimension does not exist. + :param ignore_nodata: Indicates whether no-data values are ignored or not. Ignores them by default. + Setting this flag to `false` considers no-data values so that `null` is passed to the model function. - :return: A data cube with the optimal values for the parameters. + :return: An array with the optimal values for the parameters. """ return fit_curve( data=self, parameters=parameters, function=build_child_callback(function, parent_parameters=['x', 'parameters']), - dimension=dimension + ignore_nodata=ignore_nodata ) - @openeo_process - def fit_regr_random_forest(self, target, max_variables, num_trees=UNSET, seed=UNSET) -> ProcessBuilder: - """ - Train a random forest regression model - - :param self: The predictors for the regression model as a vector data cube. Aggregated to the features - (vectors) of the target input variable. - :param target: The training sites for the regression model as a vector data cube. This is associated - with the target variable for the Random Forest model. The geometry has to associated with a value to - predict (e.g. fractional forest canopy cover). - :param max_variables: Specifies how many split variables will be used at a node. The following options - are available: - *integer*: The given number of variables are considered for each split. - `all`: All - variables are considered for each split. - `log2`: The logarithm with base 2 of the number of variables - are considered for each split. - `onethird`: A third of the number of variables are considered for each - split. 
This is often the default for regression. - `sqrt`: The square root of the number of variables - are considered for each split. - :param num_trees: The number of trees build within the Random Forest regression. - :param seed: A randomization seed to use for the random sampling in training. If not given or `null`, - no seed is used and results may differ on subsequent use. - - :return: A model object that can be saved with ``save_ml_model()`` and restored with - ``load_ml_model()``. - """ - return fit_regr_random_forest(predictors=self, target=target, max_variables=max_variables, num_trees=num_trees, seed=seed) - @openeo_process def flatten_dimensions(self, dimensions, target_dimension, label_separator=UNSET) -> ProcessBuilder: """ @@ -1459,19 +1487,19 @@ def if_(self, accept, reject=UNSET) -> ProcessBuilder: return if_(value=self, accept=accept, reject=reject) @openeo_process - def inspect(self, code=UNSET, level=UNSET, message=UNSET) -> ProcessBuilder: + def inspect(self, message=UNSET, code=UNSET, level=UNSET) -> ProcessBuilder: """ Add information to the logs :param self: Data to log. + :param message: A message to send in addition to the data. :param code: A label to help identify one or more log entries originating from this process in the list of all log entries. It can help to group or filter log entries and is usually not unique. :param level: The severity level of this message, defaults to `info`. - :param message: A message to send in addition to the data. :return: The data as passed to the `data` parameter without any modification. """ - return inspect(data=self, code=code, level=level, message=message) + return inspect(data=self, message=message, code=code, level=level) @openeo_process def int(self) -> ProcessBuilder: @@ -1502,7 +1530,7 @@ def is_nan(self) -> ProcessBuilder: :param self: The data to check. - :return: `true` if the data is not a number, otherwise `false`. 
+ :return: Returns `true` for `NaN` and all non-numeric data types, otherwise returns `false`. """ return is_nan(x=self) @@ -1576,24 +1604,27 @@ def load_collection(self, spatial_extent, temporal_extent, bands=UNSET, properti :param self: The collection id. :param spatial_extent: Limits the data to load from the collection to the specified bounding box or - polygons. The process puts a pixel into the data cube if the point at the pixel center intersects with - the bounding box or any of the polygons (as defined in the Simple Features standard by the OGC). The - GeoJSON can be one of the following feature types: * A `Polygon` or `MultiPolygon` geometry, * a - `Feature` with a `Polygon` or `MultiPolygon` geometry, * a `FeatureCollection` containing at least one - `Feature` with `Polygon` or `MultiPolygon` geometries, or * a `GeometryCollection` containing `Polygon` - or `MultiPolygon` geometries. To maximize interoperability, `GeometryCollection` should be avoided in - favour of one of the alternatives above. Set this parameter to `null` to set no limit for the spatial - extent. Be careful with this when loading large datasets! It is recommended to use this parameter - instead of using ``filter_bbox()`` or ``filter_spatial()`` directly after loading unbounded data. + polygons. * For raster data, the process loads the pixel into the data cube if the point at the pixel + center intersects with the bounding box or any of the polygons (as defined in the Simple Features + standard by the OGC). * For vector data, the process loads the geometry into the data cube if the + geometry is fully *within* the bounding box or any of the polygons (as defined in the Simple Features + standard by the OGC). Empty geometries may only be in the data cube if no spatial extent has been + provided. 
The GeoJSON can be one of the following feature types: * A `Polygon` or `MultiPolygon` + geometry, * a `Feature` with a `Polygon` or `MultiPolygon` geometry, or * a `FeatureCollection` + containing at least one `Feature` with `Polygon` or `MultiPolygon` geometries. * Empty geometries are + ignored. Set this parameter to `null` to set no limit for the spatial extent. Be careful with this + when loading large datasets! It is recommended to use this parameter instead of using ``filter_bbox()`` + or ``filter_spatial()`` directly after loading unbounded data. :param temporal_extent: Limits the data to load from the collection to the specified left-closed temporal interval. Applies to all temporal dimensions. The interval has to be specified as an array with exactly two elements: 1. The first element is the start of the temporal interval. The specified - instance in time is **included** in the interval. 2. The second element is the end of the temporal - interval. The specified instance in time is **excluded** from the interval. The specified temporal - strings follow [RFC 3339](https://www.rfc-editor.org/rfc/rfc3339.html). Also supports open intervals by - setting one of the boundaries to `null`, but never both. Set this parameter to `null` to set no limit - for the temporal extent. Be careful with this when loading large datasets! It is recommended to use - this parameter instead of using ``filter_temporal()`` directly after loading unbounded data. + time instant is **included** in the interval. 2. The second element is the end of the temporal + interval. The specified time instant is **excluded** from the interval. The second element must always + be greater/later than the first element. Otherwise, a `TemporalExtentEmpty` exception is thrown. Also + supports unbounded intervals by setting one of the boundaries to `null`, but never both. Set this + parameter to `null` to set no limit for the temporal extent. Be careful with this when loading large + datasets! 
It is recommended to use this parameter instead of using ``filter_temporal()`` directly after + loading unbounded data. :param bands: Only adds the specified bands into the data cube so that bands that don't match the list of band names are not available. Applies to all dimensions of type `bands`. Either the unique band name (metadata field `name` in bands) or one of the common band names (metadata field `common_name` in @@ -1614,6 +1645,28 @@ def load_collection(self, spatial_extent, temporal_extent, bands=UNSET, properti """ return load_collection(id=self, spatial_extent=spatial_extent, temporal_extent=temporal_extent, bands=bands, properties=properties) + @openeo_process + def load_geojson(self, properties=UNSET) -> ProcessBuilder: + """ + Converts GeoJSON into a vector data cube + + :param self: A GeoJSON object to convert into a vector data cube. The GeoJSON type `GeometryCollection` + is not supported. Each geometry in the GeoJSON data results in a dimension label in the `geometries` + dimension. + :param properties: A list of properties from the GeoJSON file to construct an additional dimension + from. A new dimension with the name `properties` and type `other` is created if at least one property + is provided. Only applies for GeoJSON Features and FeatureCollections. Missing values are generally set + to no-data (`null`). Depending on the number of properties provided, the process creates the dimension + differently: - Single property with scalar values: A single dimension label with the name of the + property and a single value per geometry. - Single property of type array: The dimension labels + correspond to the array indices. There are as many values and labels per geometry as there are for the + largest array. - Multiple properties with scalar values: The dimension labels correspond to the + property names. There are as many values and labels per geometry as there are properties provided here. 
+ + :return: A vector data cube containing the geometries, either one or two dimensional. + """ + return load_geojson(data=self, properties=properties) + @openeo_process def load_ml_model(self) -> ProcessBuilder: """ @@ -1634,15 +1687,17 @@ def load_result(self, spatial_extent=UNSET, temporal_extent=UNSET, bands=UNSET) :param self: The id of a batch job with results. :param spatial_extent: Limits the data to load from the batch job result to the specified bounding box - or polygons. The process puts a pixel into the data cube if the point at the pixel center intersects - with the bounding box or any of the polygons (as defined in the Simple Features standard by the OGC). - The GeoJSON can be one of the following feature types: * A `Polygon` or `MultiPolygon` geometry, * a - `Feature` with a `Polygon` or `MultiPolygon` geometry, * a `FeatureCollection` containing at least one - `Feature` with `Polygon` or `MultiPolygon` geometries, or * a `GeometryCollection` containing `Polygon` - or `MultiPolygon` geometries. To maximize interoperability, `GeometryCollection` should be avoided in - favour of one of the alternatives above. Set this parameter to `null` to set no limit for the spatial - extent. Be careful with this when loading large datasets! It is recommended to use this parameter - instead of using ``filter_bbox()`` or ``filter_spatial()`` directly after loading unbounded data. + or polygons. * For raster data, the process loads the pixel into the data cube if the point at the + pixel center intersects with the bounding box or any of the polygons (as defined in the Simple Features + standard by the OGC). * For vector data, the process loads the geometry into the data cube if the + geometry is fully within the bounding box or any of the polygons (as defined in the Simple Features + standard by the OGC). Empty geometries may only be in the data cube if no spatial extent has been + provided.
The GeoJSON can be one of the following feature types: * A `Polygon` or `MultiPolygon` + geometry, * a `Feature` with a `Polygon` or `MultiPolygon` geometry, or * a `FeatureCollection` + containing at least one `Feature` with `Polygon` or `MultiPolygon` geometries. Set this parameter to + `null` to set no limit for the spatial extent. Be careful with this when loading large datasets! It is + recommended to use this parameter instead of using ``filter_bbox()`` or ``filter_spatial()`` directly + after loading unbounded data. :param temporal_extent: Limits the data to load from the batch job result to the specified left-closed temporal interval. Applies to all temporal dimensions. The interval has to be specified as an array with exactly two elements: 1. The first element is the start of the temporal interval. The specified @@ -1665,6 +1720,58 @@ def load_result(self, spatial_extent=UNSET, temporal_extent=UNSET, bands=UNSET) """ return load_result(id=self, spatial_extent=spatial_extent, temporal_extent=temporal_extent, bands=bands) + @openeo_process + def load_stac(self, spatial_extent=UNSET, temporal_extent=UNSET, bands=UNSET, properties=UNSET) -> ProcessBuilder: + """ + Loads data from STAC + + :param self: The URL to a static STAC catalog (STAC Item, STAC Collection, or STAC Catalog) or a + specific STAC API Collection that allows to filter items and to download assets. This includes batch + job results, which themselves are compliant with STAC. For external URLs, authentication details such as API + keys or tokens may need to be included in the URL. Batch job results can be specified in two ways: - + For Batch job results at the same back-end, a URL pointing to the corresponding batch job results + endpoint should be provided. The URL usually ends with `/jobs/{id}/results` and `{id}` is the + corresponding batch job ID. - For external results, a signed URL must be provided.
Not all back-ends + support signed URLs, which are provided as a link with the link relation `canonical` in the batch job + result metadata. + :param spatial_extent: Limits the data to load to the specified bounding box or polygons. * For raster + data, the process loads the pixel into the data cube if the point at the pixel center intersects with + the bounding box or any of the polygons (as defined in the Simple Features standard by the OGC). * For + vector data, the process loads the geometry into the data cube if the geometry is fully within the + bounding box or any of the polygons (as defined in the Simple Features standard by the OGC). Empty + geometries may only be in the data cube if no spatial extent has been provided. The GeoJSON can be one + of the following feature types: * A `Polygon` or `MultiPolygon` geometry, * a `Feature` with a + `Polygon` or `MultiPolygon` geometry, or * a `FeatureCollection` containing at least one `Feature` with + `Polygon` or `MultiPolygon` geometries. Set this parameter to `null` to set no limit for the spatial + extent. Be careful with this when loading large datasets! It is recommended to use this parameter + instead of using ``filter_bbox()`` or ``filter_spatial()`` directly after loading unbounded data. + :param temporal_extent: Limits the data to load to the specified left-closed temporal interval. Applies + to all temporal dimensions. The interval has to be specified as an array with exactly two elements: 1. + The first element is the start of the temporal interval. The specified instance in time is **included** + in the interval. 2. The second element is the end of the temporal interval. The specified instance in + time is **excluded** from the interval. The second element must always be greater/later than the first + element. Otherwise, a `TemporalExtentEmpty` exception is thrown. Also supports open intervals by + setting one of the boundaries to `null`, but never both. 
Set this parameter to `null` to set no limit + for the temporal extent. Be careful with this when loading large datasets! It is recommended to use + this parameter instead of using ``filter_temporal()`` directly after loading unbounded data. + :param bands: Only adds the specified bands into the data cube so that bands that don't match the list + of band names are not available. Applies to all dimensions of type `bands`. Either the unique band + name (metadata field `name` in bands) or one of the common band names (metadata field `common_name` in + bands) can be specified. If the unique band name and the common name conflict, the unique band name has + a higher priority. The order of the specified array defines the order of the bands in the data cube. + If multiple bands match a common name, all matched bands are included in the original order. It is + recommended to use this parameter instead of using ``filter_bands()`` directly after loading unbounded + data. + :param properties: Limits the data by metadata properties to include only data in the data cube which + all given conditions return `true` for (AND operation). Specify key-value-pairs with the key being the + name of the metadata property, which can be retrieved with the openEO Data Discovery for Collections. + The value must be a condition (user-defined process) to be evaluated against a STAC API. This parameter + is not supported for static STAC. + + :return: A data cube for further processing. 
+ """ + return load_stac(url=self, spatial_extent=spatial_extent, temporal_extent=temporal_extent, bands=bands, properties=properties) + @openeo_process def load_uploaded_files(self, format, options=UNSET) -> ProcessBuilder: """ @@ -1684,6 +1791,25 @@ def load_uploaded_files(self, format, options=UNSET) -> ProcessBuilder: """ return load_uploaded_files(paths=self, format=format, options=options) + @openeo_process + def load_url(self, format, options=UNSET) -> ProcessBuilder: + """ + Load data from a URL + + :param self: The URL to read from. Authentication details such as API keys or tokens may need to be + included in the URL. + :param format: The file format to use when loading the data. It must be one of the values that the + server reports as supported input file formats, which usually correspond to the short GDAL/OGR codes. + If the format is not suitable for loading the data, a `FormatUnsuitable` exception will be thrown. This + parameter is *case insensitive*. + :param options: The file format parameters to use when reading the data. Must correspond to the + parameters that the server reports as supported parameters for the chosen `format`. The parameter names + and valid values usually correspond to the GDAL/OGR format options. + + :return: A data cube for further processing. + """ + return load_url(url=self, format=format, options=options) + @openeo_process def log(self, base) -> ProcessBuilder: """ @@ -1742,12 +1868,10 @@ def mask_polygon(self, mask, replacement=UNSET, inside=UNSET) -> ProcessBuilder: Apply a polygon mask :param self: A raster data cube. - :param mask: A GeoJSON object containing at least one polygon. 
The provided feature types can be one of - the following: * A `Polygon` or `MultiPolygon` geometry, * a `Feature` with a `Polygon` or - `MultiPolygon` geometry, * a `FeatureCollection` containing at least one `Feature` with `Polygon` or - `MultiPolygon` geometries, or * a `GeometryCollection` containing `Polygon` or `MultiPolygon` - geometries. To maximize interoperability, `GeometryCollection` should be avoided in favour of one of - the alternatives above. + :param mask: A GeoJSON object or a vector data cube containing at least one polygon. The provided + vector data can be one of the following: * A `Polygon` or `MultiPolygon` geometry, * a `Feature` with + a `Polygon` or `MultiPolygon` geometry, or * a `FeatureCollection` containing at least one `Feature` + with `Polygon` or `MultiPolygon` geometries. * Empty geometries are ignored. :param replacement: The value used to replace masked values with. :param inside: If set to `true` all pixels for which the point at the pixel center **does** intersect with any polygon are replaced. @@ -1804,8 +1928,8 @@ def merge_cubes(self, cube2, overlap_resolver=UNSET, context=UNSET) -> ProcessBu """ Merge two data cubes - :param self: The first data cube. - :param cube2: The second data cube. + :param self: The base data cube. + :param cube2: The other data cube to be merged with the base data cube. :param overlap_resolver: A reduction operator that resolves the conflict if the data overlaps. The reducer must return a value of the same data type as the input values are. The reduction operator may be a single process such as ``multiply()`` or consist of multiple sub-processes. `null` (the default) @@ -1953,7 +2077,7 @@ def or_(self, y) -> ProcessBuilder: @openeo_process def order(self, asc=UNSET, nodata=UNSET) -> ProcessBuilder: """ - Create a permutation + Get the order of array elements :param self: An array to compute the order for. :param asc: The default sort order is ascending, with smallest values first. 
To sort in reverse @@ -1987,25 +2111,23 @@ def power(self, p) -> ProcessBuilder: return power(base=self, p=p) @openeo_process - def predict_curve(self, parameters, function, dimension, labels=UNSET) -> ProcessBuilder: + def predict_curve(self, function, dimension, labels=UNSET) -> ProcessBuilder: """ Predict values - :param self: A data cube to predict values for. - :param parameters: A data cube with optimal values from a result of e.g. ``fit_curve()``. + :param self: A data cube with optimal values, e.g. computed by the process ``fit_curve()``. :param function: The model function. It must take the parameters to fit as array through the first argument and the independent variable `x` as the second argument. It is recommended to store the model function as a user-defined process on the back-end. - :param dimension: The name of the dimension for predictions. Fails with a `DimensionNotAvailable` - exception if the specified dimension does not exist. + :param dimension: The name of the dimension for predictions. :param labels: The labels to predict values for. If no labels are given, predicts values only for no- data (`null`) values in the data cube. - :return: A data cube with the predicted values. + :return: A data cube with the predicted values with the provided dimension `dimension` having as many + labels as provided through `labels`. """ return predict_curve( - data=self, - parameters=parameters, + parameters=self, function=build_child_callback(function, parent_parameters=['x', 'parameters']), dimension=dimension, labels=labels @@ -2014,7 +2136,7 @@ def predict_curve(self, parameters, function, dimension, labels=UNSET) -> Proces @openeo_process def predict_random_forest(self, model) -> ProcessBuilder: """ - Predict values from a Random Forest model + Predict values based on a Random Forest model :param self: An array of numbers. 
:param model: A model object that can be trained with the processes ``fit_regr_random_forest()`` @@ -2045,10 +2167,13 @@ def quantiles(self, probabilities=UNSET, q=UNSET, ignore_nodata=UNSET) -> Proces Quantiles :param self: An array of numbers. - :param probabilities: A list of probabilities to calculate quantiles for. The probabilities must be - between 0 and 1 (inclusive). + :param probabilities: Quantiles to calculate. Either a list of probabilities or the number of + intervals: * Provide an array with a sorted list of probabilities in ascending order to calculate + quantiles for. The probabilities must be between 0 and 1 (inclusive). If not sorted in ascending order, + an `AscendingProbabilitiesRequired` exception is thrown. * Provide an integer to specify the number of + intervals to calculate quantiles for. Calculates q-quantiles with equal-sized intervals. :param q: Number of intervals to calculate quantiles for. Calculates q-quantiles with equal-sized - intervals. + intervals. This parameter has been **deprecated**. Please use the parameter `probabilities` instead. :param ignore_nodata: Indicates whether no-data values are ignored or not. Ignores them by default. Setting this flag to `false` considers no-data values so that an array with `null` values is returned if any element is such a value. @@ -2063,7 +2188,7 @@ def quantiles(self, probabilities=UNSET, q=UNSET, ignore_nodata=UNSET) -> Proces @openeo_process def rearrange(self, order) -> ProcessBuilder: """ - Rearrange an array based on a permutation + Sort an array based on a permutation :param self: The array to rearrange. :param order: The permutation used for rearranging. @@ -2101,7 +2226,7 @@ def reduce_spatial(self, reducer, context=UNSET) -> ProcessBuilder: """ Reduce spatial dimensions 'x' and 'y' - :param self: A data cube. + :param self: A raster data cube. :param reducer: A reducer to apply on the horizontal spatial dimensions. 
A reducer is a single process such as ``mean()`` or a set of processes, which computes a single value for a list of values, see the category 'reducer' for such processes. @@ -2158,8 +2283,8 @@ def resample_cube_spatial(self, target, method=UNSET) -> ProcessBuilder: """ Resample the spatial dimensions to match a target data cube - :param self: A data cube. - :param target: A data cube that describes the spatial target resolution. + :param self: A raster data cube. + :param target: A raster data cube that describes the spatial target resolution. :param method: Resampling method to use. The following options are available and are meant to align with [`gdalwarp`](https://gdal.org/programs/gdalwarp.html#cmdoption-gdalwarp-r): * `average`: average (mean) resampling, computes the weighted average of all valid pixels * `bilinear`: bilinear resampling @@ -2173,9 +2298,9 @@ def resample_cube_spatial(self, target, method=UNSET) -> ProcessBuilder: valid pixels * `sum`: compute the weighted sum of all valid pixels Valid pixels are determined based on the function ``is_valid()``. - :return: A data cube with the same dimensions. The dimension properties (name, type, labels, reference - system and resolution) remain unchanged, except for the resolution and dimension labels of the spatial - dimensions. + :return: A raster data cube with the same dimensions. The dimension properties (name, type, labels, + reference system and resolution) remain unchanged, except for the resolution and dimension labels of + the spatial dimensions. """ return resample_cube_spatial(data=self, target=target, method=method) @@ -2199,10 +2324,9 @@ def resample_cube_temporal(self, target, dimension=UNSET, valid_within=UNSET) -> `2020-01-22 12:00:00`. If no valid value is found within the given period, the value will be set to no- data (`null`). 
- :return: A raster data cube with the same dimensions and the same dimension properties (name, type, - labels, reference system and resolution) for all non-temporal dimensions. For the temporal dimension, - the name and type remain unchanged, but the dimension labels, resolution and reference system may - change. + :return: A data cube with the same dimensions and the same dimension properties (name, type, labels, + reference system and resolution) for all non-temporal dimensions. For the temporal dimension, the name + and type remain unchanged, but the dimension labels, resolution and reference system may change. """ return resample_cube_temporal(data=self, target=target, dimension=dimension, valid_within=valid_within) @@ -2216,10 +2340,9 @@ def resample_spatial(self, resolution=UNSET, projection=UNSET, method=UNSET, ali separate values for x and y or as a single value for both axes. Specified in the units of the target projection. Doesn't change the resolution by default (`0`). :param projection: Warps the data cube to the target projection, specified as as [EPSG - code](http://www.epsg-registry.org/), [WKT2 (ISO 19162) - string](http://docs.opengeospatial.org/is/18-010r7/18-010r7.html), [PROJ definition - (deprecated)](https://proj.org/usage/quickstart.html). By default (`null`), the projection is not - changed. + code](http://www.epsg-registry.org/) or [WKT2 CRS + string](http://docs.opengeospatial.org/is/18-010r7/18-010r7.html). By default (`null`), the projection + is not changed. :param method: Resampling method to use. 
The following options are available and are meant to align with [`gdalwarp`](https://gdal.org/programs/gdalwarp.html#cmdoption-gdalwarp-r): * `average`: average (mean) resampling, computes the weighted average of all valid pixels * `bilinear`: bilinear resampling @@ -2327,18 +2450,6 @@ def sar_backscatter(self, coefficient=UNSET, elevation_model=UNSET, mask=UNSET, options=options ) - @openeo_process - def save_ml_model(self, options=UNSET) -> ProcessBuilder: - """ - Save a ML model - - :param self: The data to store as a machine learning model. - :param options: Additional parameters to create the file(s). - - :return: Returns `false` if the process failed to store the model, `true` otherwise. - """ - return save_ml_model(data=self, options=options) - @openeo_process def save_result(self, format, options=UNSET) -> ProcessBuilder: """ @@ -2346,14 +2457,16 @@ def save_result(self, format, options=UNSET) -> ProcessBuilder: :param self: The data to deliver in the given file format. :param format: The file format to use. It must be one of the values that the server reports as - supported output file formats, which usually correspond to the short GDAL/OGR codes. If the format is - not suitable for storing the underlying data structure, a `FormatUnsuitable` exception will be thrown. - This parameter is *case insensitive*. + supported output file formats, which usually correspond to the short GDAL/OGR codes. This parameter is + *case insensitive*. * If the data cube is empty and the file format can't store empty data cubes, a + `DataCubeEmpty` exception is thrown. * If the file format is otherwise not suitable for storing the + underlying data structure, a `FormatUnsuitable` exception is thrown. :param options: The file format parameters to be used to create the file(s). Must correspond to the parameters that the server reports as supported parameters for the chosen `format`. The parameter names and valid values usually correspond to the GDAL/OGR format options. 
- :return: Returns `false` if the process failed to make the data available, `true` otherwise. + :return: Always returns `true` as in case of an error an exception is thrown which aborts the execution + of the process. """ return save_result(data=self, format=format, options=options) @@ -2537,10 +2650,10 @@ def trim_cube(self) -> ProcessBuilder: """ Remove dimension labels with no-data values - :param self: A raster data cube to trim. + :param self: A data cube to trim. - :return: A trimmed raster data cube with the same dimensions. The dimension properties name, type, - reference system and resolution remain unchanged. The number of dimension labels may decrease. + :return: A trimmed data cube with the same dimensions. The dimension properties name, type, reference + system and resolution remain unchanged. The number of dimension labels may decrease. """ return trim_cube(data=self) @@ -2584,26 +2697,42 @@ def vector_buffer(self, distance) -> ProcessBuilder: """ Buffer geometries by distance - :param self: Geometries to apply the buffer on. Vector properties are preserved for vector data cubes - and all GeoJSON Features. To maximize interoperability, a nested `GeometryCollection` should be - avoided. Furthermore, a `GeometryCollection` composed of a single type of geometries should be avoided - in favour of the corresponding multi-part type (e.g. `MultiPolygon`). - :param distance: The distance of the buffer in the unit of the spatial reference system. A positive - distance expands the geometries and results in outward buffering (dilation) while a negative distance - shrinks the geometries and results in inward buffering (erosion). + :param self: Geometries to apply the buffer on. Feature properties are preserved. + :param distance: The distance of the buffer in meters. A positive distance expands the geometries, + resulting in outward buffering (dilation), while a negative distance shrinks the geometries, resulting + in inward buffering (erosion). 
If the unit of the spatial reference system is not meters, a + `UnitMismatch` error is thrown. Use ``vector_reproject()`` to convert the geometries to a suitable + spatial reference system. - :return: Returns a vector data cube with the computed new geometries. + :return: Returns a vector data cube with the computed new geometries of which some may be empty. """ return vector_buffer(geometries=self, distance=distance) + @openeo_process + def vector_reproject(self, projection, dimension=UNSET) -> ProcessBuilder: + """ + Reprojects the geometry dimension + + :param self: A vector data cube. + :param projection: Coordinate reference system to reproject to. Specified as an [EPSG + code](http://www.epsg-registry.org/) or [WKT2 CRS + string](http://docs.opengeospatial.org/is/18-010r7/18-010r7.html). + :param dimension: The name of the geometry dimension to reproject. If no specific dimension is + specified, the filter applies to all geometry dimensions. Fails with a `DimensionNotAvailable` + exception if the specified dimension does not exist. + + :return: A vector data cube with geometries projected to the new coordinate reference system. The + reference system of the geometry dimension changes, all other dimensions and properties remain + unchanged. + """ + return vector_reproject(data=self, projection=projection, dimension=dimension) + @openeo_process def vector_to_random_points(self, geometry_count=UNSET, total_count=UNSET, group=UNSET, seed=UNSET) -> ProcessBuilder: """ Sample random points from geometries - :param self: Input geometries for sample extraction. To maximize interoperability, a nested - `GeometryCollection` should be avoided. Furthermore, a `GeometryCollection` composed of a single type - of geometries should be avoided in favour of the corresponding multi-part type (e.g. `MultiPolygon`). + :param self: Input geometries for sample extraction. :param geometry_count: The maximum number of points to compute per geometry. 
Points in the input geometries can be selected only once by the sampling. :param total_count: The maximum number of points to compute overall. Throws a `CountMismatch` @@ -2624,17 +2753,17 @@ def vector_to_regular_points(self, distance, group=UNSET) -> ProcessBuilder: """ Sample regular points from geometries - :param self: Input geometries for sample extraction. To maximize interoperability, a nested - `GeometryCollection` should be avoided. Furthermore, a `GeometryCollection` composed of a single type - of geometries should be avoided in favour of the corresponding multi-part type (e.g. `MultiPolygon`). - :param distance: Defines the minimum distance in the unit of the reference system that is required - between two samples generated *inside* a single geometry. - For **polygons**, the distance defines the - cell sizes of a regular grid that starts at the upper-left bound of each polygon. The centroid of each - cell is then a sample point. If the centroid is not enclosed in the polygon, no point is sampled. If no - point can be sampled for the geometry at all, the first coordinate of the geometry is returned as - point. - For **lines** (line strings), the sampling starts with a point at the first coordinate of the - line and then walks along the line and samples a new point each time the distance to the previous point - has been reached again. - For **points**, the point is returned as given. + :param self: Input geometries for sample extraction. + :param distance: Defines the minimum distance in meters that is required between two samples generated + *inside* a single geometry. If the unit of the spatial reference system is not meters, a `UnitMismatch` + error is thrown. Use ``vector_reproject()`` to convert the geometries to a suitable spatial reference + system. - For **polygons**, the distance defines the cell sizes of a regular grid that starts at the + upper-left bound of each polygon. The centroid of each cell is then a sample point. 
If the centroid is + not enclosed in the polygon, no point is sampled. If no point can be sampled for the geometry at all, + the first coordinate of the geometry is returned as point. - For **lines** (line strings), the sampling + starts with a point at the first coordinate of the line and then walks along the line and samples a new + point each time the distance to the previous point has been reached again. - For **points**, the point + is returned as given. :param group: Specifies whether the sampled points should be grouped by input geometry (default) or be generated as independent points. * If the sampled points are grouped, the process generates a `MultiPoint` per geometry given which keeps the original identifier if present. * Otherwise, each @@ -2709,38 +2838,34 @@ def aggregate_spatial(data, geometries, reducer, target_dimension=UNSET, context """ Zonal statistics for geometries - :param data: A raster data cube. The data cube must have been reduced to only contain two spatial - dimensions and a third dimension the values are aggregated for, for example the temporal dimension to get a - time series. Otherwise, this process fails with the `TooManyDimensions` exception. The data cube - implicitly gets restricted to the bounds of the geometries as if ``filter_spatial()`` would have been used - with the same values for the corresponding parameters immediately before this process. - :param geometries: Geometries as GeoJSON on which the aggregation will be based. Vector properties are - preserved for vector data cubes and all GeoJSON Features. One value will be computed per GeoJSON - `Feature`, `Geometry` or `GeometryCollection`. For a `FeatureCollection` multiple values will be computed, - one value per contained `Feature`. For example, a single value will be computed for a `MultiPolygon`, but - two values will be computed for a `FeatureCollection` containing two polygons. 
- For **polygons**, the - process considers all pixels for which the point at the pixel center intersects with the corresponding - polygon (as defined in the Simple Features standard by the OGC). - For **points**, the process considers - the closest pixel center. - For **lines** (line strings), the process considers all the pixels whose - centers are closest to at least one point on the line. Thus, pixels may be part of multiple geometries and - be part of multiple aggregations. To maximize interoperability, a nested `GeometryCollection` should be - avoided. Furthermore, a `GeometryCollection` composed of a single type of geometries should be avoided in - favour of the corresponding multi-part type (e.g. `MultiPolygon`). + :param data: A raster data cube with at least two spatial dimensions. The data cube implicitly gets + restricted to the bounds of the geometries as if ``filter_spatial()`` would have been used with the same + values for the corresponding parameters immediately before this process. + :param geometries: Geometries for which the aggregation will be computed. Feature properties are preserved + for vector data cubes and all GeoJSON Features. One value will be computed per label in the dimension of + type `geometries`, GeoJSON `Feature` or `Geometry`. For a `FeatureCollection` multiple values will be + computed, one value per contained `Feature`. No values will be computed for empty geometries. For example, + a single value will be computed for a `MultiPolygon`, but two values will be computed for a + `FeatureCollection` containing two polygons. - For **polygons**, the process considers all pixels for + which the point at the pixel center intersects with the corresponding polygon (as defined in the Simple + Features standard by the OGC). - For **points**, the process considers the closest pixel center. - For + **lines** (line strings), the process considers all the pixels whose centers are closest to at least one + point on the line. 
Thus, pixels may be part of multiple geometries and be part of multiple aggregations. + No operation is applied to geometries that are outside of the bounds of the data. :param reducer: A reducer to be applied on all values of each geometry. A reducer is a single process such as ``mean()`` or a set of processes, which computes a single value for a list of values, see the category 'reducer' for such processes. - :param target_dimension: The name of a new dimensions that is used to store the results. A new dimension - will be created with the given name and type `other` (see ``add_dimension()``). Defaults to the dimension - name `result`. Fails with a `TargetDimensionExists` exception if a dimension with the specified name - exists. + :param target_dimension: By default (which is `null`), the process only computes the results and doesn't + add a new dimension. If this parameter contains a new dimension name, the computation also stores + information about the total count of pixels (valid + invalid pixels) and the number of valid pixels (see + ``is_valid()``) for each computed value. These values are added as a new dimension. The new dimension of + type `other` has the dimension labels `value`, `total_count` and `valid_count`. Fails with a + `TargetDimensionExists` exception if a dimension with the specified name exists. :param context: Additional data to be passed to the reducer. - :return: A vector data cube with the computed results and restricted to the bounds of the geometries. The - computed value is used for the dimension with the name that was specified in the parameter - `target_dimension`. The computation also stores information about the total count of pixels (valid + - invalid pixels) and the number of valid pixels (see ``is_valid()``) for each geometry. These values are - added as a new dimension with a dimension name derived from `target_dimension` by adding the suffix - `_meta`. The new dimension has the dimension labels `total_count` and `valid_count`. 
+ :return: A vector data cube with the computed results. Empty geometries still exist but without any + aggregated values (i.e. no-data). The spatial dimensions are replaced by a dimension of type 'geometries' + and if `target_dimension` is not `null`, a new dimension is added. """ return _process('aggregate_spatial', data=data, @@ -2772,8 +2897,8 @@ def aggregate_spatial_window(data, reducer, size, boundary=UNSET, align=UNSET, c the process pads/trims at the lower-right. :param context: Additional data to be passed to the reducer. - :return: A data cube with the newly computed values and the same dimensions. The resolution will change - depending on the chosen values for the `size` and `boundary` parameter. It usually decreases for the + :return: A raster data cube with the newly computed values and the same dimensions. The resolution will + change depending on the chosen values for the `size` and `boundary` parameter. It usually decreases for the dimensions which have the corresponding parameter `size` set to values greater than 1. The dimension labels will be set to the coordinate at the center of the window. The other dimension properties (name, type and reference system) remain unchanged. @@ -2796,12 +2921,10 @@ def aggregate_temporal(data, intervals, reducer, labels=UNSET, dimension=UNSET, :param data: A data cube. :param intervals: Left-closed temporal intervals, which are allowed to overlap. Each temporal interval in the array has exactly two elements: 1. The first element is the start of the temporal interval. The - specified instance in time is **included** in the interval. 2. The second element is the end of the - temporal interval. The specified instance in time is **excluded** from the interval. The specified - temporal strings follow [RFC 3339](https://www.rfc-editor.org/rfc/rfc3339.html). 
Although [RFC 3339 - prohibits the hour to be '24'](https://www.rfc-editor.org/rfc/rfc3339.html#section-5.7), **this process - allows the value '24' for the hour** of an end time in order to make it possible that left-closed time - intervals can fully cover the day. + specified time instant is **included** in the interval. 2. The second element is the end of the temporal + interval. The specified time instant is **excluded** from the interval. The second element must always be + greater/later than the first element, except when using time without date. Otherwise, a + `TemporalExtentEmpty` exception is thrown. :param reducer: A reducer to be applied for the values contained in each interval. A reducer is a single process such as ``mean()`` or a set of processes, which computes a single value for a list of values, see the category 'reducer' for such processes. Intervals may not contain any values, which for most reducers @@ -2838,9 +2961,11 @@ def aggregate_temporal_period(data, period, reducer, dimension=UNSET, context=UN :param period: The time intervals to aggregate. The following pre-defined values are available: * `hour`: Hour of the day * `day`: Day of the year * `week`: Week of the year * `dekad`: Ten day periods, counted per year with three periods per month (day 1 - 10, 11 - 20 and 21 - end of month). The third dekad of the month - can range from 8 to 11 days. For example, the fourth dekad is Feb, 1 - Feb, 10 each year. * `month`: Month - of the year * `season`: Three month periods of the calendar seasons (December - February, March - May, June - - August, September - November). * `tropical-season`: Six month periods of the tropical seasons (November - + can range from 8 to 11 days. For example, the third dekad of a year spans from January 21 till January 31 + (11 days), the fourth dekad spans from February 1 till February 10 (10 days) and the sixth dekad spans from + February 21 till February 28 or February 29 in a leap year (8 or 9 days respectively). 
* `month`: Month of + the year * `season`: Three month periods of the calendar seasons (December - February, March - May, June - + August, September - November). * `tropical-season`: Six month periods of the tropical seasons (November - April, May - October). * `year`: Proleptic years * `decade`: Ten year periods ([0-to-9 decade](https://en.wikipedia.org/wiki/Decade#0-to-9_decade)), from a year ending in a 0 to the next year ending in a 9. * `decade-ad`: Ten year periods ([1-to-0 @@ -2962,7 +3087,7 @@ def any(data, ignore_nodata=UNSET) -> ProcessBuilder: @openeo_process def apply(data, process, context=UNSET) -> ProcessBuilder: """ - Apply a process to each pixel + Apply a process to each value :param data: A data cube. :param process: A process that accepts and returns a single value and is applied on each individual value @@ -2979,11 +3104,12 @@ def apply(data, process, context=UNSET) -> ProcessBuilder: @openeo_process def apply_dimension(data, process, dimension, target_dimension=UNSET, context=UNSET) -> ProcessBuilder: """ - Apply a process to pixels along a dimension + Apply a process to all values along a dimension :param data: A data cube. - :param process: Process to be applied on all pixel values. The specified process needs to accept an array - and must return an array with at least one element. A process may consist of multiple sub-processes. + :param process: Process to be applied on all values along the given dimension. The specified process needs + to accept an array and must return an array with at least one element. A process may consist of multiple + sub-processes. :param dimension: The name of the source dimension to apply the process on. Fails with a `DimensionNotAvailable` exception if the specified dimension does not exist. 
:param target_dimension: The name of the target dimension or `null` (the default) to use the source @@ -2997,17 +3123,19 @@ def apply_dimension(data, process, dimension, target_dimension=UNSET, context=UN The source dimension is the target dimension: - The (number of) dimensions remain unchanged as the source dimension is the target dimension. - The source dimension properties name and type remain unchanged. - The dimension labels, the reference system and the resolution are preserved only if the - number of pixel values in the source dimension is equal to the number of values computed by the process. + number of values in the source dimension is equal to the number of values computed by the process. Otherwise, all other dimension properties change as defined in the list below. 2. The source dimension is - not the target dimension and the latter exists: - The number of dimensions decreases by one as the - source dimension is dropped. - The target dimension properties name and type remain unchanged. All other - dimension properties change as defined in the list below. 3. The source dimension is not the target - dimension and the latter does not exist: - The number of dimensions remain unchanged, but the source - dimension is replaced with the target dimension. - The target dimension has the specified name and the - type other. All other dimension properties are set as defined in the list below. Unless otherwise stated - above, for the given (target) dimension the following applies: - the number of dimension labels is equal - to the number of values computed by the process, - the dimension labels are incrementing integers starting - from zero, - the resolution changes, and - the reference system is undefined. + not the target dimension. 
The target dimension exists with a single label only: - The number of + dimensions decreases by one as the source dimension is 'dropped' and the target dimension is filled with + the processed data that originates from the source dimension. - The target dimension properties name and + type remain unchanged. All other dimension properties change as defined in the list below. 3. The source + dimension is not the target dimension and the latter does not exist: - The number of dimensions remain + unchanged, but the source dimension is replaced with the target dimension. - The target dimension has + the specified name and the type other. All other dimension properties are set as defined in the list below. + Unless otherwise stated above, for the given (target) dimension the following applies: - the number of + dimension labels is equal to the number of values computed by the process, - the dimension labels are + incrementing integers starting from zero, - the resolution changes, and - the reference system is + undefined. """ return _process('apply_dimension', data=data, @@ -3023,7 +3151,7 @@ def apply_kernel(data, kernel, factor=UNSET, border=UNSET, replace_invalid=UNSET """ Apply a spatial convolution with a kernel - :param data: A data cube. + :param data: A raster data cube. :param kernel: Kernel as a two-dimensional array of weights. The inner level of the nested array aligns with the `x` axis and the outer level aligns with the `y` axis. Each level of the kernel must have an uneven number of elements, otherwise the process throws a `KernelDimensionsUneven` exception. @@ -3050,7 +3178,7 @@ def apply_neighborhood(data, process, size, overlap=UNSET, context=UNSET) -> Pro """ Apply a process to pixels in a n-dimensional neighborhood - :param data: A data cube. + :param data: A raster data cube. :param process: Process to be applied on all neighborhoods. :param size: Neighborhood sizes along each dimension. 
This object maps dimension names to either a physical measure (e.g. 100 m, 10 days) or pixels (e.g. 32 pixels). For dimensions not specified, the @@ -3064,8 +3192,8 @@ def apply_neighborhood(data, process, size, overlap=UNSET, context=UNSET) -> Pro operations have no effect. :param context: Additional data to be passed to the process. - :return: A data cube with the newly computed values and the same dimensions. The dimension properties - (name, type, labels, reference system and resolution) remain unchanged. + :return: A raster data cube with the newly computed values and the same dimensions. The dimension + properties (name, type, labels, reference system and resolution) remain unchanged. """ return _process('apply_neighborhood', data=data, @@ -3076,6 +3204,35 @@ def apply_neighborhood(data, process, size, overlap=UNSET, context=UNSET) -> Pro ) +@openeo_process +def apply_polygon(data, polygons, process, mask_value=UNSET, context=UNSET) -> ProcessBuilder: + """ + Apply a process to segments of the data cube + + :param data: A data cube. + :param polygons: A vector data cube containing at least one polygon. The provided vector data can be one of + the following: * A `Polygon` or `MultiPolygon` geometry, * a `Feature` with a `Polygon` or `MultiPolygon` + geometry, or * a `FeatureCollection` containing at least one `Feature` with `Polygon` or `MultiPolygon` + geometries. * Empty geometries are ignored. + :param process: A process that accepts and returns a single data cube and is applied on each individual sub + data cube. The process may consist of multiple sub-processes. + :param mask_value: All pixels for which the point at the pixel center **does not** intersect with the + polygon are replaced with the given value, which defaults to `null` (no data). It can provide a + distinction between no data values within the polygon and masked pixels outside of it. + :param context: Additional data to be passed to the process. 
+ + :return: A data cube with the newly computed values and the same dimensions. The dimension properties + (name, type, labels, reference system and resolution) remain unchanged. + """ + return _process('apply_polygon', + data=data, + polygons=polygons, + process=build_child_callback(process, parent_parameters=['data', 'context']), + mask_value=mask_value, + context=context + ) + + @openeo_process def arccos(x) -> ProcessBuilder: """ @@ -3519,7 +3676,9 @@ def climatological_normal(data, period, climatology_period=UNSET) -> ProcessBuil tropical seasons (November - April, May - October). :param climatology_period: The climatology period as a closed temporal interval. The first element of the array is the first year to be fully included in the temporal interval. The second element is the last year - to be fully included in the temporal interval. The default period is from 1981 until 2010 (both inclusive). + to be fully included in the temporal interval. The default climatology period is from 1981 until 2010 + (both inclusive) right now, but this might be updated over time to what is commonly used in climatology. If + you want to keep your research reproducible, please explicitly specify a period. :return: A data cube with the same dimensions. The dimension properties (name, type, labels, reference system and resolution) remain unchanged, except for the resolution and dimension labels of the temporal @@ -3612,7 +3771,7 @@ def count(data, condition=UNSET, context=UNSET) -> ProcessBuilder: :param condition: A condition consists of one or more processes, which in the end return a boolean value. It is evaluated against each element in the array. An element is counted only if the condition returns `true`. Defaults to count valid elements in a list (see ``is_valid()``). Setting this parameter to boolean - `true` counts all elements in the list. + `true` counts all elements in the list. `false` is not a valid value for this parameter.
:param context: Additional data to be passed to the condition. :return: The counted number of elements. @@ -3621,13 +3780,13 @@ def count(data, condition=UNSET, context=UNSET) -> ProcessBuilder: @openeo_process -def create_raster_cube() -> ProcessBuilder: +def create_data_cube() -> ProcessBuilder: """ - Create an empty raster data cube + Create an empty data cube - :return: An empty raster data cube with zero dimensions. + :return: An empty data cube with no dimensions. """ - return _process('create_raster_cube', ) + return _process('create_data_cube', ) @openeo_process @@ -3686,6 +3845,39 @@ def cumsum(data, ignore_nodata=UNSET) -> ProcessBuilder: return _process('cumsum', data=data, ignore_nodata=ignore_nodata) +@openeo_process +def date_between(x, min, max, exclude_max=UNSET) -> ProcessBuilder: + """ + Between comparison for dates and times + + :param x: The value to check. + :param min: Lower boundary (inclusive) to check against. + :param max: Upper boundary (inclusive) to check against. + :param exclude_max: Exclude the upper boundary `max` if set to `true`. Defaults to `false`. + + :return: `true` if `x` is between the specified bounds, otherwise `false`. + """ + return _process('date_between', x=x, min=min, max=max, exclude_max=exclude_max) + + +@openeo_process +def date_difference(date1, date2, unit=UNSET) -> ProcessBuilder: + """ + Computes the difference between two time instants + + :param date1: The base date, optionally with a time component. + :param date2: The other date, optionally with a time component. + :param unit: The unit for the returned value. The following units are available: - millisecond - second - + leap seconds are ignored in computations. - minute - hour - day - month - year + + :return: Returns the difference between date1 and date2 in the given unit (seconds by default), including a + fractional part if required. For comparison purposes this means: - If `date1` < `date2`, the returned + value is positive. 
- If `date1` = `date2`, the returned value is 0. - If `date1` > `date2`, the returned + value is negative. + """ + return _process('date_difference', date1=date1, date2=date2, unit=unit) + + @openeo_process def date_shift(date, value, unit) -> ProcessBuilder: """ @@ -3875,14 +4067,17 @@ def filter_labels(data, condition, dimension, context=UNSET) -> ProcessBuilder: @openeo_process def filter_spatial(data, geometries) -> ProcessBuilder: """ - Spatial filter using geometries + Spatial filter raster data cubes using geometries - :param data: A data cube. - :param geometries: One or more geometries used for filtering, specified as GeoJSON. + :param data: A raster data cube. + :param geometries: One or more geometries used for filtering, given as GeoJSON or vector data cube. If + multiple geometries are provided, the union of them is used. Empty geometries are ignored. Limits the data + cube to the bounding box of the given geometries. No implicit masking gets applied. To mask the pixels of + the data cube use ``mask_polygon()``. - :return: A data cube restricted to the specified geometries. The dimensions and dimension properties (name, - type, labels, reference system and resolution) remain unchanged, except that the spatial dimensions have - less (or the same) dimension labels. + :return: A raster data cube restricted to the specified geometries. The dimensions and dimension properties + (name, type, labels, reference system and resolution) remain unchanged, except that the spatial dimensions + have less (or the same) dimension labels. """ return _process('filter_spatial', data=data, geometries=geometries) @@ -3894,14 +4089,14 @@ def filter_temporal(data, extent, dimension=UNSET) -> ProcessBuilder: :param data: A data cube. :param extent: Left-closed temporal interval, i.e. an array with exactly two elements: 1. The first - element is the start of the temporal interval. The specified instance in time is **included** in the - interval. 2. 
The second element is the end of the temporal interval. The specified instance in time is - **excluded** from the interval. The specified temporal strings follow [RFC 3339](https://www.rfc- - editor.org/rfc/rfc3339.html). Also supports open intervals by setting one of the boundaries to `null`, but - never both. - :param dimension: The name of the temporal dimension to filter on. If no specific dimension is specified or - it is set to `null`, the filter applies to all temporal dimensions. Fails with a `DimensionNotAvailable` - exception if the specified dimension does not exist. + element is the start of the temporal interval. The specified time instant is **included** in the interval. + 2. The second element is the end of the temporal interval. The specified time instant is **excluded** from + the interval. The second element must always be greater/later than the first element. Otherwise, a + `TemporalExtentEmpty` exception is thrown. Also supports unbounded intervals by setting one of the + boundaries to `null`, but never both. + :param dimension: The name of the temporal dimension to filter on. If no specific dimension is specified, + the filter applies to all temporal dimensions. Fails with a `DimensionNotAvailable` exception if the + specified dimension does not exist. :return: A data cube restricted to the specified temporal extent. The dimensions and dimension properties (name, type, labels, reference system and resolution) remain unchanged, except that the temporal dimensions @@ -3910,6 +4105,24 @@ def filter_temporal(data, extent, dimension=UNSET) -> ProcessBuilder: return _process('filter_temporal', data=data, extent=extent, dimension=dimension) +@openeo_process +def filter_vector(data, geometries, relation=UNSET) -> ProcessBuilder: + """ + Spatial vector filter using geometries + + :param data: A vector data cube with the candidate geometries. + :param geometries: One or more base geometries used for filtering, given as vector data cube. 
If multiple + base geometries are provided, the union of them is used. + :param relation: The spatial filter predicate for comparing the geometries provided through (a) + `geometries` (base geometries) and (b) `data` (candidate geometries). + + :return: A vector data cube restricted to the specified geometries. The dimensions and dimension properties + (name, type, labels, reference system and resolution) remain unchanged, except that the geometries + dimension has less (or the same) dimension labels. + """ + return _process('filter_vector', data=data, geometries=geometries, relation=relation) + + @openeo_process def first(data, ignore_nodata=UNSET) -> ProcessBuilder: """ @@ -3926,81 +4139,31 @@ def first(data, ignore_nodata=UNSET) -> ProcessBuilder: @openeo_process -def fit_class_random_forest(predictors, target, max_variables, num_trees=UNSET, seed=UNSET) -> ProcessBuilder: - """ - Train a random forest classification model - - :param predictors: The predictors for the classification model as a vector data cube. Aggregated to the - features (vectors) of the target input variable. - :param target: The training sites for the classification model as a vector data cube. This is associated - with the target variable for the Random Forest model. The geometry has to associated with a value to - predict (e.g. fractional forest canopy cover). - :param max_variables: Specifies how many split variables will be used at a node. The following options are - available: - *integer*: The given number of variables are considered for each split. - `all`: All - variables are considered for each split. - `log2`: The logarithm with base 2 of the number of variables are - considered for each split. - `onethird`: A third of the number of variables are considered for each split. - - `sqrt`: The square root of the number of variables are considered for each split. This is often the - default for classification. 
- :param num_trees: The number of trees build within the Random Forest classification. - :param seed: A randomization seed to use for the random sampling in training. If not given or `null`, no - seed is used and results may differ on subsequent use. - - :return: A model object that can be saved with ``save_ml_model()`` and restored with ``load_ml_model()``. - """ - return _process('fit_class_random_forest', predictors=predictors, target=target, max_variables=max_variables, num_trees=num_trees, seed=seed) - - -@openeo_process -def fit_curve(data, parameters, function, dimension) -> ProcessBuilder: +def fit_curve(data, parameters, function, ignore_nodata=UNSET) -> ProcessBuilder: """ Curve fitting - :param data: A data cube. + :param data: A labeled array, the labels correspond to the variable `y` and the values correspond to the + variable `x`. :param parameters: Defined the number of parameters for the model function and provides an initial guess for them. At least one parameter is required. :param function: The model function. It must take the parameters to fit as array through the first argument and the independent variable `x` as the second argument. It is recommended to store the model function as a user-defined process on the back-end to be able to re-use the model function with the computed optimal values for the parameters afterwards. - :param dimension: The name of the dimension for curve fitting. Must be a dimension with labels that have a - order (i.e. numerical labels or a temporal dimension). Fails with a `DimensionNotAvailable` exception if - the specified dimension does not exist. + :param ignore_nodata: Indicates whether no-data values are ignored or not. Ignores them by default. Setting + this flag to `false` considers no-data values so that `null` is passed to the model function. - :return: A data cube with the optimal values for the parameters. + :return: An array with the optimal values for the parameters. 
""" return _process('fit_curve', data=data, parameters=parameters, function=build_child_callback(function, parent_parameters=['x', 'parameters']), - dimension=dimension + ignore_nodata=ignore_nodata ) -@openeo_process -def fit_regr_random_forest(predictors, target, max_variables, num_trees=UNSET, seed=UNSET) -> ProcessBuilder: - """ - Train a random forest regression model - - :param predictors: The predictors for the regression model as a vector data cube. Aggregated to the - features (vectors) of the target input variable. - :param target: The training sites for the regression model as a vector data cube. This is associated with - the target variable for the Random Forest model. The geometry has to associated with a value to predict - (e.g. fractional forest canopy cover). - :param max_variables: Specifies how many split variables will be used at a node. The following options are - available: - *integer*: The given number of variables are considered for each split. - `all`: All - variables are considered for each split. - `log2`: The logarithm with base 2 of the number of variables are - considered for each split. - `onethird`: A third of the number of variables are considered for each split. - This is often the default for regression. - `sqrt`: The square root of the number of variables are - considered for each split. - :param num_trees: The number of trees build within the Random Forest regression. - :param seed: A randomization seed to use for the random sampling in training. If not given or `null`, no - seed is used and results may differ on subsequent use. - - :return: A model object that can be saved with ``save_ml_model()`` and restored with ``load_ml_model()``. 
- """ - return _process('fit_regr_random_forest', predictors=predictors, target=target, max_variables=max_variables, num_trees=num_trees, seed=seed) - - @openeo_process def flatten_dimensions(data, dimensions, target_dimension, label_separator=UNSET) -> ProcessBuilder: """ @@ -4076,19 +4239,19 @@ def if_(value, accept, reject=UNSET) -> ProcessBuilder: @openeo_process -def inspect(data, code=UNSET, level=UNSET, message=UNSET) -> ProcessBuilder: +def inspect(data, message=UNSET, code=UNSET, level=UNSET) -> ProcessBuilder: """ Add information to the logs :param data: Data to log. + :param message: A message to send in addition to the data. :param code: A label to help identify one or more log entries originating from this process in the list of all log entries. It can help to group or filter log entries and is usually not unique. :param level: The severity level of this message, defaults to `info`. - :param message: A message to send in addition to the data. :return: The data as passed to the `data` parameter without any modification. """ - return _process('inspect', data=data, code=code, level=level, message=message) + return _process('inspect', data=data, message=message, code=code, level=level) @openeo_process @@ -4122,7 +4285,7 @@ def is_nan(x) -> ProcessBuilder: :param x: The data to check. - :return: `true` if the data is not a number, otherwise `false`. + :return: Returns `true` for `NaN` and all non-numeric data types, otherwise returns `false`. """ return _process('is_nan', x=x) @@ -4201,24 +4364,26 @@ def load_collection(id, spatial_extent, temporal_extent, bands=UNSET, properties :param id: The collection id. :param spatial_extent: Limits the data to load from the collection to the specified bounding box or - polygons. The process puts a pixel into the data cube if the point at the pixel center intersects with the - bounding box or any of the polygons (as defined in the Simple Features standard by the OGC). 
The GeoJSON - can be one of the following feature types: * A `Polygon` or `MultiPolygon` geometry, * a `Feature` with a - `Polygon` or `MultiPolygon` geometry, * a `FeatureCollection` containing at least one `Feature` with - `Polygon` or `MultiPolygon` geometries, or * a `GeometryCollection` containing `Polygon` or `MultiPolygon` - geometries. To maximize interoperability, `GeometryCollection` should be avoided in favour of one of the - alternatives above. Set this parameter to `null` to set no limit for the spatial extent. Be careful with - this when loading large datasets! It is recommended to use this parameter instead of using - ``filter_bbox()`` or ``filter_spatial()`` directly after loading unbounded data. + polygons. * For raster data, the process loads the pixel into the data cube if the point at the pixel + center intersects with the bounding box or any of the polygons (as defined in the Simple Features standard + by the OGC). * For vector data, the process loads the geometry into the data cube if the geometry is fully + *within* the bounding box or any of the polygons (as defined in the Simple Features standard by the OGC). + Empty geometries may only be in the data cube if no spatial extent has been provided. The GeoJSON can be + one of the following feature types: * A `Polygon` or `MultiPolygon` geometry, * a `Feature` with a + `Polygon` or `MultiPolygon` geometry, or * a `FeatureCollection` containing at least one `Feature` with + `Polygon` or `MultiPolygon` geometries. * Empty geometries are ignored. Set this parameter to `null` to + set no limit for the spatial extent. Be careful with this when loading large datasets! It is recommended to + use this parameter instead of using ``filter_bbox()`` or ``filter_spatial()`` directly after loading + unbounded data. :param temporal_extent: Limits the data to load from the collection to the specified left-closed temporal interval. Applies to all temporal dimensions. 
The interval has to be specified as an array with exactly two - elements: 1. The first element is the start of the temporal interval. The specified instance in time is - **included** in the interval. 2. The second element is the end of the temporal interval. The specified - instance in time is **excluded** from the interval. The specified temporal strings follow [RFC - 3339](https://www.rfc-editor.org/rfc/rfc3339.html). Also supports open intervals by setting one of the - boundaries to `null`, but never both. Set this parameter to `null` to set no limit for the temporal - extent. Be careful with this when loading large datasets! It is recommended to use this parameter instead - of using ``filter_temporal()`` directly after loading unbounded data. + elements: 1. The first element is the start of the temporal interval. The specified time instant is + **included** in the interval. 2. The second element is the end of the temporal interval. The specified time + instant is **excluded** from the interval. The second element must always be greater/later than the first + element. Otherwise, a `TemporalExtentEmpty` exception is thrown. Also supports unbounded intervals by + setting one of the boundaries to `null`, but never both. Set this parameter to `null` to set no limit for + the temporal extent. Be careful with this when loading large datasets! It is recommended to use this + parameter instead of using ``filter_temporal()`` directly after loading unbounded data. :param bands: Only adds the specified bands into the data cube so that bands that don't match the list of band names are not available. Applies to all dimensions of type `bands`. 
Either the unique band name (metadata field `name` in bands) or one of the common band names (metadata field `common_name` in bands) @@ -4239,6 +4404,29 @@ def load_collection(id, spatial_extent, temporal_extent, bands=UNSET, properties return _process('load_collection', id=id, spatial_extent=spatial_extent, temporal_extent=temporal_extent, bands=bands, properties=properties) +@openeo_process +def load_geojson(data, properties=UNSET) -> ProcessBuilder: + """ + Converts GeoJSON into a vector data cube + + :param data: A GeoJSON object to convert into a vector data cube. The GeoJSON type `GeometryCollection` is + not supported. Each geometry in the GeoJSON data results in a dimension label in the `geometries` + dimension. + :param properties: A list of properties from the GeoJSON file to construct an additional dimension from. A + new dimension with the name `properties` and type `other` is created if at least one property is provided. + Only applies for GeoJSON Features and FeatureCollections. Missing values are generally set to no-data + (`null`). Depending on the number of properties provided, the process creates the dimension differently: + - Single property with scalar values: A single dimension label with the name of the property and a single + value per geometry. - Single property of type array: The dimension labels correspond to the array indices. + There are as many values and labels per geometry as there are for the largest array. - Multiple properties + with scalar values: The dimension labels correspond to the property names. There are as many values and + labels per geometry as there are properties provided here. + + :return: A vector data cube containing the geometries, either one or two dimensional. 
+ """ + return _process('load_geojson', data=data, properties=properties) + + @openeo_process def load_ml_model(id) -> ProcessBuilder: """ @@ -4260,15 +4448,16 @@ def load_result(id, spatial_extent=UNSET, temporal_extent=UNSET, bands=UNSET) -> :param id: The id of a batch job with results. :param spatial_extent: Limits the data to load from the batch job result to the specified bounding box or - polygons. The process puts a pixel into the data cube if the point at the pixel center intersects with the - bounding box or any of the polygons (as defined in the Simple Features standard by the OGC). The GeoJSON - can be one of the following feature types: * A `Polygon` or `MultiPolygon` geometry, * a `Feature` with a - `Polygon` or `MultiPolygon` geometry, * a `FeatureCollection` containing at least one `Feature` with - `Polygon` or `MultiPolygon` geometries, or * a `GeometryCollection` containing `Polygon` or `MultiPolygon` - geometries. To maximize interoperability, `GeometryCollection` should be avoided in favour of one of the - alternatives above. Set this parameter to `null` to set no limit for the spatial extent. Be careful with - this when loading large datasets! It is recommended to use this parameter instead of using - ``filter_bbox()`` or ``filter_spatial()`` directly after loading unbounded data. + polygons. * For raster data, the process loads the pixel into the data cube if the point at the pixel + center intersects with the bounding box or any of the polygons (as defined in the Simple Features standard + by the OGC). * For vector data, the process loads the geometry into the data cube of the geometry is fully + within the bounding box or any of the polygons (as defined in the Simple Features standard by the OGC). + Empty geometries may only be in the data cube if no spatial extent has been provided. 
The GeoJSON can be + one of the following feature types: * A `Polygon` or `MultiPolygon` geometry, * a `Feature` with a + `Polygon` or `MultiPolygon` geometry, or * a `FeatureCollection` containing at least one `Feature` with + `Polygon` or `MultiPolygon` geometries. Set this parameter to `null` to set no limit for the spatial + extent. Be careful with this when loading large datasets! It is recommended to use this parameter instead + of using ``filter_bbox()`` or ``filter_spatial()`` directly after loading unbounded data. :param temporal_extent: Limits the data to load from the batch job result to the specified left-closed temporal interval. Applies to all temporal dimensions. The interval has to be specified as an array with exactly two elements: 1. The first element is the start of the temporal interval. The specified instance @@ -4291,6 +4480,57 @@ def load_result(id, spatial_extent=UNSET, temporal_extent=UNSET, bands=UNSET) -> return _process('load_result', id=id, spatial_extent=spatial_extent, temporal_extent=temporal_extent, bands=bands) +@openeo_process +def load_stac(url, spatial_extent=UNSET, temporal_extent=UNSET, bands=UNSET, properties=UNSET) -> ProcessBuilder: + """ + Loads data from STAC + + :param url: The URL to a static STAC catalog (STAC Item, STAC Collection, or STAC Catalog) or a specific + STAC API Collection that allows to filter items and to download assets. This includes batch job results, + which itself are compliant to STAC. For external URLs, authentication details such as API keys or tokens + may need to be included in the URL. Batch job results can be specified in two ways: - For Batch job + results at the same back-end, a URL pointing to the corresponding batch job results endpoint should be + provided. The URL usually ends with `/jobs/{id}/results` and `{id}` is the corresponding batch job ID. - + For external results, a signed URL must be provided. 
Not all back-ends support signed URLs, which are + provided as a link with the link relation `canonical` in the batch job result metadata. + :param spatial_extent: Limits the data to load to the specified bounding box or polygons. * For raster + data, the process loads the pixel into the data cube if the point at the pixel center intersects with the + bounding box or any of the polygons (as defined in the Simple Features standard by the OGC). * For vector + data, the process loads the geometry into the data cube if the geometry is fully within the bounding box or + any of the polygons (as defined in the Simple Features standard by the OGC). Empty geometries may only be + in the data cube if no spatial extent has been provided. The GeoJSON can be one of the following feature + types: * A `Polygon` or `MultiPolygon` geometry, * a `Feature` with a `Polygon` or `MultiPolygon` + geometry, or * a `FeatureCollection` containing at least one `Feature` with `Polygon` or `MultiPolygon` + geometries. Set this parameter to `null` to set no limit for the spatial extent. Be careful with this when + loading large datasets! It is recommended to use this parameter instead of using ``filter_bbox()`` or + ``filter_spatial()`` directly after loading unbounded data. + :param temporal_extent: Limits the data to load to the specified left-closed temporal interval. Applies to + all temporal dimensions. The interval has to be specified as an array with exactly two elements: 1. The + first element is the start of the temporal interval. The specified instance in time is **included** in the + interval. 2. The second element is the end of the temporal interval. The specified instance in time is + **excluded** from the interval. The second element must always be greater/later than the first element. + Otherwise, a `TemporalExtentEmpty` exception is thrown. Also supports open intervals by setting one of the + boundaries to `null`, but never both. 
Set this parameter to `null` to set no limit for the temporal + extent. Be careful with this when loading large datasets! It is recommended to use this parameter instead + of using ``filter_temporal()`` directly after loading unbounded data. + :param bands: Only adds the specified bands into the data cube so that bands that don't match the list of + band names are not available. Applies to all dimensions of type `bands`. Either the unique band name + (metadata field `name` in bands) or one of the common band names (metadata field `common_name` in bands) + can be specified. If the unique band name and the common name conflict, the unique band name has a higher + priority. The order of the specified array defines the order of the bands in the data cube. If multiple + bands match a common name, all matched bands are included in the original order. It is recommended to use + this parameter instead of using ``filter_bands()`` directly after loading unbounded data. + :param properties: Limits the data by metadata properties to include only data in the data cube which all + given conditions return `true` for (AND operation). Specify key-value-pairs with the key being the name of + the metadata property, which can be retrieved with the openEO Data Discovery for Collections. The value + must be a condition (user-defined process) to be evaluated against a STAC API. This parameter is not + supported for static STAC. + + :return: A data cube for further processing. 
+ """ + return _process('load_stac', url=url, spatial_extent=spatial_extent, temporal_extent=temporal_extent, bands=bands, properties=properties) + + @openeo_process def load_uploaded_files(paths, format, options=UNSET) -> ProcessBuilder: """ @@ -4311,6 +4551,26 @@ def load_uploaded_files(paths, format, options=UNSET) -> ProcessBuilder: return _process('load_uploaded_files', paths=paths, format=format, options=options) +@openeo_process +def load_url(url, format, options=UNSET) -> ProcessBuilder: + """ + Load data from a URL + + :param url: The URL to read from. Authentication details such as API keys or tokens may need to be included + in the URL. + :param format: The file format to use when loading the data. It must be one of the values that the server + reports as supported input file formats, which usually correspond to the short GDAL/OGR codes. If the + format is not suitable for loading the data, a `FormatUnsuitable` exception will be thrown. This parameter + is *case insensitive*. + :param options: The file format parameters to use when reading the data. Must correspond to the parameters + that the server reports as supported parameters for the chosen `format`. The parameter names and valid + values usually correspond to the GDAL/OGR format options. + + :return: A data cube for further processing. + """ + return _process('load_url', url=url, format=format, options=options) + + @openeo_process def log(x, base) -> ProcessBuilder: """ @@ -4372,11 +4632,10 @@ def mask_polygon(data, mask, replacement=UNSET, inside=UNSET) -> ProcessBuilder: Apply a polygon mask :param data: A raster data cube. - :param mask: A GeoJSON object containing at least one polygon. 
The provided feature types can be one of the - following: * A `Polygon` or `MultiPolygon` geometry, * a `Feature` with a `Polygon` or `MultiPolygon` - geometry, * a `FeatureCollection` containing at least one `Feature` with `Polygon` or `MultiPolygon` - geometries, or * a `GeometryCollection` containing `Polygon` or `MultiPolygon` geometries. To maximize - interoperability, `GeometryCollection` should be avoided in favour of one of the alternatives above. + :param mask: A GeoJSON object or a vector data cube containing at least one polygon. The provided vector + data can be one of the following: * A `Polygon` or `MultiPolygon` geometry, * a `Feature` with a `Polygon` + or `MultiPolygon` geometry, or * a `FeatureCollection` containing at least one `Feature` with `Polygon` or + `MultiPolygon` geometries. * Empty geometries are ignored. :param replacement: The value used to replace masked values with. :param inside: If set to `true` all pixels for which the point at the pixel center **does** intersect with any polygon are replaced. @@ -4434,8 +4693,8 @@ def merge_cubes(cube1, cube2, overlap_resolver=UNSET, context=UNSET) -> ProcessB """ Merge two data cubes - :param cube1: The first data cube. - :param cube2: The second data cube. + :param cube1: The base data cube. + :param cube2: The other data cube to be merged with the base data cube. :param overlap_resolver: A reduction operator that resolves the conflict if the data overlaps. The reducer must return a value of the same data type as the input values are. The reduction operator may be a single process such as ``multiply()`` or consist of multiple sub-processes. `null` (the default) can be specified @@ -4592,7 +4851,7 @@ def or_(x, y) -> ProcessBuilder: @openeo_process def order(data, asc=UNSET, nodata=UNSET) -> ProcessBuilder: """ - Create a permutation + Get the order of array elements :param data: An array to compute the order for. :param asc: The default sort order is ascending, with smallest values first. 
To sort in reverse @@ -4629,24 +4888,22 @@ def power(base, p) -> ProcessBuilder: @openeo_process -def predict_curve(data, parameters, function, dimension, labels=UNSET) -> ProcessBuilder: +def predict_curve(parameters, function, dimension, labels=UNSET) -> ProcessBuilder: """ Predict values - :param data: A data cube to predict values for. - :param parameters: A data cube with optimal values from a result of e.g. ``fit_curve()``. + :param parameters: A data cube with optimal values, e.g. computed by the process ``fit_curve()``. :param function: The model function. It must take the parameters to fit as array through the first argument and the independent variable `x` as the second argument. It is recommended to store the model function as a user-defined process on the back-end. - :param dimension: The name of the dimension for predictions. Fails with a `DimensionNotAvailable` exception - if the specified dimension does not exist. + :param dimension: The name of the dimension for predictions. :param labels: The labels to predict values for. If no labels are given, predicts values only for no-data (`null`) values in the data cube. - :return: A data cube with the predicted values. + :return: A data cube with the predicted values with the provided dimension `dimension` having as many + labels as provided through `labels`. """ return _process('predict_curve', - data=data, parameters=parameters, function=build_child_callback(function, parent_parameters=['x', 'parameters']), dimension=dimension, @@ -4657,7 +4914,7 @@ def predict_curve(data, parameters, function, dimension, labels=UNSET) -> Proces @openeo_process def predict_random_forest(data, model) -> ProcessBuilder: """ - Predict values from a Random Forest model + Predict values based on a Random Forest model :param data: An array of numbers. 
:param model: A model object that can be trained with the processes ``fit_regr_random_forest()`` @@ -4688,10 +4945,13 @@ def quantiles(data, probabilities=UNSET, q=UNSET, ignore_nodata=UNSET) -> Proces Quantiles :param data: An array of numbers. - :param probabilities: A list of probabilities to calculate quantiles for. The probabilities must be between - 0 and 1 (inclusive). + :param probabilities: Quantiles to calculate. Either a list of probabilities or the number of intervals: * + Provide an array with a sorted list of probabilities in ascending order to calculate quantiles for. The + probabilities must be between 0 and 1 (inclusive). If not sorted in ascending order, an + `AscendingProbabilitiesRequired` exception is thrown. * Provide an integer to specify the number of + intervals to calculate quantiles for. Calculates q-quantiles with equal-sized intervals. :param q: Number of intervals to calculate quantiles for. Calculates q-quantiles with equal-sized - intervals. + intervals. This parameter has been **deprecated**. Please use the parameter `probabilities` instead. :param ignore_nodata: Indicates whether no-data values are ignored or not. Ignores them by default. Setting this flag to `false` considers no-data values so that an array with `null` values is returned if any element is such a value. @@ -4707,7 +4967,7 @@ def quantiles(data, probabilities=UNSET, q=UNSET, ignore_nodata=UNSET) -> Proces @openeo_process def rearrange(data, order) -> ProcessBuilder: """ - Rearrange an array based on a permutation + Sort an array based on a permutation :param data: The array to rearrange. :param order: The permutation used for rearranging. @@ -4747,7 +5007,7 @@ def reduce_spatial(data, reducer, context=UNSET) -> ProcessBuilder: """ Reduce spatial dimensions 'x' and 'y' - :param data: A data cube. + :param data: A raster data cube. :param reducer: A reducer to apply on the horizontal spatial dimensions. 
A reducer is a single process such as ``mean()`` or a set of processes, which computes a single value for a list of values, see the category 'reducer' for such processes. @@ -4807,8 +5067,8 @@ def resample_cube_spatial(data, target, method=UNSET) -> ProcessBuilder: """ Resample the spatial dimensions to match a target data cube - :param data: A data cube. - :param target: A data cube that describes the spatial target resolution. + :param data: A raster data cube. + :param target: A raster data cube that describes the spatial target resolution. :param method: Resampling method to use. The following options are available and are meant to align with [`gdalwarp`](https://gdal.org/programs/gdalwarp.html#cmdoption-gdalwarp-r): * `average`: average (mean) resampling, computes the weighted average of all valid pixels * `bilinear`: bilinear resampling * `cubic`: @@ -4821,9 +5081,9 @@ def resample_cube_spatial(data, target, method=UNSET) -> ProcessBuilder: value of all valid pixels * `rms` root mean square (quadratic mean) of all valid pixels * `sum`: compute the weighted sum of all valid pixels Valid pixels are determined based on the function ``is_valid()``. - :return: A data cube with the same dimensions. The dimension properties (name, type, labels, reference - system and resolution) remain unchanged, except for the resolution and dimension labels of the spatial - dimensions. + :return: A raster data cube with the same dimensions. The dimension properties (name, type, labels, + reference system and resolution) remain unchanged, except for the resolution and dimension labels of the + spatial dimensions. """ return _process('resample_cube_spatial', data=data, target=target, method=method) @@ -4847,7 +5107,7 @@ def resample_cube_temporal(data, target, dimension=UNSET, valid_within=UNSET) -> `2020-01-15 12:00:00` looks for a nearest neighbor after `2020-01-08 12:00:00` and before `2020-01-22 12:00:00`. 
If no valid value is found within the given period, the value will be set to no-data (`null`). - :return: A raster data cube with the same dimensions and the same dimension properties (name, type, labels, + :return: A data cube with the same dimensions and the same dimension properties (name, type, labels, reference system and resolution) for all non-temporal dimensions. For the temporal dimension, the name and type remain unchanged, but the dimension labels, resolution and reference system may change. """ @@ -4864,9 +5124,9 @@ def resample_spatial(data, resolution=UNSET, projection=UNSET, method=UNSET, ali separate values for x and y or as a single value for both axes. Specified in the units of the target projection. Doesn't change the resolution by default (`0`). :param projection: Warps the data cube to the target projection, specified as as [EPSG - code](http://www.epsg-registry.org/), [WKT2 (ISO 19162) - string](http://docs.opengeospatial.org/is/18-010r7/18-010r7.html), [PROJ definition - (deprecated)](https://proj.org/usage/quickstart.html). By default (`null`), the projection is not changed. + code](http://www.epsg-registry.org/) or [WKT2 CRS + string](http://docs.opengeospatial.org/is/18-010r7/18-010r7.html). By default (`null`), the projection is + not changed. :param method: Resampling method to use. The following options are available and are meant to align with [`gdalwarp`](https://gdal.org/programs/gdalwarp.html#cmdoption-gdalwarp-r): * `average`: average (mean) resampling, computes the weighted average of all valid pixels * `bilinear`: bilinear resampling * `cubic`: @@ -4978,19 +5238,6 @@ def sar_backscatter(data, coefficient=UNSET, elevation_model=UNSET, mask=UNSET, ) -@openeo_process -def save_ml_model(data, options=UNSET) -> ProcessBuilder: - """ - Save a ML model - - :param data: The data to store as a machine learning model. - :param options: Additional parameters to create the file(s). 
- - :return: Returns `false` if the process failed to store the model, `true` otherwise. - """ - return _process('save_ml_model', data=data, options=options) - - @openeo_process def save_result(data, format, options=UNSET) -> ProcessBuilder: """ @@ -4998,14 +5245,16 @@ def save_result(data, format, options=UNSET) -> ProcessBuilder: :param data: The data to deliver in the given file format. :param format: The file format to use. It must be one of the values that the server reports as supported - output file formats, which usually correspond to the short GDAL/OGR codes. If the format is not suitable - for storing the underlying data structure, a `FormatUnsuitable` exception will be thrown. This parameter is - *case insensitive*. + output file formats, which usually correspond to the short GDAL/OGR codes. This parameter is *case + insensitive*. * If the data cube is empty and the file format can't store empty data cubes, a + `DataCubeEmpty` exception is thrown. * If the file format is otherwise not suitable for storing the + underlying data structure, a `FormatUnsuitable` exception is thrown. :param options: The file format parameters to be used to create the file(s). Must correspond to the parameters that the server reports as supported parameters for the chosen `format`. The parameter names and valid values usually correspond to the GDAL/OGR format options. - :return: Returns `false` if the process failed to make the data available, `true` otherwise. + :return: Always returns `true` as in case of an error an exception is thrown which aborts the execution of + the process. """ return _process('save_result', data=data, format=format, options=options) @@ -5201,10 +5450,10 @@ def trim_cube(data) -> ProcessBuilder: """ Remove dimension labels with no-data values - :param data: A raster data cube to trim. + :param data: A data cube to trim. - :return: A trimmed raster data cube with the same dimensions. 
The dimension properties name, type, - reference system and resolution remain unchanged. The number of dimension labels may decrease. + :return: A trimmed data cube with the same dimensions. The dimension properties name, type, reference + system and resolution remain unchanged. The number of dimension labels may decrease. """ return _process('trim_cube', data=data) @@ -5249,27 +5498,43 @@ def vector_buffer(geometries, distance) -> ProcessBuilder: """ Buffer geometries by distance - :param geometries: Geometries to apply the buffer on. Vector properties are preserved for vector data cubes - and all GeoJSON Features. To maximize interoperability, a nested `GeometryCollection` should be avoided. - Furthermore, a `GeometryCollection` composed of a single type of geometries should be avoided in favour of - the corresponding multi-part type (e.g. `MultiPolygon`). - :param distance: The distance of the buffer in the unit of the spatial reference system. A positive - distance expands the geometries and results in outward buffering (dilation) while a negative distance - shrinks the geometries and results in inward buffering (erosion). + :param geometries: Geometries to apply the buffer on. Feature properties are preserved. + :param distance: The distance of the buffer in meters. A positive distance expands the geometries, + resulting in outward buffering (dilation), while a negative distance shrinks the geometries, resulting in + inward buffering (erosion). If the unit of the spatial reference system is not meters, a `UnitMismatch` + error is thrown. Use ``vector_reproject()`` to convert the geometries to a suitable spatial reference + system. - :return: Returns a vector data cube with the computed new geometries. + :return: Returns a vector data cube with the computed new geometries of which some may be empty. 
""" return _process('vector_buffer', geometries=geometries, distance=distance) +@openeo_process +def vector_reproject(data, projection, dimension=UNSET) -> ProcessBuilder: + """ + Reprojects the geometry dimension + + :param data: A vector data cube. + :param projection: Coordinate reference system to reproject to. Specified as an [EPSG + code](http://www.epsg-registry.org/) or [WKT2 CRS + string](http://docs.opengeospatial.org/is/18-010r7/18-010r7.html). + :param dimension: The name of the geometry dimension to reproject. If no specific dimension is specified, + the filter applies to all geometry dimensions. Fails with a `DimensionNotAvailable` exception if the + specified dimension does not exist. + + :return: A vector data cube with geometries projected to the new coordinate reference system. The reference + system of the geometry dimension changes, all other dimensions and properties remain unchanged. + """ + return _process('vector_reproject', data=data, projection=projection, dimension=dimension) + + @openeo_process def vector_to_random_points(data, geometry_count=UNSET, total_count=UNSET, group=UNSET, seed=UNSET) -> ProcessBuilder: """ Sample random points from geometries - :param data: Input geometries for sample extraction. To maximize interoperability, a nested - `GeometryCollection` should be avoided. Furthermore, a `GeometryCollection` composed of a single type of - geometries should be avoided in favour of the corresponding multi-part type (e.g. `MultiPolygon`). + :param data: Input geometries for sample extraction. :param geometry_count: The maximum number of points to compute per geometry. Points in the input geometries can be selected only once by the sampling. :param total_count: The maximum number of points to compute overall. 
Throws a `CountMismatch` exception if @@ -5291,17 +5556,17 @@ def vector_to_regular_points(data, distance, group=UNSET) -> ProcessBuilder: """ Sample regular points from geometries - :param data: Input geometries for sample extraction. To maximize interoperability, a nested - `GeometryCollection` should be avoided. Furthermore, a `GeometryCollection` composed of a single type of - geometries should be avoided in favour of the corresponding multi-part type (e.g. `MultiPolygon`). - :param distance: Defines the minimum distance in the unit of the reference system that is required between - two samples generated *inside* a single geometry. - For **polygons**, the distance defines the cell sizes - of a regular grid that starts at the upper-left bound of each polygon. The centroid of each cell is then a - sample point. If the centroid is not enclosed in the polygon, no point is sampled. If no point can be - sampled for the geometry at all, the first coordinate of the geometry is returned as point. - For **lines** - (line strings), the sampling starts with a point at the first coordinate of the line and then walks along - the line and samples a new point each time the distance to the previous point has been reached again. - For - **points**, the point is returned as given. + :param data: Input geometries for sample extraction. + :param distance: Defines the minimum distance in meters that is required between two samples generated + *inside* a single geometry. If the unit of the spatial reference system is not meters, a `UnitMismatch` + error is thrown. Use ``vector_reproject()`` to convert the geometries to a suitable spatial reference + system. - For **polygons**, the distance defines the cell sizes of a regular grid that starts at the + upper-left bound of each polygon. The centroid of each cell is then a sample point. If the centroid is not + enclosed in the polygon, no point is sampled. 
If no point can be sampled for the geometry at all, the first + coordinate of the geometry is returned as point. - For **lines** (line strings), the sampling starts with a + point at the first coordinate of the line and then walks along the line and samples a new point each time + the distance to the previous point has been reached again. - For **points**, the point is returned as + given. :param group: Specifies whether the sampled points should be grouped by input geometry (default) or be generated as independent points. * If the sampled points are grouped, the process generates a `MultiPoint` per geometry given which keeps the original identifier if present. * Otherwise, each sampled point is diff --git a/specs/openeo-processes-legacy/README.md b/specs/openeo-processes-legacy/README.md new file mode 100644 index 000000000..a1c3e2c65 --- /dev/null +++ b/specs/openeo-processes-legacy/README.md @@ -0,0 +1,3 @@ +Collection of process definitions of processes that were removed +from `openeo-processes` project, but should not yet be removed from +`openeo.processes`. diff --git a/specs/openeo-processes-legacy/load_ml_model.json b/specs/openeo-processes-legacy/load_ml_model.json new file mode 100644 index 000000000..151513c80 --- /dev/null +++ b/specs/openeo-processes-legacy/load_ml_model.json @@ -0,0 +1,53 @@ +{ + "id": "load_ml_model", + "summary": "Load a ML model", + "description": "Loads a machine learning model from a STAC Item.\n\nSuch a model could be trained and saved as part of a previous batch job with processes such as ``fit_regr_random_forest()`` and ``save_ml_model()``.", + "categories": [ + "machine learning", + "import" + ], + "experimental": true, + "parameters": [ + { + "name": "id", + "description": "The STAC Item to load the machine learning model from. 
The STAC Item must implement the `ml-model` extension.", + "schema": [ + { + "title": "URL", + "type": "string", + "format": "uri", + "subtype": "uri", + "pattern": "^https?://" + }, + { + "title": "Batch Job ID", + "description": "Loading a model by batch job ID is possible only if a single model has been saved by the job. Otherwise, you have to load a specific model from a batch job by URL.", + "type": "string", + "subtype": "job-id", + "pattern": "^[\\w\\-\\.~]+$" + }, + { + "title": "User-uploaded File", + "type": "string", + "subtype": "file-path", + "pattern": "^[^\r\n\\:'\"]+$" + } + ] + } + ], + "returns": { + "description": "A machine learning model to be used with machine learning processes such as ``predict_random_forest()``.", + "schema": { + "type": "object", + "subtype": "ml-model" + } + }, + "links": [ + { + "href": "https://github.com/stac-extensions/ml-model", + "title": "STAC ml-model extension", + "type": "text/html", + "rel": "about" + } + ] +} diff --git a/specs/openeo-processes-legacy/load_result.json b/specs/openeo-processes-legacy/load_result.json new file mode 100644 index 000000000..1aca00b9f --- /dev/null +++ b/specs/openeo-processes-legacy/load_result.json @@ -0,0 +1,212 @@ +{ + "id": "load_result", + "summary": "Load batch job results", + "description": "Loads batch job results and returns them as a processable data cube. A batch job result can be loaded by ID or URL:\n\n* **ID**: The identifier for a finished batch job. The job must have been submitted by the authenticated user on the back-end currently connected to.\n* **URL**: The URL to the STAC metadata for a batch job result. This is usually a signed URL that is provided by some back-ends since openEO API version 1.1.0 through the `canonical` link relation in the batch job result metadata.\n\nIf supported by the underlying metadata and file format, the data that is added to the data cube can be restricted with the parameters `spatial_extent`, `temporal_extent` and `bands`. 
If no data is available for the given extents, a `NoDataAvailable` exception is thrown.\n\n**Remarks:**\n\n* The bands (and all dimensions that specify nominal dimension labels) are expected to be ordered as specified in the metadata if the `bands` parameter is set to `null`.\n* If no additional parameter is specified this would imply that the whole data set is expected to be loaded. Due to the large size of many data sets, this is not recommended and may be optimized by back-ends to only load the data that is actually required after evaluating subsequent processes such as filters. This means that the values should be processed only after the data has been limited to the required extent and as a consequence also to a manageable size.", + "categories": [ + "cubes", + "import" + ], + "experimental": true, + "parameters": [ + { + "name": "id", + "description": "The id of a batch job with results.", + "schema": [ + { + "title": "ID", + "type": "string", + "subtype": "job-id", + "pattern": "^[\\w\\-\\.~]+$" + }, + { + "title": "URL", + "type": "string", + "format": "uri", + "subtype": "uri", + "pattern": "^https?://" + } + ] + }, + { + "name": "spatial_extent", + "description": "Limits the data to load from the batch job result to the specified bounding box or polygons.\n\n* For raster data, the process loads the pixel into the data cube if the point at the pixel center intersects with the bounding box or any of the polygons (as defined in the Simple Features standard by the OGC).\n* For vector data, the process loads the geometry into the data cube if the geometry is fully within the bounding box or any of the polygons (as defined in the Simple Features standard by the OGC).
Empty geometries may only be in the data cube if no spatial extent has been provided.\n\nThe GeoJSON can be one of the following feature types:\n\n* A `Polygon` or `MultiPolygon` geometry,\n* a `Feature` with a `Polygon` or `MultiPolygon` geometry, or\n* a `FeatureCollection` containing at least one `Feature` with `Polygon` or `MultiPolygon` geometries.\n\nSet this parameter to `null` to set no limit for the spatial extent. Be careful with this when loading large datasets! It is recommended to use this parameter instead of using ``filter_bbox()`` or ``filter_spatial()`` directly after loading unbounded data.", + "schema": [ + { + "title": "Bounding Box", + "type": "object", + "subtype": "bounding-box", + "required": [ + "west", + "south", + "east", + "north" + ], + "properties": { + "west": { + "description": "West (lower left corner, coordinate axis 1).", + "type": "number" + }, + "south": { + "description": "South (lower left corner, coordinate axis 2).", + "type": "number" + }, + "east": { + "description": "East (upper right corner, coordinate axis 1).", + "type": "number" + }, + "north": { + "description": "North (upper right corner, coordinate axis 2).", + "type": "number" + }, + "base": { + "description": "Base (optional, lower left corner, coordinate axis 3).", + "type": [ + "number", + "null" + ], + "default": null + }, + "height": { + "description": "Height (optional, upper right corner, coordinate axis 3).", + "type": [ + "number", + "null" + ], + "default": null + }, + "crs": { + "description": "Coordinate reference system of the extent, specified as [EPSG code](http://www.epsg-registry.org/) or [WKT2 CRS string](http://docs.opengeospatial.org/is/18-010r7/18-010r7.html).
Defaults to `4326` (EPSG code 4326) unless the client explicitly requests a different coordinate reference system.", + "anyOf": [ + { + "title": "EPSG Code", + "type": "integer", + "subtype": "epsg-code", + "minimum": 1000, + "examples": [ + 3857 + ] + }, + { + "title": "WKT2", + "type": "string", + "subtype": "wkt2-definition" + } + ], + "default": 4326 + } + } + }, + { + "title": "GeoJSON", + "description": "Limits the data cube to the bounding box of the given geometries. For raster data, all pixels inside the bounding box that do not intersect with any of the polygons will be set to no data (`null`).\n\nThe GeoJSON type `GeometryCollection` is not supported. Empty geometries are ignored.", + "type": "object", + "subtype": "geojson" + }, + { + "title": "Vector data cube", + "description": "Limits the data cube to the bounding box of the given geometries in the vector data cube. All pixels inside the bounding box that do not intersect with any of the polygons will be set to no data (`null`). Empty geometries are ignored.", + "type": "object", + "subtype": "datacube", + "dimensions": [ + { + "type": "geometry" + } + ] + }, + { + "title": "No filter", + "description": "Don't filter spatially. All data is included in the data cube.", + "type": "null" + } + ], + "default": null, + "optional": true + }, + { + "name": "temporal_extent", + "description": "Limits the data to load from the batch job result to the specified left-closed temporal interval. Applies to all temporal dimensions. The interval has to be specified as an array with exactly two elements:\n\n1. The first element is the start of the temporal interval. The specified instance in time is **included** in the interval.\n2. The second element is the end of the temporal interval. The specified instance in time is **excluded** from the interval.\n\nThe specified temporal strings follow [RFC 3339](https://www.rfc-editor.org/rfc/rfc3339.html). 
Also supports open intervals by setting one of the boundaries to `null`, but never both.\n\nSet this parameter to `null` to set no limit for the temporal extent. Be careful with this when loading large datasets! It is recommended to use this parameter instead of using ``filter_temporal()`` directly after loading unbounded data.", + "schema": [ + { + "type": "array", + "subtype": "temporal-interval", + "minItems": 2, + "maxItems": 2, + "items": { + "anyOf": [ + { + "type": "string", + "format": "date-time", + "subtype": "date-time" + }, + { + "type": "string", + "format": "date", + "subtype": "date" + }, + { + "type": "string", + "subtype": "year", + "minLength": 4, + "maxLength": 4, + "pattern": "^\\d{4}$" + }, + { + "type": "null" + } + ] + }, + "examples": [ + [ + "2015-01-01T00:00:00Z", + "2016-01-01T00:00:00Z" + ], + [ + "2015-01-01", + "2016-01-01" + ] + ] + }, + { + "title": "No filter", + "description": "Don't filter temporally. All data is included in the data cube.", + "type": "null" + } + ], + "default": null, + "optional": true + }, + { + "name": "bands", + "description": "Only adds the specified bands into the data cube so that bands that don't match the list of band names are not available. Applies to all dimensions of type `bands`.\n\nEither the unique band name (metadata field `name` in bands) or one of the common band names (metadata field `common_name` in bands) can be specified. If the unique band name and the common name conflict, the unique band name has a higher priority.\n\nThe order of the specified array defines the order of the bands in the data cube. 
If multiple bands match a common name, all matched bands are included in the original order.\n\nIt is recommended to use this parameter instead of using ``filter_bands()`` directly after loading unbounded data.", + "schema": [ + { + "type": "array", + "minItems": 1, + "items": { + "type": "string", + "subtype": "band-name" + } + }, + { + "title": "No filter", + "description": "Don't filter bands. All bands are included in the data cube.", + "type": "null" + } + ], + "default": null, + "optional": true + } + ], + "returns": { + "description": "A data cube for further processing.", + "schema": { + "type": "object", + "subtype": "datacube" + } + }, + "exceptions": { + "NoDataAvailable": { + "message": "There is no data available for the given extents." + } + } +} diff --git a/specs/openeo-processes-legacy/predict_random_forest.json b/specs/openeo-processes-legacy/predict_random_forest.json new file mode 100644 index 000000000..62c54e9f2 --- /dev/null +++ b/specs/openeo-processes-legacy/predict_random_forest.json @@ -0,0 +1,42 @@ +{ + "id": "predict_random_forest", + "summary": "Predict values based on a Random Forest model", + "description": "Applies a Random Forest machine learning model to an array and predict a value for it.", + "categories": [ + "machine learning", + "reducer" + ], + "experimental": true, + "parameters": [ + { + "name": "data", + "description": "An array of numbers.", + "schema": { + "type": "array", + "items": { + "type": [ + "number", + "null" + ] + } + } + }, + { + "name": "model", + "description": "A model object that can be trained with the processes ``fit_regr_random_forest()`` (regression) and ``fit_class_random_forest()`` (classification).", + "schema": { + "type": "object", + "subtype": "ml-model" + } + } + ], + "returns": { + "description": "The predicted value. 
Returns `null` if any of the given values in the array is a no-data value.", + "schema": { + "type": [ + "number", + "null" + ] + } + } +} diff --git a/specs/update-subrepos.py b/specs/update-subrepos.py index e5f430915..e7121bf16 100755 --- a/specs/update-subrepos.py +++ b/specs/update-subrepos.py @@ -37,7 +37,7 @@ def main(): subrepos = [ SubRepo( url="https://github.com/Open-EO/openeo-processes.git", - rev="ca9e31094b863233d88459b6cf2a37416bc90d4e", + rev="2.0.0-rc.1", path="openeo-processes", ) ] @@ -81,6 +81,7 @@ def ensure_subrepo(subrepo: SubRepo): raise RuntimeError(f"{path} exists but does not look like a git repo") # Checkout to desired revision + run_command(["git", "fetch"], cwd=path) run_command(["git", "checkout", subrepo.rev], cwd=path) run_command(["git", "log", "-1"], cwd=path) run_command(["git", "submodule", "update", "--init", "--recursive"], cwd=path) diff --git a/tests/internal/processes/test_generator.py b/tests/internal/processes/test_generator.py index 25507b6e6..57b931bd9 100644 --- a/tests/internal/processes/test_generator.py +++ b/tests/internal/processes/test_generator.py @@ -1,7 +1,10 @@ import re +import shutil from io import StringIO from textwrap import dedent +import pytest + from openeo.internal.processes.generator import ( PythonRenderer, collect_processes, @@ -282,7 +285,7 @@ def apply_dimension(data, dimension, process): :return: Data cube """ - return _process('apply_dimension', + return _process('apply_dimension', data=data, dimension=dimension, process=build_child_callback(process, parent_parameters=['data']) @@ -303,6 +306,13 @@ def test_collect_processes_multiple_sources(tmp_path): assert [p.id for p in processes] == ["add", "cos"] +def test_collect_processes_duplicates(tmp_path): + shutil.copy(get_test_resource("data/processes/1.0/cos.json"), tmp_path / "foo.json") + shutil.copy(get_test_resource("data/processes/1.0/cos.json"), tmp_path / "bar.json") + with pytest.raises(Exception, match="Duplicate source for process 
'cos'"): + _ = collect_processes(sources=[tmp_path]) + + def test_generate_process_py(): processes = [ Process.from_dict({