From 5a35e6427a0301e04bfe541c4b1027412df094b3 Mon Sep 17 00:00:00 2001 From: Martin Raifer Date: Thu, 28 Sep 2023 16:43:31 +0200 Subject: [PATCH 1/7] fix bug in affinitycall's stream implementation this bug caused the backend to always use the slow path of `.collect().stream()`, which is particularly slow for medium to large bounding boxes, as it will not use the preflight optimizations implemented for `stream`. --- .../mapreducer/backend/MapReducerIgniteAffinityCall.java | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/oshdb-api-ignite/src/main/java/org/heigit/ohsome/oshdb/api/mapreducer/backend/MapReducerIgniteAffinityCall.java b/oshdb-api-ignite/src/main/java/org/heigit/ohsome/oshdb/api/mapreducer/backend/MapReducerIgniteAffinityCall.java index d9561c158..1579f9d48 100644 --- a/oshdb-api-ignite/src/main/java/org/heigit/ohsome/oshdb/api/mapreducer/backend/MapReducerIgniteAffinityCall.java +++ b/oshdb-api-ignite/src/main/java/org/heigit/ohsome/oshdb/api/mapreducer/backend/MapReducerIgniteAffinityCall.java @@ -4,6 +4,7 @@ import com.google.common.primitives.Ints; import java.io.IOException; import java.sql.SQLException; +import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.HashMap; @@ -13,6 +14,7 @@ import java.util.Optional; import java.util.TreeMap; import java.util.function.Function; +import java.util.stream.Collectors; import java.util.stream.LongStream; import java.util.stream.Stream; import javax.annotation.Nonnull; @@ -258,12 +260,12 @@ cacheName, cellIdRangeToCellIds(), cellIdRanges, cellProcessor, cellIterator cacheName, cellIdRangeToCellIds(), cellIdRanges, cellProcessor, cellIterator ); } - List cellsWithData = asyncGetHandleTimeouts( + ArrayList cellsWithData = asyncGetHandleTimeouts( compute.broadcastAsync(preflight), this.timeout ).stream() .flatMap(Collection::stream) - .toList(); + .collect(Collectors.toCollection(ArrayList::new)); Collections.shuffle(cellsWithData); Stream 
resultForType = cellsWithData.parallelStream() .filter(ignored -> this.isActive()) From e85292b1b505c508d3aaa25981bf26c05e8dc0e4 Mon Sep 17 00:00:00 2001 From: Martin Raifer Date: Fri, 29 Sep 2023 11:40:13 +0200 Subject: [PATCH 2/7] only use fallback code if OSHDB-specific exception is thrown --- .../backend/MapReducerIgniteLocalPeek.java | 9 +++++---- .../ohsome/oshdb/api/mapreducer/MapReducer.java | 3 ++- .../exceptions/OSHDBNotImplementedException.java | 14 ++++++++++++++ 3 files changed, 21 insertions(+), 5 deletions(-) create mode 100644 oshdb-util/src/main/java/org/heigit/ohsome/oshdb/util/exceptions/OSHDBNotImplementedException.java diff --git a/oshdb-api-ignite/src/main/java/org/heigit/ohsome/oshdb/api/mapreducer/backend/MapReducerIgniteLocalPeek.java b/oshdb-api-ignite/src/main/java/org/heigit/ohsome/oshdb/api/mapreducer/backend/MapReducerIgniteLocalPeek.java index 7412b1e89..89b9694d3 100644 --- a/oshdb-api-ignite/src/main/java/org/heigit/ohsome/oshdb/api/mapreducer/backend/MapReducerIgniteLocalPeek.java +++ b/oshdb-api-ignite/src/main/java/org/heigit/ohsome/oshdb/api/mapreducer/backend/MapReducerIgniteLocalPeek.java @@ -34,6 +34,7 @@ import org.heigit.ohsome.oshdb.util.celliterator.CellIterator; import org.heigit.ohsome.oshdb.util.celliterator.OSHEntitySource; import org.heigit.ohsome.oshdb.util.exceptions.OSHDBTimeoutException; +import org.heigit.ohsome.oshdb.util.exceptions.OSHDBNotImplementedException; import org.heigit.ohsome.oshdb.util.function.OSHEntityFilter; import org.heigit.ohsome.oshdb.util.function.OSMEntityFilter; import org.heigit.ohsome.oshdb.util.function.SerializableBiFunction; @@ -80,28 +81,28 @@ protected MapReducer copy() { protected Stream mapStreamCellsOSMContribution( SerializableFunction> mapper ) throws Exception { - throw new UnsupportedOperationException("Stream function not yet implemented"); + throw new OSHDBNotImplementedException("Stream function not yet implemented"); } @Override protected Stream 
flatMapStreamCellsOSMContributionGroupedById( SerializableFunction, Iterable> mapper ) throws Exception { - throw new UnsupportedOperationException("Stream function not yet implemented"); + throw new OSHDBNotImplementedException("Stream function not yet implemented"); } @Override protected Stream mapStreamCellsOSMEntitySnapshot( SerializableFunction> mapper ) throws Exception { - throw new UnsupportedOperationException("Stream function not yet implemented"); + throw new OSHDBNotImplementedException("Stream function not yet implemented"); } @Override protected Stream flatMapStreamCellsOSMEntitySnapshotGroupedById( SerializableFunction, Iterable> mapper ) throws Exception { - throw new UnsupportedOperationException("Stream function not yet implemented"); + throw new OSHDBNotImplementedException("Stream function not yet implemented"); } private List cacheNames(String prefix) { diff --git a/oshdb-api/src/main/java/org/heigit/ohsome/oshdb/api/mapreducer/MapReducer.java b/oshdb-api/src/main/java/org/heigit/ohsome/oshdb/api/mapreducer/MapReducer.java index 7b78df096..58e69fe16 100644 --- a/oshdb-api/src/main/java/org/heigit/ohsome/oshdb/api/mapreducer/MapReducer.java +++ b/oshdb-api/src/main/java/org/heigit/ohsome/oshdb/api/mapreducer/MapReducer.java @@ -50,6 +50,7 @@ import org.heigit.ohsome.oshdb.osm.OSMType; import org.heigit.ohsome.oshdb.util.OSHDBTagKey; import org.heigit.ohsome.oshdb.util.exceptions.OSHDBInvalidTimestampException; +import org.heigit.ohsome.oshdb.util.exceptions.OSHDBNotImplementedException; import org.heigit.ohsome.oshdb.util.function.OSHEntityFilter; import org.heigit.ohsome.oshdb.util.function.OSMEntityFilter; import org.heigit.ohsome.oshdb.util.function.SerializableBiFunction; @@ -1343,7 +1344,7 @@ public List collect() throws Exception { public Stream stream() throws Exception { try { return this.streamInternal(); - } catch (UnsupportedOperationException e) { + } catch (OSHDBNotImplementedException e) { LOG.info("stream not directly supported by 
chosen backend, falling back to " + ".collect().stream()"); return this.collect().stream(); diff --git a/oshdb-util/src/main/java/org/heigit/ohsome/oshdb/util/exceptions/OSHDBNotImplementedException.java b/oshdb-util/src/main/java/org/heigit/ohsome/oshdb/util/exceptions/OSHDBNotImplementedException.java new file mode 100644 index 000000000..6a8ebc5c1 --- /dev/null +++ b/oshdb-util/src/main/java/org/heigit/ohsome/oshdb/util/exceptions/OSHDBNotImplementedException.java @@ -0,0 +1,14 @@ +package org.heigit.ohsome.oshdb.util.exceptions; + +/** + * An exception which is thrown when a particular feature is not implemented in the OSHDB. + * + *

+ * Mostly used internally for specific code paths for which fallback routines are implemented. + *

+ */ +public class OSHDBNotImplementedException extends OSHDBException { + public OSHDBNotImplementedException(String message) { + super(message); + } +} From 4f27beb4196b3322ac00b61397d3d849a9c84b33 Mon Sep 17 00:00:00 2001 From: Martin Raifer Date: Fri, 29 Sep 2023 11:39:35 +0200 Subject: [PATCH 3/7] drop unused dependency version --- pom.xml | 1 - 1 file changed, 1 deletion(-) diff --git a/pom.xml b/pom.xml index a981f1781..641a36536 100644 --- a/pom.xml +++ b/pom.xml @@ -33,7 +33,6 @@ 31.1-jre 1.19.0 - 0.17.0 1.4.197 42.5.4 From 55fadd5d77557515f2a050414d77fa63a9ddec1d Mon Sep 17 00:00:00 2001 From: Martin Raifer Date: Fri, 29 Sep 2023 11:57:49 +0200 Subject: [PATCH 4/7] add to changelog, prepare 1.2.1 patch release --- CHANGELOG.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index d3c484fac..a35f2fe81 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,13 @@ Changelog ## 1.3.0-SNAPSHOT (current master) +## 1.2.1 + +* Fix performance degradation in the streaming endpoints when running on Ignite using the `AFFINITY_CALL` backend ([#516]) + +[#516]: https://github.com/GIScience/oshdb/pull/516 + + ## 1.2.0 ### new features From 959b289a687084ce0b0442d02f580300516e609e Mon Sep 17 00:00:00 2001 From: Martin Raifer Date: Fri, 29 Sep 2023 12:03:11 +0200 Subject: [PATCH 5/7] prepare release notes for v1.2.1 patch release --- .github/ISSUE_TEMPLATE/bug_report.md | 2 +- CITATION.cff | 4 +-- README.md | 4 +-- documentation/README.md | 2 +- documentation/first-steps/README.md | 4 +-- documentation/first-steps/example-pom.xml | 2 +- documentation/manual/aggregation.md | 12 +++---- documentation/manual/database-backends.md | 10 +++--- documentation/manual/filters.md | 18 +++++----- documentation/manual/geometries.md | 2 +- .../manual/helpers/OSHDBApplication.md | 2 +- documentation/manual/helpers/OSHDBDriver.md | 2 +- documentation/manual/installation.md | 2 +- documentation/manual/map-reduce.md | 36 +++++++++---------- 
documentation/manual/views.md | 34 +++++++++--------- 15 files changed, 68 insertions(+), 68 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md index 50a17407c..e47a51dc6 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.md +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -30,5 +30,5 @@ Add any other context about the problem here. Please complete the following information: - OS: [e.g. Ubuntu 20.04 LTS] - Java Version: [e.g. openjdk version "11.0.9.1"] - - OSHDB Version: [e.g. 1.2.0] + - OSHDB Version: [e.g. 1.2.1] - Maven version: [e.g. 3.6.3] diff --git a/CITATION.cff b/CITATION.cff index 418a1a065..d9edf5543 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -1,4 +1,4 @@ -cff-version: 1.2.0 +cff-version: 1.2.1 message: "If you use this software, please cite it as below." authors: - family-names: "Raifer" @@ -12,7 +12,7 @@ authors: - family-names: "Schott" given-names: "Moritz" title: "OSHDB - OpenStreetMap History Data Analysis" -version: 1.2.0 +version: 1.2.1 doi: 10.5281/zenodo.4146990 date-released: 2021-07-22 url: "https://github.com/GIScience/oshdb" diff --git a/README.md b/README.md index 7136318ca..fcae18030 100644 --- a/README.md +++ b/README.md @@ -65,14 +65,14 @@ The API is based on the MapReduce programming model and offers powerful methods Installation ------------ -The OSHDB is available as a pre-compiled maven library and can be incorporated easily in any maven project. If you're starting a new project, take a look at how your IDE handles maven projects (for example, here you find instructions how to create a new maven project using [IntelliJ](https://www.jetbrains.com/help/idea/maven-support.html#maven_create_project)). Our [first steps tutorial](https://github.com/GIScience/oshdb/tree/1.2.0/documentation/first-steps#2-add-maven-dependency) includes further information about how to add the OSHDB as a maven dependency to your projects. 
+The OSHDB is available as a pre-compiled maven library and can be incorporated easily in any maven project. If you're starting a new project, take a look at how your IDE handles maven projects (for example, here you find instructions how to create a new maven project using [IntelliJ](https://www.jetbrains.com/help/idea/maven-support.html#maven_create_project)). Our [first steps tutorial](https://github.com/GIScience/oshdb/tree/1.2.1/documentation/first-steps#2-add-maven-dependency) includes further information about how to add the OSHDB as a maven dependency to your projects. Documentation ------------- * [first steps tutorial](documentation/first-steps/README.md) * [User Manual](documentation/manual/README.md) -* [OSHDB Javadoc](https://docs.ohsome.org/java/oshdb/1.2.0/aggregated/) +* [OSHDB Javadoc](https://docs.ohsome.org/java/oshdb/1.2.1/aggregated/) Examples -------- diff --git a/documentation/README.md b/documentation/README.md index 0e46e7280..dedff1b31 100644 --- a/documentation/README.md +++ b/documentation/README.md @@ -8,5 +8,5 @@ Here you find OSHDB related documentation material: Explains the design of the OSHDB data model and shows the different features of the OSHDB API and how they can be used to efficiently query the OSM history data. * [Examples](https://gitlab.gistools.geog.uni-heidelberg.de/giscience/big-data/ohsome/oshdb-examples)
Contains some example code for how to use the OSHDB to analyze the OSM history data. -* [OSHDB Javadoc](https://docs.ohsome.org/java/oshdb/1.2.0/aggregated/)
+* [OSHDB Javadoc](https://docs.ohsome.org/java/oshdb/1.2.1/aggregated/)
This lists all methods offered by the various OSHDB modules, packages and classes. diff --git a/documentation/first-steps/README.md b/documentation/first-steps/README.md index 47605eab0..e6e1be16e 100644 --- a/documentation/first-steps/README.md +++ b/documentation/first-steps/README.md @@ -25,7 +25,7 @@ If you already have an existing Java maven project, the OSHDB-API can be added t org.heigit.ohsome oshdb-api - 1.2.0 + 1.2.1 ``` @@ -80,7 +80,7 @@ In our example, we only want to look at OSM way objects which have the `building .filter("type:way and building=*") ``` -There are a variety of available filter selectors which can be combined into a [filter](https://github.com/GIScience/oshdb/tree/1.2.0/documentation/first-steps) string: each one specifies a property which OSM objects can have. These selectors can be combined into a filter string using boolean operators and parentheses. If multiple `filter`s are set, the result will contain only the OSM objects which match all given filters. +There are a variety of available filter selectors which can be combined into a [filter](https://github.com/GIScience/oshdb/tree/1.2.1/documentation/first-steps) string: each one specifies a property which OSM objects can have. These selectors can be combined into a filter string using boolean operators and parentheses. If multiple `filter`s are set, the result will contain only the OSM objects which match all given filters. ## 7. 
Calculating intermediate results diff --git a/documentation/first-steps/example-pom.xml b/documentation/first-steps/example-pom.xml index 94db67553..c3618e3f1 100644 --- a/documentation/first-steps/example-pom.xml +++ b/documentation/first-steps/example-pom.xml @@ -8,7 +8,7 @@ org.heigit.ohsome oshdb-api - 1.2.0 + 1.2.1 diff --git a/documentation/manual/aggregation.md b/documentation/manual/aggregation.md index 5a21f8a35..90b4d2b5d 100644 --- a/documentation/manual/aggregation.md +++ b/documentation/manual/aggregation.md @@ -5,12 +5,12 @@ Often, when querying OSM history data one is interested in getting multiple resu The OSHDB API provides a flexible and powerful way to produce aggregated results that are calculated for arbitrary subsets of the data. This `aggregateBy` functionality also supports the combination of multiple such grouping functions chained after each other. -When executing any of the below listed aggregateBy methods, the query's MapReducer is transformed into a [`MapAggregator`](https://docs.ohsome.org/java/oshdb/1.2.0/aggregated/org/heigit/ohsome/oshdb/api/mapreducer/MapAggregator.html) object which is (mostly) functionally equivalent to a MapReducer, with the difference that instead of returning single result values when calling any [reduce](map-reduce.md#reduce) method, an associative list of multiple values is returned instead: The result contains one entry for each requested grouping. +When executing any of the below listed aggregateBy methods, the query's MapReducer is transformed into a [`MapAggregator`](https://docs.ohsome.org/java/oshdb/1.2.1/aggregated/org/heigit/ohsome/oshdb/api/mapreducer/MapAggregator.html) object which is (mostly) functionally equivalent to a MapReducer, with the difference that instead of returning single result values when calling any [reduce](map-reduce.md#reduce) method, an associative list of multiple values is returned instead: The result contains one entry for each requested grouping. 
aggregateBy ----------- -This is the most generic grouping method, that allows to produce aggregated results that refer to arbitrary subsets of the input data. The [`aggregateBy`](https://docs.ohsome.org/java/oshdb/1.2.0/aggregated/org/heigit/ohsome/oshdb/api/mapreducer/MapReducer.html#aggregateBy(org.heigit.ohsome.oshdb.util.function.SerializableFunction)) method accepts a function that must return an “index” value by which the respective result should be grouped by. For example, when one wants to group results by OSM type, the aggregateBy method should simply return the OSM type value, as in the following example using the OSHDB snapshot view: +This is the most generic grouping method, that allows to produce aggregated results that refer to arbitrary subsets of the input data. The [`aggregateBy`](https://docs.ohsome.org/java/oshdb/1.2.1/aggregated/org/heigit/ohsome/oshdb/api/mapreducer/MapReducer.html#aggregateBy(org.heigit.ohsome.oshdb.util.function.SerializableFunction)) method accepts a function that must return an “index” value by which the respective result should be grouped by. For example, when one wants to group results by OSM type, the aggregateBy method should simply return the OSM type value, as in the following example using the OSHDB snapshot view: ```java Map countBuildingsByType = OSMEntitySnapshotView.on(…) @@ -21,7 +21,7 @@ Map countBuildingsByType = OSMEntitySnapshotView.on(…) .count(); ``` -Optionally, the [`aggregateBy`](https://docs.ohsome.org/java/oshdb/1.2.0/aggregated/org/heigit/ohsome/oshdb/api/mapreducer/MapReducer.html#aggregateBy(org.heigit.ohsome.oshdb.util.function.SerializableFunction,java.util.Collection)) method allows to specify a collection of groups which are expected to be present in the result. If for a particular group, no matching OSM entities are found in the query, the result will then still contain this key, filled with a “zero” value (e.g. `[]` for a set). 
+Optionally, the [`aggregateBy`](https://docs.ohsome.org/java/oshdb/1.2.1/aggregated/org/heigit/ohsome/oshdb/api/mapreducer/MapReducer.html#aggregateBy(org.heigit.ohsome.oshdb.util.function.SerializableFunction,java.util.Collection)) method allows to specify a collection of groups which are expected to be present in the result. If for a particular group, no matching OSM entities are found in the query, the result will then still contain this key, filled with a “zero” value (e.g. `[]` for a set). > For example, if the count reducer is used in a query, the result contains `0` integer values in entries for which no results were found. If instead the collect reduce method is used, empty lists are used to fill no-data entries. @@ -40,12 +40,12 @@ This is a specialized method for grouping results by timestamps. Depending on th > For example, when in a query the following three timestamps are set: `2014-01-01`, `2015-01-01` and `2016-01-01`, then a contribution happening at `2015-03-14` will be associated to the time interval between `2015-01-01` and `2016-01-01` (which is represented in the output as the starting time of the interval: `2015-01-01`). -There are two variants that allow this grouping by a timestamp: [`aggregateByTimestamp`](https://docs.ohsome.org/java/oshdb/1.2.0/aggregated/org/heigit/ohsome/oshdb/api/mapreducer/MapReducer.html#aggregateByTimestamp()) tries to automatically fetch the timestamps from the queried data (i.e. the snapshot, or the contribution objects), while the second variant of [`aggregateByTimestamp`](https://docs.ohsome.org/java/oshdb/1.2.0/aggregated/org/heigit/ohsome/oshdb/api/mapreducer/MapReducer.html#aggregateByTimestamp(org.heigit.ohsome.oshdb.util.function.SerializableFunction)) takes a callback function that returns an arbitrary timestamp value. 
The second variant has to be used in some cases where the automatic matching of objects to its timestamps isn't possible, for example when using the [groupByEntity](views.md#groupbyentity) option in a query, or when using multiple [aggregateBy](#combining-multiple-aggregateby)s in a query. +There are two variants that allow this grouping by a timestamp: [`aggregateByTimestamp`](https://docs.ohsome.org/java/oshdb/1.2.1/aggregated/org/heigit/ohsome/oshdb/api/mapreducer/MapReducer.html#aggregateByTimestamp()) tries to automatically fetch the timestamps from the queried data (i.e. the snapshot, or the contribution objects), while the second variant of [`aggregateByTimestamp`](https://docs.ohsome.org/java/oshdb/1.2.1/aggregated/org/heigit/ohsome/oshdb/api/mapreducer/MapReducer.html#aggregateByTimestamp(org.heigit.ohsome.oshdb.util.function.SerializableFunction)) takes a callback function that returns an arbitrary timestamp value. The second variant has to be used in some cases where the automatic matching of objects to its timestamps isn't possible, for example when using the [groupByEntity](views.md#groupbyentity) option in a query, or when using multiple [aggregateBy](#combining-multiple-aggregateby)s in a query. aggregateByGeometry ------------------- -Calculating results for multiple sub-regions of an area of interest at once is possible through [`aggregateByGeometry`](https://docs.ohsome.org/java/oshdb/1.2.0/aggregated/org/heigit/ohsome/oshdb/api/mapreducer/MapReducer.html#aggregateByGeometry(java.util.Map)). It accepts an associative list of polygonal geometries with corresponding index values. The result will then use these index values to represent the individual sub-region results. +Calculating results for multiple sub-regions of an area of interest at once is possible through [`aggregateByGeometry`](https://docs.ohsome.org/java/oshdb/1.2.1/aggregated/org/heigit/ohsome/oshdb/api/mapreducer/MapReducer.html#aggregateByGeometry(java.util.Map)). 
It accepts an associative list of polygonal geometries with corresponding index values. The result will then use these index values to represent the individual sub-region results. When using the aggregateByGeometry functionality, any OSM entity geometry that is contained in multiple sub-regions will be split and clipped to the respective geometries. @@ -54,7 +54,7 @@ The given grouping geometries are allowed to overlap each other, but they should combining multiple aggregateBy ------------------------------ -When writing an OSHDB query, it is possible to perform multiple of the above mentioned aggregateBy operations. For example, it is possible to write a query that returns results that are aggregated by timestamps and by OSM type. In this case, the final result will contain one entry for each possible combination of the specified groupings. These combined indices are encoded as [`OSHDBCombinedIndex`](https://docs.ohsome.org/java/oshdb/1.2.0/aggregated/org/heigit/ohsome/oshdb/api/generic/OSHDBCombinedIndex.html) objects in the final result map. +When writing an OSHDB query, it is possible to perform multiple of the above mentioned aggregateBy operations. For example, it is possible to write a query that returns results that are aggregated by timestamps and by OSM type. In this case, the final result will contain one entry for each possible combination of the specified groupings. These combined indices are encoded as [`OSHDBCombinedIndex`](https://docs.ohsome.org/java/oshdb/1.2.1/aggregated/org/heigit/ohsome/oshdb/api/generic/OSHDBCombinedIndex.html) objects in the final result map. 
```java Map, Integer> countBuildingsByTimeAndType = OSMEntitySnapshotView.on(…) diff --git a/documentation/manual/database-backends.md b/documentation/manual/database-backends.md index e354759be..a70f0d685 100644 --- a/documentation/manual/database-backends.md +++ b/documentation/manual/database-backends.md @@ -8,20 +8,20 @@ Database backends can implement different algorithms that control how a query is OSHDBJdbc / OSHDBH2 ------------------- -The [`ODHSBJDBC`](https://docs.ohsome.org/java/oshdb/1.2.0/aggregated/org/heigit/ohsome/oshdb/api/db/OSHDBJdbc.html) backend is often used in the `OSHDBH2` variant, which expects data to be stored in a single H2 database file. A few example OSHDB extracts in the H2 format are available as download from [downloads.ohsome.org](https://downloads.ohsome.org/OSHDB/v1.0/). +The [`ODHSBJDBC`](https://docs.ohsome.org/java/oshdb/1.2.1/aggregated/org/heigit/ohsome/oshdb/api/db/OSHDBJdbc.html) backend is often used in the `OSHDBH2` variant, which expects data to be stored in a single H2 database file. A few example OSHDB extracts in the H2 format are available as download from [downloads.ohsome.org](https://downloads.ohsome.org/OSHDB/v1.0/). Alternatively, the OSHDB data can also be stored in any JDBC compatible database (e.g. a [PostgreSQL](https://www.postgresql.org/) database). The OSHDB data is however always processed and analyzed locally on the machine from which the OSHDB query is started. It is therefore advisable to keep the OSHDB data as local as possible in order to minimize network traffic when using the OSHDBJdbc backend. OSHDBIgnite ----------- -The [`OSHDBIgnite`](https://docs.ohsome.org/java/oshdb/1.2.0/aggregated/org/heigit/ohsome/oshdb/api/db/OSHDBIgnite.html) backend executes computations on a distributed cluster of computers running the [Apache Ignite](https://ignite.apache.org/) big data platform. 
Each of the computers of the cluster only holds a subset of the global OSHDB data set and can therefore execute its part of an OSHDB query more quickly than a single computer having to process the whole data set. +The [`OSHDBIgnite`](https://docs.ohsome.org/java/oshdb/1.2.1/aggregated/org/heigit/ohsome/oshdb/api/db/OSHDBIgnite.html) backend executes computations on a distributed cluster of computers running the [Apache Ignite](https://ignite.apache.org/) big data platform. Each of the computers of the cluster only holds a subset of the global OSHDB data set and can therefore execute its part of an OSHDB query more quickly than a single computer having to process the whole data set. -There are currently three different [compute modes](https://docs.ohsome.org/java/oshdb/1.2.0/aggregated/org/heigit/ohsome/oshdb/api/db/OSHDBIgnite.html#computeMode()) available in the OSHDBIgnite backend: +There are currently three different [compute modes](https://docs.ohsome.org/java/oshdb/1.2.1/aggregated/org/heigit/ohsome/oshdb/api/db/OSHDBIgnite.html#computeMode()) available in the OSHDBIgnite backend: * *LOCAL_PEEK* - (default) is optimized for small to mid scale queries. * *SCAN_QUERY* - works better for large scale (e.g. global) analysis queries. -* *AFFINITY_CALL* - is generally slower than the other two compute modes, but supports [streaming](https://docs.ohsome.org/java/oshdb/1.2.0/aggregated/org/heigit/ohsome/oshdb/api/mapreducer/MapReducer.html#stream()) of results. +* *AFFINITY_CALL* - is generally slower than the other two compute modes, but supports [streaming](https://docs.ohsome.org/java/oshdb/1.2.1/aggregated/org/heigit/ohsome/oshdb/api/mapreducer/MapReducer.html#stream()) of results. 
In order to use the OSHDB Ignite backend, it is necessary to add the maven module `oshdb-api-ignite` to your project's maven dependencies: @@ -29,6 +29,6 @@ In order to use the OSHDB Ignite backend, it is necessary to add the maven modul org.heigit.ohsome oshdb-api-ignite - 1.2.0 + 1.2.1 ``` diff --git a/documentation/manual/filters.md b/documentation/manual/filters.md index 191fa75f9..793479ac1 100644 --- a/documentation/manual/filters.md +++ b/documentation/manual/filters.md @@ -3,19 +3,19 @@ Filtering OSM Data Often one doesn't want to investigate the whole OSM data set at once, but only a specific part of it. For example, all the OSM data in a given region, or all OSM objects that have a given [tag](https://wiki.openstreetmap.org/wiki/Tags), [type](https://wiki.openstreetmap.org/wiki/Elements), or other property of the respective OSM entity. -For this, the [`MapReducer`](https://docs.ohsome.org/java/oshdb/1.2.0/aggregated/org/heigit/ohsome/oshdb/api/mapreducer/MapReducer.html) provides a variety of filtering methods which allow one to select any subset of the OSM data. Multiple filters can be applied after each other. The result will then contain any OSM elements that match **all** of the specified filters. +For this, the [`MapReducer`](https://docs.ohsome.org/java/oshdb/1.2.1/aggregated/org/heigit/ohsome/oshdb/api/mapreducer/MapReducer.html) provides a variety of filtering methods which allow one to select any subset of the OSM data. Multiple filters can be applied after each other. The result will then contain any OSM elements that match **all** of the specified filters. areaOfInterest -------------- -This defines the region where the query should be restricted on. 
It can be either a [bounding box](https://docs.ohsome.org/java/oshdb/1.2.0/aggregated/org/heigit/ohsome/oshdb/api/mapreducer/MapReducer.html#areaOfInterest(org.heigit.ohsome.oshdb.OSHDBBoundingBox)) ([`OSHDBBoundingBox`](https://docs.ohsome.org/java/oshdb/1.2.0/aggregated/org/heigit/ohsome/oshdb/OSHDBBoundingBox.html)) or any [(polygonal) JTS geometry](https://docs.ohsome.org/java/oshdb/1.2.0/aggregated/org/heigit/ohsome/oshdb/api/mapreducer/MapReducer.html#areaOfInterest(P)) such as a Polygon or MultiPolygon. +This defines the region where the query should be restricted on. It can be either a [bounding box](https://docs.ohsome.org/java/oshdb/1.2.1/aggregated/org/heigit/ohsome/oshdb/api/mapreducer/MapReducer.html#areaOfInterest(org.heigit.ohsome.oshdb.OSHDBBoundingBox)) ([`OSHDBBoundingBox`](https://docs.ohsome.org/java/oshdb/1.2.1/aggregated/org/heigit/ohsome/oshdb/OSHDBBoundingBox.html)) or any [(polygonal) JTS geometry](https://docs.ohsome.org/java/oshdb/1.2.1/aggregated/org/heigit/ohsome/oshdb/api/mapreducer/MapReducer.html#areaOfInterest(P)) such as a Polygon or MultiPolygon. The output of this filter will keep only OSM entities whose geometry lie within or which intersect the given areaOfInterest. This included also OSM entities for which that none of their child elements lie within the given area of interest. > For example, a large forest polygon in OSM that completely encompasses a small area of interest _is_ returned by the OSHDB API. -The resulting geometries produced by the different OSHDB [views](views.md) are by default clipped to the specified area of interest. This makes it possible to directly calculate the length or area of linear or polygonal OSM features within the given query region, without having to consider the fact that some features might only partially lie within the region. 
It is, at the same time, still possible to access full extent of the respective OSM features' [unclipped](https://docs.ohsome.org/java/oshdb/1.2.0/aggregated/org/heigit/ohsome/oshdb/util/mappable/OSMEntitySnapshot.html#getGeometryUnclipped()) [geometries](https://docs.ohsome.org/java/oshdb/1.2.0/aggregated/org/heigit/ohsome/oshdb/util/mappable/OSMContribution.html#getGeometryUnclippedBefore()). You can find further information in the section about how the OSHDB [builds geometries](geometries.md) from OSM data. +The resulting geometries produced by the different OSHDB [views](views.md) are by default clipped to the specified area of interest. This makes it possible to directly calculate the length or area of linear or polygonal OSM features within the given query region, without having to consider the fact that some features might only partially lie within the region. It is, at the same time, still possible to access full extent of the respective OSM features' [unclipped](https://docs.ohsome.org/java/oshdb/1.2.1/aggregated/org/heigit/ohsome/oshdb/util/mappable/OSMEntitySnapshot.html#getGeometryUnclipped()) [geometries](https://docs.ohsome.org/java/oshdb/1.2.1/aggregated/org/heigit/ohsome/oshdb/util/mappable/OSMContribution.html#getGeometryUnclippedBefore()). You can find further information in the section about how the OSHDB [builds geometries](geometries.md) from OSM data. The OSHDB is able to cope well even with complex polygons that have many vertices as areas of interest, but keep in mind that using simpler geometries will generally result in higher query performance: For example a bounding-box query is executed slightly faster than a polygon-areaOfInterest query with a rectangular polygon. @@ -24,22 +24,22 @@ The OSHDB is able to cope well even with complex polygons that have many vertice timestamps ---------- -This specifies the time range and time subdivisions for the OSHDB query. 
Accepts [one](https://docs.ohsome.org/java/oshdb/1.2.0/aggregated/org/heigit/ohsome/oshdb/api/mapreducer/MapReducer.html#timestamps(java.lang.String)) [or](https://docs.ohsome.org/java/oshdb/1.2.0/aggregated/org/heigit/ohsome/oshdb/api/mapreducer/MapReducer.html#timestamps(java.lang.String,java.lang.String)) [more](https://docs.ohsome.org/java/oshdb/1.2.0/aggregated/org/heigit/ohsome/oshdb/api/mapreducer/MapReducer.html#timestamps(java.lang.String,java.lang.String,java.lang.String...)) [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601) formatted dates (given in the [UTC](https://en.wikipedia.org/wiki/Coordinated_Universal_Time) timezone). Depending on the used OSHDB [view](views.md), these timestamps are interpreted slightly differently: When using the **snapshot** view, the given timestamps define the dates at which the snapshots of the OSM entities are taken. When using the **contribution** view, all modifications to the OSM entities are returned that lie within the time range defined by the given first and last timestamp, while any further timestamps can be used later to [aggregate](aggregation.md) results into finer time intervals. +This specifies the time range and time subdivisions for the OSHDB query. Accepts [one](https://docs.ohsome.org/java/oshdb/1.2.1/aggregated/org/heigit/ohsome/oshdb/api/mapreducer/MapReducer.html#timestamps(java.lang.String)) [or](https://docs.ohsome.org/java/oshdb/1.2.1/aggregated/org/heigit/ohsome/oshdb/api/mapreducer/MapReducer.html#timestamps(java.lang.String,java.lang.String)) [more](https://docs.ohsome.org/java/oshdb/1.2.1/aggregated/org/heigit/ohsome/oshdb/api/mapreducer/MapReducer.html#timestamps(java.lang.String,java.lang.String,java.lang.String...)) [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601) formatted dates (given in the [UTC](https://en.wikipedia.org/wiki/Coordinated_Universal_Time) timezone). 
Depending on the used OSHDB [view](views.md), these timestamps are interpreted slightly differently: When using the **snapshot** view, the given timestamps define the dates at which the snapshots of the OSM entities are taken. When using the **contribution** view, all modifications to the OSM entities are returned that lie within the time range defined by the given first and last timestamp, while any further timestamps can be used later to [aggregate](aggregation.md) results into finer time intervals. -There exists also a [method](https://docs.ohsome.org/java/oshdb/1.2.0/aggregated/org/heigit/ohsome/oshdb/api/mapreducer/MapReducer.html#timestamps(java.lang.String,java.lang.String,org.heigit.ohsome.oshdb.util.time.OSHDBTimestamps.Interval)) to define common regularly spaced time intervals within a time range, e.g. a monthly time interval between two dates. +There exists also a [method](https://docs.ohsome.org/java/oshdb/1.2.1/aggregated/org/heigit/ohsome/oshdb/api/mapreducer/MapReducer.html#timestamps(java.lang.String,java.lang.String,org.heigit.ohsome.oshdb.util.time.OSHDBTimestamps.Interval)) to define common regularly spaced time intervals within a time range, e.g. a monthly time interval between two dates. _OSHDB_ filter --------------- -An easy way to provide [`filter`s](https://docs.ohsome.org/java/oshdb/1.2.0/aggregated/org/heigit/ohsome/oshdb/api/mapreducer/MapReducer.html#filter(java.lang.String)) is through the functionality of [OSHDB filters](https://github.com/GIScience/oshdb/blob/1.2.0/oshdb-filter/README.md), which allow one to define osm data filters in a human-readable syntax. With these one can combine several tag-, type- and geometry-filters with arbitrary boolean operators. 
+An easy way to provide [`filter`s](https://docs.ohsome.org/java/oshdb/1.2.1/aggregated/org/heigit/ohsome/oshdb/api/mapreducer/MapReducer.html#filter(java.lang.String)) is through the functionality of [OSHDB filters](https://github.com/GIScience/oshdb/blob/1.2.1/oshdb-filter/README.md), which allow one to define OSM data filters in a human-readable syntax. With these one can combine several tag-, type- and geometry-filters with arbitrary boolean operators. -Simple examples of filters are `type:node and natural=tree` to select trees, or `geometry:polygon and building=*` to filter for buildings. More examples and can be found on the [dedicated filter documentation page](https://github.com/GIScience/oshdb/blob/1.2.0/oshdb-filter/README.md#examples). +Simple examples of filters are `type:node and natural=tree` to select trees, or `geometry:polygon and building=*` to filter for buildings. More examples can be found on the [dedicated filter documentation page](https://github.com/GIScience/oshdb/blob/1.2.1/oshdb-filter/README.md#examples). -By using the methods [`Filter.byOSMEntity`](https://docs.ohsome.org/java/oshdb/1.2.0/aggregated/org/heigit/ohsome/oshdb/filter/Filter.html#byOSMEntity(org.heigit.ohsome.oshdb.util.function.OSMEntityFilter)) and [`Filter.byOSHEntity`](https://docs.ohsome.org/java/oshdb/1.2.0/aggregated/org/heigit/ohsome/oshdb/filter/Filter.html#byOSHEntity(org.heigit.ohsome.oshdb.util.function.OSHEntityFilter)) one can define arbitrary callback functions to filter OSM or OSH entities, respectively.
+By using the methods [`Filter.byOSMEntity`](https://docs.ohsome.org/java/oshdb/1.2.1/aggregated/org/heigit/ohsome/oshdb/filter/Filter.html#byOSMEntity(org.heigit.ohsome.oshdb.util.function.OSMEntityFilter)) and [`Filter.byOSHEntity`](https://docs.ohsome.org/java/oshdb/1.2.1/aggregated/org/heigit/ohsome/oshdb/filter/Filter.html#byOSHEntity(org.heigit.ohsome.oshdb.util.function.OSHEntityFilter)) one can define arbitrary callback functions to filter OSM or OSH entities, respectively. _lambda_ filter --------------- -It is possible to define [`filter` functions](https://docs.ohsome.org/java/oshdb/1.2.0/aggregated/org/heigit/ohsome/oshdb/api/mapreducer/MapReducer.html#filter(org.heigit.ohsome.oshdb.util.function.SerializablePredicate)) that can sort out values after they already have been transformed in a [map](map-reduce.md#map) step. +It is possible to define [`filter` functions](https://docs.ohsome.org/java/oshdb/1.2.1/aggregated/org/heigit/ohsome/oshdb/api/mapreducer/MapReducer.html#filter(org.heigit.ohsome.oshdb.util.function.SerializablePredicate)) that can sort out values after they already have been transformed in a [map](map-reduce.md#map) step. Note that it is usually best to use the _OSHDB_ filters described above wherever possible, as they can reduce the amount of data to be iterated over right from the start of the query. Lambda filter functions are only executed after the OSM data has already been computed and transformed. 
diff --git a/documentation/manual/geometries.md b/documentation/manual/geometries.md index cab45ca9d..84471aa20 100644 --- a/documentation/manual/geometries.md +++ b/documentation/manual/geometries.md @@ -13,7 +13,7 @@ Nodes are always presented as [`Point`](https://locationtech.github.io/jts/javad Ways ---- -Ways are converted to either [`LineString`](https://locationtech.github.io/jts/javadoc/org/locationtech/jts/geom/LineString.html) or [`Polygon`](https://locationtech.github.io/jts/javadoc/org/locationtech/jts/geom/Polygon.html) geometries depending on their composition and their tags: A not closed way is always represented as a line, while it depends for a closed one. The [`TagInterpreter`](https://docs.ohsome.org/java/oshdb/1.2.0/aggregated/org/heigit/ohsome/oshdb/util/taginterpreter/TagInterpreter.html) component of the OSHDB is responsible for deciding whether a closed way results in a line or a polygon: A (closed) OSM way with the tag `building=yes` will be converted to a polygon geometry, while a `junction=roundabout` one will not. +Ways are converted to either [`LineString`](https://locationtech.github.io/jts/javadoc/org/locationtech/jts/geom/LineString.html) or [`Polygon`](https://locationtech.github.io/jts/javadoc/org/locationtech/jts/geom/Polygon.html) geometries depending on their composition and their tags: A not closed way is always represented as a line, while it depends for a closed one. The [`TagInterpreter`](https://docs.ohsome.org/java/oshdb/1.2.1/aggregated/org/heigit/ohsome/oshdb/util/taginterpreter/TagInterpreter.html) component of the OSHDB is responsible for deciding whether a closed way results in a line or a polygon: A (closed) OSM way with the tag `building=yes` will be converted to a polygon geometry, while a `junction=roundabout` one will not. 
Relations --------- diff --git a/documentation/manual/helpers/OSHDBApplication.md b/documentation/manual/helpers/OSHDBApplication.md index e7a376fee..64f3df8a3 100644 --- a/documentation/manual/helpers/OSHDBApplication.md +++ b/documentation/manual/helpers/OSHDBApplication.md @@ -7,7 +7,7 @@ Replace your OSHDB dependency with the following: org.heigit.ohsome oshdb-application-template - 1.2.0 + 1.2.1 ``` diff --git a/documentation/manual/helpers/OSHDBDriver.md b/documentation/manual/helpers/OSHDBDriver.md index d2be9f867..e7e77ec5e 100644 --- a/documentation/manual/helpers/OSHDBDriver.md +++ b/documentation/manual/helpers/OSHDBDriver.md @@ -7,7 +7,7 @@ Replace your OSHDB dependency with the following: org.heigit.ohsome oshdb-database-driver - 1.2.0 + 1.2.1 ``` diff --git a/documentation/manual/installation.md b/documentation/manual/installation.md index 69cb0bd4e..b180fc7ba 100644 --- a/documentation/manual/installation.md +++ b/documentation/manual/installation.md @@ -21,7 +21,7 @@ Simply add the OSHDB as a dependency to your `pom.xml` file. For most use cases org.heigit.ohsome oshdb-api - 1.2.0 + 1.2.1 ``` diff --git a/documentation/manual/map-reduce.md b/documentation/manual/map-reduce.md index 53f03e915..73003d3b1 100644 --- a/documentation/manual/map-reduce.md +++ b/documentation/manual/map-reduce.md @@ -1,7 +1,7 @@ Map and Reduce ============== -The [`MapReducer`](https://docs.ohsome.org/java/oshdb/1.2.0/aggregated/org/heigit/ohsome/oshdb/api/mapreducer/MapReducer.html) is the central object of every OSHDB query. It is returned by the initial OSHDB [view](views.md) and allows to [filter](filters.md) out defined subsets of the OSM history dataset. At that point one can transform (**map**) and aggregate (**reduce**) the respective OSM data into a final result. +The [`MapReducer`](https://docs.ohsome.org/java/oshdb/1.2.1/aggregated/org/heigit/ohsome/oshdb/api/mapreducer/MapReducer.html) is the central object of every OSHDB query. 
It is returned by the initial OSHDB [view](views.md) and allows to [filter](filters.md) out defined subsets of the OSM history dataset. At that point one can transform (**map**) and aggregate (**reduce**) the respective OSM data into a final result. > For example, a map function can calculate the length of every OSM highway, and a reduce function can sum up all of these length values. @@ -10,14 +10,14 @@ For many of the most frequently used reduce operations, such as the summing up o map --- -A transformation function can be set by calling the [`map`](https://docs.ohsome.org/java/oshdb/1.2.0/aggregated/org/heigit/ohsome/oshdb/api/mapreducer/MapReducer.html#map(org.heigit.ohsome.oshdb.util.function.SerializableFunction)) method of any MapReducer. It is allowed to have an OSHDB query without a map step or one with multiple map steps, which are executed one after each other. Such a map function can also transform the data type of the MapReducer it operates on. +A transformation function can be set by calling the [`map`](https://docs.ohsome.org/java/oshdb/1.2.1/aggregated/org/heigit/ohsome/oshdb/api/mapreducer/MapReducer.html#map(org.heigit.ohsome.oshdb.util.function.SerializableFunction)) method of any MapReducer. It is allowed to have an OSHDB query without a map step or one with multiple map steps, which are executed one after each other. Such a map function can also transform the data type of the MapReducer it operates on. > For example, when calculating the length (which is a floating-point number) of an entity snapshot, the underlying MapReducer changes from type `MapReducer` to being a `MapReducer`. flatMap ------- -A [`flatMap`](https://docs.ohsome.org/java/oshdb/1.2.0/aggregated/org/heigit/ohsome/oshdb/api/mapreducer/MapReducer.html#flatMap(org.heigit.ohsome.oshdb.util.function.SerializableFunction)) operation allows one to map any input value to an arbitrary amount of output values. 
Each of the output values can be transformed in further map steps individually. +A [`flatMap`](https://docs.ohsome.org/java/oshdb/1.2.1/aggregated/org/heigit/ohsome/oshdb/api/mapreducer/MapReducer.html#flatMap(org.heigit.ohsome.oshdb.util.function.SerializableFunction)) operation allows one to map any input value to an arbitrary amount of output values. Each of the output values can be transformed in further map steps individually. filter ------ @@ -27,7 +27,7 @@ Filters can even be applied in the map phase. Read more about this feature in th reduce ------ -The [`reduce`](https://docs.ohsome.org/java/oshdb/1.2.0/aggregated/org/heigit/ohsome/oshdb/api/mapreducer/MapReducer.html#reduce(org.heigit.ohsome.oshdb.util.function.SerializableSupplier,org.heigit.ohsome.oshdb.util.function.SerializableBiFunction,org.heigit.ohsome.oshdb.util.function.SerializableBinaryOperator)) operation produces the final result of an OSHDB query. It takes the result of the previous map steps and combines (reduces) these values into a final result. This can be something as simple as summing up all the values, but also something more complicated, for example estimating statistical properties such as the median of the calculated values. Many queries use common reduce operations, for which the OSHDB provides shorthand methods (see [below](#specialized-reducers)). +The [`reduce`](https://docs.ohsome.org/java/oshdb/1.2.1/aggregated/org/heigit/ohsome/oshdb/api/mapreducer/MapReducer.html#reduce(org.heigit.ohsome.oshdb.util.function.SerializableSupplier,org.heigit.ohsome.oshdb.util.function.SerializableBiFunction,org.heigit.ohsome.oshdb.util.function.SerializableBinaryOperator)) operation produces the final result of an OSHDB query. It takes the result of the previous map steps and combines (reduces) these values into a final result. 
This can be something as simple as summing up all the values, but also something more complicated, for example estimating statistical properties such as the median of the calculated values. Many queries use common reduce operations, for which the OSHDB provides shorthand methods (see [below](#specialized-reducers)). Every OSHDB query must have exactly one terminal reduce operation (or use the `stream` method explained [below](#stream)). @@ -38,29 +38,29 @@ specialized reducers The OSHDB provides the following list of default reduce operations, that are often used for querying OSM history data. Their names and usage are mostly self-explanatory. -* [`count`](https://docs.ohsome.org/java/oshdb/1.2.0/aggregated/org/heigit/ohsome/oshdb/api/mapreducer/MapReducer.html#count()) -* [`sum`](https://docs.ohsome.org/java/oshdb/1.2.0/aggregated/org/heigit/ohsome/oshdb/api/mapreducer/MapReducer.html#sum()) -* [`average`](https://docs.ohsome.org/java/oshdb/1.2.0/aggregated/org/heigit/ohsome/oshdb/api/mapreducer/MapReducer.html#average()) -* [`weightedAverage`](https://docs.ohsome.org/java/oshdb/1.2.0/aggregated/org/heigit/ohsome/oshdb/api/mapreducer/MapReducer.html#weightedAverage(org.heigit.ohsome.oshdb.util.function.SerializableFunction)) -* [`uniq`](https://docs.ohsome.org/java/oshdb/1.2.0/aggregated/org/heigit/ohsome/oshdb/api/mapreducer/MapReducer.html#uniq()) -* [`countUniq`](https://docs.ohsome.org/java/oshdb/1.2.0/aggregated/org/heigit/ohsome/oshdb/api/mapreducer/MapReducer.html#countUniq()) -* [`estimatedMedian`](https://docs.ohsome.org/java/oshdb/1.2.0/aggregated/org/heigit/ohsome/oshdb/api/mapreducer/MapReducer.html#estimatedMedian()) -* [`estimatedQuantile(s)`](https://docs.ohsome.org/java/oshdb/1.2.0/aggregated/org/heigit/ohsome/oshdb/api/mapreducer/MapReducer.html#estimatedQuantiles()) -* [`collect`](https://docs.ohsome.org/java/oshdb/1.2.0/aggregated/org/heigit/ohsome/oshdb/api/mapreducer/MapReducer.html#collect()) +* 
[`count`](https://docs.ohsome.org/java/oshdb/1.2.1/aggregated/org/heigit/ohsome/oshdb/api/mapreducer/MapReducer.html#count()) +* [`sum`](https://docs.ohsome.org/java/oshdb/1.2.1/aggregated/org/heigit/ohsome/oshdb/api/mapreducer/MapReducer.html#sum()) +* [`average`](https://docs.ohsome.org/java/oshdb/1.2.1/aggregated/org/heigit/ohsome/oshdb/api/mapreducer/MapReducer.html#average()) +* [`weightedAverage`](https://docs.ohsome.org/java/oshdb/1.2.1/aggregated/org/heigit/ohsome/oshdb/api/mapreducer/MapReducer.html#weightedAverage(org.heigit.ohsome.oshdb.util.function.SerializableFunction)) +* [`uniq`](https://docs.ohsome.org/java/oshdb/1.2.1/aggregated/org/heigit/ohsome/oshdb/api/mapreducer/MapReducer.html#uniq()) +* [`countUniq`](https://docs.ohsome.org/java/oshdb/1.2.1/aggregated/org/heigit/ohsome/oshdb/api/mapreducer/MapReducer.html#countUniq()) +* [`estimatedMedian`](https://docs.ohsome.org/java/oshdb/1.2.1/aggregated/org/heigit/ohsome/oshdb/api/mapreducer/MapReducer.html#estimatedMedian()) +* [`estimatedQuantile(s)`](https://docs.ohsome.org/java/oshdb/1.2.1/aggregated/org/heigit/ohsome/oshdb/api/mapreducer/MapReducer.html#estimatedQuantiles()) +* [`collect`](https://docs.ohsome.org/java/oshdb/1.2.1/aggregated/org/heigit/ohsome/oshdb/api/mapreducer/MapReducer.html#collect()) -Some listed specialized reducers also have overloaded versions that accept a mapping function directly. This allows some queries to be written more concisely, but also allows for improved type inference: For example when summing integer values, using the overloaded [`sum`](https://docs.ohsome.org/java/oshdb/1.2.0/aggregated/org/heigit/ohsome/oshdb/api/mapreducer/MapReducer.html#sum(org.heigit.ohsome.oshdb.util.function.SerializableFunction)) reducer knows that the result must also be of type `Integer`, and doesn't have to resort on returning the more generic `Number` type. +Some listed specialized reducers also have overloaded versions that accept a mapping function directly. 
This allows some queries to be written more concisely, but also allows for improved type inference: For example when summing integer values, using the overloaded [`sum`](https://docs.ohsome.org/java/oshdb/1.2.1/aggregated/org/heigit/ohsome/oshdb/api/mapreducer/MapReducer.html#sum(org.heigit.ohsome.oshdb.util.function.SerializableFunction)) reducer knows that the result must also be of type `Integer`, and doesn't have to resort to returning the more generic `Number` type. stream ------ -Instead of using a regular reduce operation at the end of an OSHDB query, one can also call [`stream`](https://docs.ohsome.org/java/oshdb/1.2.0/aggregated/org/heigit/ohsome/oshdb/api/mapreducer/MapReducer.html#stream()), which doesn't aggregate the values into a final result, but rather returns a (potentially long) stream of values. If possible, using a reduce operation instead of streaming all values and using post-processing results in better performance of a query, because there is less data to be transferred. The stream operation is however beneficial over using `collect` if the result set is expected to be large, because it doesn't require all the data to be buffered into a result collection. +Instead of using a regular reduce operation at the end of an OSHDB query, one can also call [`stream`](https://docs.ohsome.org/java/oshdb/1.2.1/aggregated/org/heigit/ohsome/oshdb/api/mapreducer/MapReducer.html#stream()), which doesn't aggregate the values into a final result, but rather returns a (potentially long) stream of values. If possible, using a reduce operation instead of streaming all values and using post-processing results in better performance of a query, because there is less data to be transferred. The stream operation is however beneficial over using `collect` if the result set is expected to be large, because it doesn't require all the data to be buffered into a result collection.
geometry helpers ---------------- -Often, one might be interested in analyzing properties of the geometries of the analyzed OSM features. For some often used metrics, the OSHDB comes with a few built-in helper functions in its [`Geo`](https://docs.ohsome.org/java/oshdb/1.2.0/aggregated/org/heigit/ohsome/oshdb/util/geometry/Geo.html) class: +Often, one might be interested in analyzing properties of the geometries of the analyzed OSM features. For some often used metrics, the OSHDB comes with a few built-in helper functions in its [`Geo`](https://docs.ohsome.org/java/oshdb/1.2.1/aggregated/org/heigit/ohsome/oshdb/util/geometry/Geo.html) class: -* [`areaOf`](https://docs.ohsome.org/java/oshdb/1.2.0/aggregated/org/heigit/ohsome/oshdb/util/geometry/Geo.html#areaOf(org.locationtech.jts.geom.Geometry)) returns the area (in `m²`) of polygonal geometries. -* [`lengthOf`](https://docs.ohsome.org/java/oshdb/1.2.0/aggregated/org/heigit/ohsome/oshdb/util/geometry/Geo.html#lengthOf(org.locationtech.jts.geom.Geometry)) returns the length (in `m`) of linear geometries. +* [`areaOf`](https://docs.ohsome.org/java/oshdb/1.2.1/aggregated/org/heigit/ohsome/oshdb/util/geometry/Geo.html#areaOf(org.locationtech.jts.geom.Geometry)) returns the area (in `m²`) of polygonal geometries. +* [`lengthOf`](https://docs.ohsome.org/java/oshdb/1.2.1/aggregated/org/heigit/ohsome/oshdb/util/geometry/Geo.html#lengthOf(org.locationtech.jts.geom.Geometry)) returns the length (in `m`) of linear geometries. > Note that both of these methods use approximation formulas to calculate the length or area of OSM geometries. For typical features present in OpenStreetMap data, however, the relative error introduced by these approximations are quite small (below 0.1% for lengths and < 0.001% for areas). 
diff --git a/documentation/manual/views.md b/documentation/manual/views.md index ed18a6048..fbd0cfeee 100644 --- a/documentation/manual/views.md +++ b/documentation/manual/views.md @@ -3,8 +3,8 @@ Views Two different ways of querying OSM data are available, which determine how the OSM history data is actually analyzed in a given OSHDB query: -* The **snapshot view** ([`OSMEntitySnapshotView`](https://docs.ohsome.org/java/oshdb/1.2.0/aggregated/org/heigit/ohsome/oshdb/api/mapreducer/OSMEntitySnapshotView.html)) returns the state of the OSM history data at specific given points in time. -* The **contribution view** ([`OSMContributionView`](https://docs.ohsome.org/java/oshdb/1.2.0/aggregated/org/heigit/ohsome/oshdb/api/mapreducer/OSMContributionView.html)) returns all modifications (e.g., creations, modifications or deletions) to the OSM elements within a given time period. +* The **snapshot view** ([`OSMEntitySnapshotView`](https://docs.ohsome.org/java/oshdb/1.2.1/aggregated/org/heigit/ohsome/oshdb/api/mapreducer/OSMEntitySnapshotView.html)) returns the state of the OSM history data at specific given points in time. +* The **contribution view** ([`OSMContributionView`](https://docs.ohsome.org/java/oshdb/1.2.1/aggregated/org/heigit/ohsome/oshdb/api/mapreducer/OSMContributionView.html)) returns all modifications (e.g., creations, modifications or deletions) to the OSM elements within a given time period. The snapshot view is particularly useful for analysing how the amount of OSM data changed over time. The contribution view can be used to determine the number of OSM contributors editing the OSM data. 
@@ -13,7 +13,7 @@ The snapshot view is particularly useful for analysing how the amount of OSM dat Using OSHDB Views ----------------- -Both views can be used in the OSHDB API in very similar ways and only differ in the type of data that is returned by the [`MapReducer`](https://docs.ohsome.org/java/oshdb/1.2.0/aggregated/org/heigit/ohsome/oshdb/api/mapreducer/MapReducer.html) object that is returned when calling the [`on`](https://docs.ohsome.org/java/oshdb/1.2.0/aggregated/org/heigit/ohsome/oshdb/api/mapreducer/OSMContributionView.html#on(org.heigit.ohsome.oshdb.api.db.OSHDBDatabase)) method of the respective view: The `OSMEntitySnapshotView` returns a MapReducer of [`OSMEntitySnapshot`](https://docs.ohsome.org/java/oshdb/1.2.0/aggregated/org/heigit/ohsome/oshdb/util/mappable/OSMEntitySnapshot.html) objects, while the `OSMContributionView` returns a MapReducer of [`OSMContribution`](https://docs.ohsome.org/java/oshdb/1.2.0/aggregated/org/heigit/ohsome/oshdb/util/mappable/OSMContribution.html) objects. +Both views can be used in the OSHDB API in very similar ways and only differ in the type of data that is returned by the [`MapReducer`](https://docs.ohsome.org/java/oshdb/1.2.1/aggregated/org/heigit/ohsome/oshdb/api/mapreducer/MapReducer.html) object that is returned when calling the [`on`](https://docs.ohsome.org/java/oshdb/1.2.1/aggregated/org/heigit/ohsome/oshdb/api/mapreducer/OSMContributionView.html#on(org.heigit.ohsome.oshdb.api.db.OSHDBDatabase)) method of the respective view: The `OSMEntitySnapshotView` returns a MapReducer of [`OSMEntitySnapshot`](https://docs.ohsome.org/java/oshdb/1.2.1/aggregated/org/heigit/ohsome/oshdb/util/mappable/OSMEntitySnapshot.html) objects, while the `OSMContributionView` returns a MapReducer of [`OSMContribution`](https://docs.ohsome.org/java/oshdb/1.2.1/aggregated/org/heigit/ohsome/oshdb/util/mappable/OSMContribution.html) objects. 
```java OSHDBDatabase oshdb = …; @@ -26,28 +26,28 @@ A MapReducer is conceptually very similar to a [Stream](https://docs.oracle.com/ ### Snapshot View -The [`OSMEntitySnapshot`](https://docs.ohsome.org/java/oshdb/1.2.0/aggregated/org/heigit/ohsome/oshdb/util/mappable/OSMEntitySnapshot.html) is quite simple: it returns the state of the OSM data at a given point in time, or at multiple given points in time. In the OSHDB API, these are called snapshots and are represented by [`OSMEntitySnapshot`](https://docs.ohsome.org/java/oshdb/1.2.0/aggregated/org/heigit/ohsome/oshdb/util/mappable/OSMEntitySnapshot.html) objects. They allow access to the following properties: +The [`OSMEntitySnapshot`](https://docs.ohsome.org/java/oshdb/1.2.1/aggregated/org/heigit/ohsome/oshdb/util/mappable/OSMEntitySnapshot.html) is quite simple: it returns the state of the OSM data at a given point in time, or at multiple given points in time. In the OSHDB API, these are called snapshots and are represented by [`OSMEntitySnapshot`](https://docs.ohsome.org/java/oshdb/1.2.1/aggregated/org/heigit/ohsome/oshdb/util/mappable/OSMEntitySnapshot.html) objects. 
They allow access to the following properties: -* the [timestamp](https://docs.ohsome.org/java/oshdb/1.2.0/aggregated/org/heigit/ohsome/oshdb/util/mappable/OSMEntitySnapshot.html#getTimestamp()) of the snapshot -* the [geometry](https://docs.ohsome.org/java/oshdb/1.2.0/aggregated/org/heigit/ohsome/oshdb/util/mappable/OSMEntitySnapshot.html#getGeometry()) of the queried OSM feature -* the [OSM entity](https://docs.ohsome.org/java/oshdb/1.2.0/aggregated/org/heigit/ohsome/oshdb/util/mappable/OSMEntitySnapshot.html#getEntity()) of this snapshot +* the [timestamp](https://docs.ohsome.org/java/oshdb/1.2.1/aggregated/org/heigit/ohsome/oshdb/util/mappable/OSMEntitySnapshot.html#getTimestamp()) of the snapshot +* the [geometry](https://docs.ohsome.org/java/oshdb/1.2.1/aggregated/org/heigit/ohsome/oshdb/util/mappable/OSMEntitySnapshot.html#getGeometry()) of the queried OSM feature +* the [OSM entity](https://docs.ohsome.org/java/oshdb/1.2.1/aggregated/org/heigit/ohsome/oshdb/util/mappable/OSMEntitySnapshot.html#getEntity()) of this snapshot ### Contribution View -The [`OSMContributionView`](https://docs.ohsome.org/java/oshdb/1.2.0/aggregated/org/heigit/ohsome/oshdb/api/mapreducer/OSMContributionView.html) returns all modifications to matching OSM entities. This is in general more computationally intensive than using the snapshot view, but allows to inspect the OSM data in more detail, especially if one is interested in how the OSM data is modified by the contributors to the OSM project. +The [`OSMContributionView`](https://docs.ohsome.org/java/oshdb/1.2.1/aggregated/org/heigit/ohsome/oshdb/api/mapreducer/OSMContributionView.html) returns all modifications to matching OSM entities. This is in general more computationally intensive than using the snapshot view, but allows to inspect the OSM data in more detail, especially if one is interested in how the OSM data is modified by the contributors to the OSM project. 
Specifically, the OSHDB API considers all modifications to [semantic OSM elements](https://wiki.openstreetmap.org/wiki/Semantic_elements) as a contribution: This includes both direct edits (e.g. tag changes) as well as changes which are based in changes of referenced OSM objects (e.g. certain geometry changes). When OSM entities are changed multiple times in a single [OSM changeset](https://wiki.openstreetmap.org/wiki/Changeset), these are squashed into one single contribution result. -Through the returned [`OSMContribution`](https://docs.ohsome.org/java/oshdb/1.2.0/aggregated/org/heigit/ohsome/oshdb/util/mappable/OSMContribution.html) objects, one has access to the following properties: +Through the returned [`OSMContribution`](https://docs.ohsome.org/java/oshdb/1.2.1/aggregated/org/heigit/ohsome/oshdb/util/mappable/OSMContribution.html) objects, one has access to the following properties: -* the [timestamp](https://docs.ohsome.org/java/oshdb/1.2.0/aggregated/org/heigit/ohsome/oshdb/util/mappable/OSMContribution.html#getTimestamp()) of the contribution -* the geometries [before](https://docs.ohsome.org/java/oshdb/1.2.0/aggregated/org/heigit/ohsome/oshdb/util/mappable/OSMContribution.html#getGeometryBefore()) and [after](https://docs.ohsome.org/java/oshdb/1.2.0/aggregated/org/heigit/ohsome/oshdb/util/mappable/OSMContribution.html#getGeometryAfter()) the modification. If the contribution object represents a creation of an entity, the before geometry doesn't exist and returns `null` if it is accessed. Similarly, this is also true for the geometry after a deletion of an OSM object. 
-* the OSM entity [before](https://docs.ohsome.org/java/oshdb/1.2.0/aggregated/org/heigit/ohsome/oshdb/util/mappable/OSMContribution.html#getEntityBefore()) and [after](https://docs.ohsome.org/java/oshdb/1.2.0/aggregated/org/heigit/ohsome/oshdb/util/mappable/OSMContribution.html#getEntityBefore()) the modification -* the [id of the OSM user](https://docs.ohsome.org/java/oshdb/1.2.0/aggregated/org/heigit/ohsome/oshdb/util/mappable/OSMContribution.html#getContributorUserId()) who performed this contribution -* the [id of the OSM changeset](https://docs.ohsome.org/java/oshdb/1.2.0/aggregated/org/heigit/ohsome/oshdb/util/mappable/OSMContribution.html#getChangesetId()) in which this contribution was performed -* the [type](https://docs.ohsome.org/java/oshdb/1.2.0/aggregated/org/heigit/ohsome/oshdb/util/mappable/OSMContribution.html#getContributionTypes()) of the contribution. +* the [timestamp](https://docs.ohsome.org/java/oshdb/1.2.1/aggregated/org/heigit/ohsome/oshdb/util/mappable/OSMContribution.html#getTimestamp()) of the contribution +* the geometries [before](https://docs.ohsome.org/java/oshdb/1.2.1/aggregated/org/heigit/ohsome/oshdb/util/mappable/OSMContribution.html#getGeometryBefore()) and [after](https://docs.ohsome.org/java/oshdb/1.2.1/aggregated/org/heigit/ohsome/oshdb/util/mappable/OSMContribution.html#getGeometryAfter()) the modification. If the contribution object represents a creation of an entity, the before geometry doesn't exist and returns `null` if it is accessed. Similarly, this is also true for the geometry after a deletion of an OSM object. 
+* the OSM entity [before](https://docs.ohsome.org/java/oshdb/1.2.1/aggregated/org/heigit/ohsome/oshdb/util/mappable/OSMContribution.html#getEntityBefore()) and [after](https://docs.ohsome.org/java/oshdb/1.2.1/aggregated/org/heigit/ohsome/oshdb/util/mappable/OSMContribution.html#getEntityAfter()) the modification +* the [id of the OSM user](https://docs.ohsome.org/java/oshdb/1.2.1/aggregated/org/heigit/ohsome/oshdb/util/mappable/OSMContribution.html#getContributorUserId()) who performed this contribution +* the [id of the OSM changeset](https://docs.ohsome.org/java/oshdb/1.2.1/aggregated/org/heigit/ohsome/oshdb/util/mappable/OSMContribution.html#getChangesetId()) in which this contribution was performed +* the [type](https://docs.ohsome.org/java/oshdb/1.2.1/aggregated/org/heigit/ohsome/oshdb/util/mappable/OSMContribution.html#getContributionTypes()) of the contribution. -The [contribution type](https://docs.ohsome.org/java/oshdb/1.2.0/aggregated/org/heigit/ohsome/oshdb/util/celliterator/ContributionType.html) can be either a **creation**, a **deletion**, a **tag change**, or a **geometry change** of an OSM entity. +The [contribution type](https://docs.ohsome.org/java/oshdb/1.2.1/aggregated/org/heigit/ohsome/oshdb/util/celliterator/ContributionType.html) can be either a **creation**, a **deletion**, a **tag change**, or a **geometry change** of an OSM entity. All of these contribution types refer to the filtered set of OSM data of the current MapReducer. This means that an OSM feature that has gained a specific tag in one of versions greater than one, will be reported as a “creation” by the contribution view of the OSHDB API if the query was programmed to filter for that particular tag. Analogously this is also the case if an object was moved from outside an area of interest into the query region, and also for the inverse cases which are returned as deletions.
This makes sure that summing up all creations and subtracting all deletions matches the results one can obtain from a query using the snapshot view. @@ -56,7 +56,7 @@ Note that there exist [cases](https://github.com/GIScience/oshdb/issues/87) wher GroupByEntity ------------- -The [`groupByEntity()`](https://docs.ohsome.org/java/oshdb/1.2.0/aggregated/org/heigit/ohsome/oshdb/api/mapreducer/MapReducer.html#groupByEntity()) method of a MapReducer slightly changes the way the MapReducers receives and transforms values: Instead of iterating over each snapshot or contribution individually, in this mode all snapshots or all contributions of an individual OSM entity are collected into a list (sorted by timestamps) first. This makes it possible to investigate the full edit history of individual OSM objects at once, which is for example needed when one is looking for contributions that got reverted at a later point in time. +The [`groupByEntity()`](https://docs.ohsome.org/java/oshdb/1.2.1/aggregated/org/heigit/ohsome/oshdb/api/mapreducer/MapReducer.html#groupByEntity()) method of a MapReducer slightly changes the way the MapReducer receives and transforms values: Instead of iterating over each snapshot or contribution individually, in this mode all snapshots or all contributions of an individual OSM entity are collected into a list (sorted by timestamps) first. This makes it possible to investigate the full edit history of individual OSM objects at once, which is for example needed when one is looking for contributions that got reverted at a later point in time.
It is recommended to call this method immediately after creating the MapReducer from a view: From a73b4576e6d6abe690222a4d1984c5a370856865 Mon Sep 17 00:00:00 2001 From: Martin Raifer Date: Fri, 29 Sep 2023 13:09:44 +0200 Subject: [PATCH 6/7] lint --- .../oshdb/api/mapreducer/backend/MapReducerIgniteLocalPeek.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/oshdb-api-ignite/src/main/java/org/heigit/ohsome/oshdb/api/mapreducer/backend/MapReducerIgniteLocalPeek.java b/oshdb-api-ignite/src/main/java/org/heigit/ohsome/oshdb/api/mapreducer/backend/MapReducerIgniteLocalPeek.java index 89b9694d3..a3f2d4619 100644 --- a/oshdb-api-ignite/src/main/java/org/heigit/ohsome/oshdb/api/mapreducer/backend/MapReducerIgniteLocalPeek.java +++ b/oshdb-api-ignite/src/main/java/org/heigit/ohsome/oshdb/api/mapreducer/backend/MapReducerIgniteLocalPeek.java @@ -33,8 +33,8 @@ import org.heigit.ohsome.oshdb.util.TableNames; import org.heigit.ohsome.oshdb.util.celliterator.CellIterator; import org.heigit.ohsome.oshdb.util.celliterator.OSHEntitySource; -import org.heigit.ohsome.oshdb.util.exceptions.OSHDBTimeoutException; import org.heigit.ohsome.oshdb.util.exceptions.OSHDBNotImplementedException; +import org.heigit.ohsome.oshdb.util.exceptions.OSHDBTimeoutException; import org.heigit.ohsome.oshdb.util.function.OSHEntityFilter; import org.heigit.ohsome.oshdb.util.function.OSMEntityFilter; import org.heigit.ohsome.oshdb.util.function.SerializableBiFunction; From 1f9a7591892535f252c05933179e728c88b290c0 Mon Sep 17 00:00:00 2001 From: Martin Raifer Date: Fri, 29 Sep 2023 13:55:06 +0200 Subject: [PATCH 7/7] tweak code style --- .../api/mapreducer/backend/MapReducerIgniteAffinityCall.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/oshdb-api-ignite/src/main/java/org/heigit/ohsome/oshdb/api/mapreducer/backend/MapReducerIgniteAffinityCall.java b/oshdb-api-ignite/src/main/java/org/heigit/ohsome/oshdb/api/mapreducer/backend/MapReducerIgniteAffinityCall.java 
index 1579f9d48..ffac7d186 100644 --- a/oshdb-api-ignite/src/main/java/org/heigit/ohsome/oshdb/api/mapreducer/backend/MapReducerIgniteAffinityCall.java +++ b/oshdb-api-ignite/src/main/java/org/heigit/ohsome/oshdb/api/mapreducer/backend/MapReducerIgniteAffinityCall.java @@ -260,7 +260,7 @@ cacheName, cellIdRangeToCellIds(), cellIdRanges, cellProcessor, cellIterator cacheName, cellIdRangeToCellIds(), cellIdRanges, cellProcessor, cellIterator ); } - ArrayList cellsWithData = asyncGetHandleTimeouts( + var cellsWithData = asyncGetHandleTimeouts( compute.broadcastAsync(preflight), this.timeout ).stream()