Skip to content

Commit

Permalink
Support GEOMETRYCOLLECTIONs (#17)
Browse files Browse the repository at this point in the history
* Support for geometrycollections

* add -a option for auto threshold

* better number formatting of numobjects

* homogenize opacity of layers

* add an INDEX_HASH_PREFIX before the index hash to avoid loading old caches after cache-breaking code changes

* change type for internal and qlever ID to size_t

* also request IDs in an incremental fashion from QLever

* fix percentage in log output for ID query

* don't show cache loading message on result loading

* don't expect a leading " in WKTs, but support it optionally

* exactly half the id space for points, the other half for the rest

* increase batch size to 10M
  • Loading branch information
patrickbr authored Oct 14, 2024
1 parent 9713f2b commit 7ea149b
Show file tree
Hide file tree
Showing 13 changed files with 516 additions and 247 deletions.
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
cmake_minimum_required (VERSION 3.5)
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CXX_STANDARD 14)

project (qlever-petrimaps)

Expand Down
493 changes: 345 additions & 148 deletions src/qlever-petrimaps/GeomCache.cpp

Large diffs are not rendered by default.

22 changes: 19 additions & 3 deletions src/qlever-petrimaps/GeomCache.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#include <string>
#include <unordered_map>
#include <vector>
#include <chrono>

#include "qlever-petrimaps/Misc.h"
#include "util/geo/Geo.h"
Expand Down Expand Up @@ -55,6 +56,7 @@ class GeomCache {
void requestPart(size_t offset);

void requestIds();
void requestIdPart(size_t offset);

void parse(const char*, size_t size);
void parseIds(const char*, size_t size);
Expand Down Expand Up @@ -103,9 +105,10 @@ class GeomCache {
QLEVER_ID_TYPE _maxQid;
size_t _totalSize = 0;
std::atomic<size_t> _curRow;
std::atomic<size_t> _curIdRow;
size_t _curUniqueGeom;

enum _LoadStatusStages { Parse = 1, ParseIds, FromFile };
enum _LoadStatusStages { Parse = 1, ParseIds, FromFile, Finished };
_LoadStatusStages _loadStatusStage = Parse;

static size_t writeCb(void* contents, size_t size, size_t nmemb, void* userp);
Expand All @@ -124,15 +127,23 @@ class GeomCache {

std::string queryUrl(std::string query, size_t offset, size_t limit) const;

util::geo::FPoint parsePoint(const std::string& a, size_t p) const;
util::geo::FPoint createPoint(const std::string& a, size_t p) const;

static bool pointValid(const util::geo::FPoint& p);
static bool pointValid(const util::geo::DPoint& p);

static util::geo::DLine parseLineString(const std::string& a, size_t p);
static util::geo::DLine createLineString(const std::string& a, size_t p);

size_t parsePolygon(const std::string& str, size_t p, size_t end, size_t* i);

size_t parseMultiPoint(const std::string &str, size_t p, size_t end, size_t* i);
size_t parseMultiLineString(const std::string &str, size_t p, size_t end, size_t* i);
size_t parseMultiPolygon(const std::string &str, size_t p, size_t end, size_t* i);

void insertLine(const util::geo::DLine& l, bool isArea);

static std::vector<size_t> getGeomStarts(const std::string &str, size_t a);

std::string indexHashFromDisk(const std::string& fname);

std::vector<util::geo::FPoint> _points;
Expand All @@ -144,13 +155,18 @@ class GeomCache {
size_t _linesFSize;
size_t _qidToIdFSize;

size_t _lastBytesReceived;
std::chrono::time_point<std::chrono::high_resolution_clock> _lastReceivedTime;

std::fstream _pointsF;
std::fstream _linePointsF;
std::fstream _linesF;
std::fstream _qidToIdF;

size_t _geometryDuplicates = 0;

size_t _lastQid = -1;

IdMapping _lastQidToId;

std::vector<IdMapping> _qidToId;
Expand Down
7 changes: 3 additions & 4 deletions src/qlever-petrimaps/Misc.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,10 @@
#define PETRIMAPS_MISC_H_

#define ID_TYPE uint32_t
// #define QLEVER_ID_TYPE size_t
#define QLEVER_ID_TYPE uint32_t
#define QLEVER_ID_TYPE size_t

const static ID_TYPE I_OFFSET = 500000000;
// half of the ID space for points, half for the rest
const static ID_TYPE I_OFFSET = 2147483648;
const static size_t MAXROWS = 18446744073709551615u;

// major coordinates will fit into 2^15, as coordinates go from
Expand All @@ -43,7 +43,6 @@ union ID {

// Orders IdMapping entries by their qid alone. The id member takes no part
// in the comparison, so two mappings with the same qid are equivalent under
// this ordering.
inline bool operator<(const IdMapping& lh, const IdMapping& rh) {
  return lh.qid < rh.qid;
}

Expand Down
13 changes: 11 additions & 2 deletions src/qlever-petrimaps/PetriMapsMain.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,9 @@ void printHelp(int argc, char** argv) {
"(default: 9090)"
<< "\n -m <memory> Max memory in GB (default: 90% of system RAM)"
<< "\n -c <dir> cache dir (default: none)"
<< "\n -t <minutes> request cache lifetime (default: 360)\n";
<< "\n -t <minutes> request cache lifetime (default: 360)"
<< "\n -a <numobjects> threshold for auto layer selection (default: "
"1000)\n";
}

// _____________________________________________________________________________
Expand All @@ -41,6 +43,7 @@ int main(int argc, char** argv) {
// default port
int port = 9090;
int cacheLifetime = 6 * 60;
size_t autoThreshold = 1000;
double maxMemoryGB =
(sysconf(_SC_PHYS_PAGES) * sysconf(_SC_PAGE_SIZE) * 0.9) / 1000000000;
std::string cacheDir;
Expand Down Expand Up @@ -74,6 +77,12 @@ int main(int argc, char** argv) {
exit(1);
}
cacheLifetime = atof(argv[i]);
} else if (cur == "-a") {
if (++i >= argc) {
LOG(ERROR) << "Missing argument for auto threshold (-a).";
exit(1);
}
autoThreshold = atoi(argv[i]);
}
}

Expand All @@ -85,7 +94,7 @@ int main(int argc, char** argv) {

LOG(INFO) << "Starting server...";
LOG(INFO) << "Max memory is " << maxMemoryGB << " GB...";
Server serv(maxMemoryGB * 1000000000, cacheDir, cacheLifetime);
Server serv(maxMemoryGB * 1000000000, cacheDir, cacheLifetime, autoThreshold);

LOG(INFO) << "Listening on port " << port;
util::http::HttpServer(port, &serv, std::thread::hardware_concurrency())
Expand Down
81 changes: 48 additions & 33 deletions src/qlever-petrimaps/server/Requestor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -449,7 +449,8 @@ const ResObj Requestor::getNearest(util::geo::DPoint rp, double rad, double res,
util::geo::FPoint p;
if (i >= _objects.size()) {
size_t cid = i - _objects.size();
p = clusterGeom(cid, res);
auto dp = clusterGeom(cid, res);
p = {dp.getX(), dp.getY()};
} else {
p = _cache->getPoints()[_objects[i].first];
}
Expand Down Expand Up @@ -573,12 +574,14 @@ const ResObj Requestor::getNearest(util::geo::DPoint rp, double rad, double res,
else
row = _objects[nearest].second;

auto points = geomPointGeoms(nearest, res);

return {true,
nearest >= _objects.size() ? nearest - _objects.size() : nearest,
geomPointGeoms(nearest, res),
points.size() == 1 ? points[0] : util::geo::centroid(points),
requestRow(row),
{},
{}};
points,
geomLineGeoms(nearest, rad / 10), geomPolyGeoms(nearest, rad / 10)};
}

if (dBestL < rad && dBestL <= dBest) {
Expand All @@ -590,29 +593,33 @@ const ResObj Requestor::getNearest(util::geo::DPoint rp, double rad, double res,

if (isArea && util::geo::contains(rp, util::geo::DPolygon(dline))) {
return {true, nearestL,
{frp}, requestRow(_objects[nearestL].second),
{}, geomPolyGeoms(nearestL, rad / 10)};
{frp.getX(), frp.getY()}, requestRow(_objects[nearestL].second),
geomPointGeoms(nearestL, res),
geomLineGeoms(nearestL, rad / 10), geomPolyGeoms(nearestL, rad / 10)};
} else {
if (isArea) {
auto p = util::geo::PolyLine<double>(dline).projectOn(rp).p;
auto fp = util::geo::FPoint(p.getX(), p.getY());
auto fp = util::geo::DPoint(p.getX(), p.getY());
return {true, nearestL,
{fp}, requestRow(_objects[nearestL].second),
{}, geomPolyGeoms(nearestL, rad / 10)};
fp, requestRow(_objects[nearestL].second),
geomPointGeoms(nearestL, res),
geomLineGeoms(nearestL, rad / 10), geomPolyGeoms(nearestL, rad / 10)};
} else {
auto p = util::geo::PolyLine<double>(dline).projectOn(rp).p;
auto fp = util::geo::FPoint(p.getX(), p.getY());
auto fp = util::geo::DPoint(p.getX(), p.getY());

return {true,
nearestL,
{fp},
fp,
requestRow(_objects[nearestL].second),
geomPointGeoms(nearestL, res),
geomLineGeoms(nearestL, rad / 10),
{}};
geomPolyGeoms(nearestL, rad / 10)};
}
}
}

return {false, 0, {{0, 0}}, {}, {}, {}};
return {false, 0, {0, 0}, {}, {}, {}, {}};
}

// _____________________________________________________________________________
Expand All @@ -628,12 +635,12 @@ const ResObj Requestor::getGeom(size_t id, double rad) const {
bool isArea = Requestor::isArea(lineId);

if (isArea) {
return {true, id, {{0, 0}}, {}, {}, geomPolyGeoms(id, rad / 10)};
return {true, id, {0, 0}, {}, geomPointGeoms(id, rad / 10), geomLineGeoms(id, rad / 10), geomPolyGeoms(id, rad / 10)};
} else {
return {true, id, {{0, 0}}, {}, geomLineGeoms(id, rad / 10), {}};
return {true, id, {0, 0}, {}, geomPointGeoms(id, rad / 10), geomLineGeoms(id, rad / 10), geomPolyGeoms(id, rad / 10)};
}
} else {
return {true, id, geomPointGeoms(id), {}, {}, {}};
return {true, id, {0, 0}, {}, geomPointGeoms(id, rad / 10), geomLineGeoms(id, rad / 10), geomPolyGeoms(id, rad / 10)};
}
}

Expand Down Expand Up @@ -686,30 +693,32 @@ util::geo::MultiLine<double> Requestor::geomLineGeoms(size_t oid,
// catch multigeometries
for (size_t i = oid;
i < _objects.size() && _objects[i].second == _objects[oid].second; i++) {
if (_objects[oid].first < I_OFFSET) continue;
if (_objects[i].first < I_OFFSET || Requestor::isArea(_objects[i].first - I_OFFSET)) continue;
const auto& fline = extractLineGeom(_objects[i].first - I_OFFSET);
polys.push_back(util::geo::simplify(fline, eps));
}

if (oid > 0) {
for (size_t i = oid - 1;
i < _objects.size() && _objects[i].second == _objects[oid].second; i--) {
if (_objects[oid].first < I_OFFSET) continue;
if (_objects[i].first < I_OFFSET || Requestor::isArea(_objects[i].first - I_OFFSET)) continue;
const auto& fline = extractLineGeom(_objects[i].first - I_OFFSET);
polys.push_back(util::geo::simplify(fline, eps));
}
}

return polys;
}

// _____________________________________________________________________________
// Convenience overload: forwards to geomPointGeoms(oid, res) with res = -1.
// A negative res makes the two-argument overload skip its cluster branch
// (which is only taken when res is non-negative and oid >= _objects.size()).
util::geo::MultiPoint<float> Requestor::geomPointGeoms(size_t oid) const {
util::geo::MultiPoint<double> Requestor::geomPointGeoms(size_t oid) const {
return geomPointGeoms(oid, -1);
}

// _____________________________________________________________________________
util::geo::MultiPoint<float> Requestor::geomPointGeoms(size_t oid,
util::geo::MultiPoint<double> Requestor::geomPointGeoms(size_t oid,
double res) const {
std::vector<util::geo::FPoint> points;
std::vector<util::geo::DPoint> points;

if (!(res < 0) && oid >= _objects.size()) {
return {clusterGeom(oid - _objects.size(), res)};
Expand All @@ -722,14 +731,18 @@ util::geo::MultiPoint<float> Requestor::geomPointGeoms(size_t oid,
// catch multigeometries
for (size_t i = oid;
i < _objects.size() && _objects[i].second == _objects[oid].second; i++) {
if (_objects[oid].first >= I_OFFSET) continue;
points.push_back(_cache->getPoints()[_objects[i].first]);
if (_objects[i].first >= I_OFFSET) continue;
auto p = _cache->getPoints()[_objects[i].first];
points.push_back({p.getX(), p.getY()});
}

for (size_t i = oid - 1;
i < _objects.size() && _objects[i].second == _objects[oid].second; i--) {
if (_objects[oid].first >= I_OFFSET) continue;
points.push_back(_cache->getPoints()[_objects[i].first]);
if (oid > 0) {
for (size_t i = oid - 1;
i < _objects.size() && _objects[i].second == _objects[oid].second; i--) {
if (_objects[i].first >= I_OFFSET) continue;
auto p = _cache->getPoints()[_objects[i].first];
points.push_back({p.getX(), p.getY()});
}
}

return points;
Expand All @@ -743,27 +756,29 @@ util::geo::MultiPolygon<double> Requestor::geomPolyGeoms(size_t oid,
// catch multigeometries
for (size_t i = oid;
i < _objects.size() && _objects[i].second == _objects[oid].second; i++) {
if (_objects[oid].first < I_OFFSET) continue;
if (_objects[i].first < I_OFFSET || !Requestor::isArea(_objects[i].first - I_OFFSET)) continue;
const auto& dline = extractLineGeom(_objects[i].first - I_OFFSET);
polys.push_back(util::geo::DPolygon(util::geo::simplify(dline, eps)));
}

if (oid > 0) {
for (size_t i = oid - 1;
i < _objects.size() && _objects[i].second == _objects[oid].second; i--) {
if (_objects[oid].first < I_OFFSET) continue;
if (_objects[i].first < I_OFFSET || !Requestor::isArea(_objects[i].first - I_OFFSET)) continue;
const auto& dline = extractLineGeom(_objects[i].first - I_OFFSET);
polys.push_back(util::geo::DPolygon(util::geo::simplify(dline, eps)));
}
}

return polys;
}

// _____________________________________________________________________________
util::geo::FPoint Requestor::clusterGeom(size_t cid, double res) const {
util::geo::DPoint Requestor::clusterGeom(size_t cid, double res) const {
size_t oid = _clusterObjects[cid].first;
const auto& pp = _cache->getPoints()[_objects[oid].first];

if (res < 0) return {pp};
if (res < 0) return {pp.getX(), pp.getY()};

size_t num = _clusterObjects[cid].second.first;
size_t tot = _clusterObjects[cid].second.second;
Expand All @@ -788,13 +803,13 @@ util::geo::FPoint Requestor::clusterGeom(size_t cid, double res) const {
double y = pp.getY() + (rad + row * 13.0) * res *
cos(relpos * (2.0 * 3.14159265359 / tot));

return util::geo::FPoint{x, y};
return util::geo::DPoint{x, y};
} else {
float rad = 2 * tot;

float x = pp.getX() + rad * res * sin(num * (2 * 3.14159265359 / tot));
float y = pp.getY() + rad * res * cos(num * (2 * 3.14159265359 / tot));

return util::geo::FPoint{x, y};
return util::geo::DPoint{x, y};
}
}
13 changes: 7 additions & 6 deletions src/qlever-petrimaps/server/Requestor.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,13 @@ namespace petrimaps {
// Result object returned by Requestor::getNearest and Requestor::getGeom.
struct ResObj {
// true if an object was found; getNearest returns false here when nothing
// matched
bool has;
// index of the matched object (getNearest subtracts _objects.size() for
// indices past the end of _objects)
size_t id;
// position reported for the match; getGeom always fills this with {0, 0}
std::vector<util::geo::FPoint> pos;
util::geo::DPoint pos;
// (name, value) string pairs; getNearest fills this from requestRow(),
// getGeom leaves it empty
std::vector<std::pair<std::string, std::string>> cols;

// the geometry of the object, as produced by geomPointGeoms(),
// geomLineGeoms() and geomPolyGeoms()
std::vector<util::geo::DLine> line;
std::vector<util::geo::DPolygon> poly;
util::geo::MultiPoint<double> point;
util::geo::MultiLine<double> line;
util::geo::MultiPolygon<double> poly;
};

struct ReaderCbPair {
Expand Down Expand Up @@ -95,14 +96,14 @@ class Requestor {

util::geo::MultiPolygon<double> geomPolyGeoms(size_t oid, double eps) const;
util::geo::MultiLine<double> geomLineGeoms(size_t oid, double eps) const;
util::geo::MultiPoint<float> geomPointGeoms(size_t oid, double res) const;
util::geo::MultiPoint<float> geomPointGeoms(size_t oid) const;
util::geo::MultiPoint<double> geomPointGeoms(size_t oid, double res) const;
util::geo::MultiPoint<double> geomPointGeoms(size_t oid) const;

util::geo::DLine extractLineGeom(size_t lineId) const;
bool isArea(size_t lineId) const;

size_t getNumObjects() const { return _numObjects; }
util::geo::FPoint clusterGeom(size_t cid, double res) const;
util::geo::DPoint clusterGeom(size_t cid, double res) const;

std::chrono::time_point<std::chrono::system_clock> createdAt() const {
return _createdAt;
Expand Down
Loading

0 comments on commit 7ea149b

Please sign in to comment.