Skip to content

Commit

Permalink
Merge pull request #213 from spwoodcock/fix/flatgeobuf-geomcol
Browse files Browse the repository at this point in the history
Optional param to wrap flatgeobuf geometries in GeometryCollection
  • Loading branch information
kshitijrajsharma authored Feb 21, 2024
2 parents 73f0008 + f51bfd3 commit 77c1359
Show file tree
Hide file tree
Showing 7 changed files with 161 additions and 156 deletions.
13 changes: 9 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,16 @@

## Installation

#### To setup Backend Follow [Backend_Installation](./backend/Readme.md)
Raw Data API consists of two elements:

- A **backend** database, tools, and scripts: used to import OSM data into a specific database structure and keep it updated.
- An **API** that is used to serve data from the backend database.

#### To set up the backend, see [Backend Installation](./installation/backend)

Raw Data API can be installed through `docker` or locally on your computer.

- To install with docker see [docker installation](./docs/src/installation/docker.md).
- To install with docker see [docker installation](./installation/docker).
- To install locally, continue below.

NOTE: The installation guide below has only been tested on Ubuntu; we recommend using Docker for other operating systems.
Expand Down Expand Up @@ -90,7 +95,7 @@ pip install -r requirements.txt

### Additional required configurations for Raw Data API

Setup the necessary configurations for Raw Data API from [configurations](./docs/src/installation/configurations.md).
Set up the necessary configurations for Raw Data API by following [configurations](./installation/configurations).

Set up config.txt in the project root.

Expand Down Expand Up @@ -202,7 +207,7 @@ py.test -k test function name

## Contribution & Development

Learn about current priorities and work going through Roadmap & see here [CONTRIBUTING](./docs/src/contributing.md)
Learn about current priorities and ongoing work in the Roadmap, and see [CONTRIBUTING](./contributing) for how to contribute.

## Roadmap
https://github.com/orgs/hotosm/projects/29
Expand Down
1 change: 1 addition & 0 deletions docs/mkdocs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ extra:
nav:
- Raw Data API : "index.md"
- Installation:
- Backend: "installation/backend.md"
- Docker Installation: "installation/docker.md"
- Configurations: "installation/configurations.md"
# - User Guide: 'user_guide/index.md'
Expand Down
1 change: 1 addition & 0 deletions docs/src/installation/backend.md
147 changes: 53 additions & 94 deletions src/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -529,110 +529,69 @@ def ogr_export_shp(point_query, line_query, poly_query, working_dir, file_name):

@staticmethod
def ogr_export(query, outputtype, working_dir, dump_temp_path, params):
"""Function written to support ogr type extractions as well , In this way we will be able to support all file formats supported by Ogr , Currently it is slow when dataset gets bigger as compared to our own conversion method but rich in feature and data types even though it is slow"""
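"""Builds an ogr2ogr command based on outputtype and parameters, then runs it
Args:
query (str): PostgreSQL query whose result is exported
outputtype: RawDataOutputType value used to pick the ogr2ogr format and extra options
working_dir: directory where the temporary export_query.sql file is written
dump_temp_path: path passed to ogr2ogr as the output destination
params: request parameters; min_zoom, max_zoom, file_name and fgb_wrap_geoms are read
"""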
db_items = get_db_connection_params()
# format query if it has " in string"
query_path = os.path.join(working_dir, "export_query.sql")
# write the query to a .sql file and pass that file to ogr2ogr, so the full SQL text is not placed on the command line
with open(query_path, "w", encoding="UTF-8") as file:
file.write(query)
# for mbtiles we need additional input as well i.e. minzoom and maxzoom , setting default at max=22 and min=10
if ENABLE_TILES:
if outputtype == RawDataOutputType.MBTILES.value:
if params.min_zoom and params.max_zoom:
cmd = """ogr2ogr -overwrite -f MBTILES -dsco MINZOOM={min_zoom} -dsco MAXZOOM={max_zoom} {export_path} PG:"host={host} user={username} dbname={db} password={password}" -sql @"{pg_sql_select}" -lco ENCODING=UTF-8 -progress""".format(
min_zoom=params.min_zoom,
max_zoom=params.max_zoom,
export_path=dump_temp_path,
host=db_items.get("host"),
username=db_items.get("user"),
db=db_items.get("dbname"),
password=db_items.get("password"),
pg_sql_select=query_path,
)
else:
cmd = """ogr2ogr -overwrite -f MBTILES -dsco ZOOM_LEVEL_AUTO=YES {export_path} PG:"host={host} user={username} dbname={db} password={password}" -sql @"{pg_sql_select}" -lco ENCODING=UTF-8 -progress""".format(
export_path=dump_temp_path,
host=db_items.get("host"),
username=db_items.get("user"),
db=db_items.get("dbname"),
password=db_items.get("password"),
pg_sql_select=query_path,

format_options = {
RawDataOutputType.MBTILES.value: {
"format": "MBTILES",
"extra": (
"-dsco MINZOOM={} -dsco MAXZOOM={} ".format(
params.min_zoom, params.max_zoom
)
run_ogr2ogr_cmd(cmd)
if params.min_zoom and params.max_zoom
else "-dsco ZOOM_LEVEL_AUTO=YES"
),
},
RawDataOutputType.FLATGEOBUF.value: {
"format": "FLATGEOBUF",
"extra": "-lco SPATIAL_INDEX=YES VERIFY_BUFFERS=NO",
},
RawDataOutputType.GEOPARQUET.value: {
"format": "Parquet",
"extra": "",
},
RawDataOutputType.PGDUMP.value: {
"format": "PGDump",
"extra": "--config PG_USE_COPY YES -lco SRID=4326",
},
RawDataOutputType.KML.value: {
"format": "KML",
"extra": "",
},
RawDataOutputType.CSV.value: {
"format": "CSV",
"extra": "",
},
RawDataOutputType.GEOPACKAGE.value: {
"format": "GPKG",
"extra": "",
},
}

if outputtype == RawDataOutputType.FLATGEOBUF.value:
cmd = """ogr2ogr -overwrite -f FLATGEOBUF {export_path} PG:"host={host} port={port} user={username} dbname={db} password={password}" -sql @"{pg_sql_select}" -lco ENCODING=UTF-8 -progress VERIFY_BUFFERS=NO""".format(
export_path=dump_temp_path,
host=db_items.get("host"),
port=db_items.get("port"),
username=db_items.get("user"),
db=db_items.get("dbname"),
password=db_items.get("password"),
pg_sql_select=query_path,
)
run_ogr2ogr_cmd(cmd)
file_name_option = (
f"-nln {params.file_name if params.file_name else 'raw_export'}"
)

if outputtype == RawDataOutputType.GEOPARQUET.value:
cmd = """ogr2ogr -overwrite -f Parquet {export_path} PG:"host={host} port={port} user={username} dbname={db} password={password}" -sql @"{pg_sql_select}" -lco ENCODING=UTF-8 -progress""".format(
export_path=dump_temp_path,
host=db_items.get("host"),
port=db_items.get("port"),
username=db_items.get("user"),
db=db_items.get("dbname"),
password=db_items.get("password"),
pg_sql_select=query_path,
)
run_ogr2ogr_cmd(cmd)
if outputtype == RawDataOutputType.FLATGEOBUF.value and params.fgb_wrap_geoms:
format_options[outputtype]["extra"] += " -nlt GEOMETRYCOLLECTION"

if outputtype == RawDataOutputType.PGDUMP.value:
cmd = """ogr2ogr -overwrite --config PG_USE_COPY YES -f PGDump {export_path} PG:"host={host} port={port} user={username} dbname={db} password={password}" -sql @"{pg_sql_select}" -lco SRID=4326 -progress""".format(
export_path=dump_temp_path,
host=db_items.get("host"),
port=db_items.get("port"),
username=db_items.get("user"),
db=db_items.get("dbname"),
password=db_items.get("password"),
pg_sql_select=query_path,
)
run_ogr2ogr_cmd(cmd)
format_option = format_options.get(outputtype, {"format": "", "extra": ""})

if outputtype == RawDataOutputType.KML.value:
cmd = """ogr2ogr -overwrite -f KML {export_path} PG:"host={host} port={port} user={username} dbname={db} password={password}" -sql @"{pg_sql_select}" -lco ENCODING=UTF-8 -progress""".format(
export_path=dump_temp_path,
host=db_items.get("host"),
port=db_items.get("port"),
username=db_items.get("user"),
db=db_items.get("dbname"),
password=db_items.get("password"),
pg_sql_select=query_path,
)
run_ogr2ogr_cmd(cmd)

if outputtype == RawDataOutputType.CSV.value:
cmd = """ogr2ogr -overwrite -f CSV {export_path} PG:"host={host} port={port} user={username} dbname={db} password={password}" -sql @"{pg_sql_select}" -lco ENCODING=UTF-8 -progress""".format(
export_path=dump_temp_path,
host=db_items.get("host"),
port=db_items.get("port"),
username=db_items.get("user"),
db=db_items.get("dbname"),
password=db_items.get("password"),
pg_sql_select=query_path,
)
run_ogr2ogr_cmd(cmd)
cmd = f"ogr2ogr -overwrite -f {format_option['format']} {dump_temp_path} PG:\"host={db_items.get('host')} port={db_items.get('port')} user={db_items.get('user')} dbname={db_items.get('dbname')} password={db_items.get('password')}\" -sql @{query_path} -lco ENCODING=UTF-8 -progress {format_option['extra']} {file_name_option}"
run_ogr2ogr_cmd(cmd)

if outputtype == RawDataOutputType.GEOPACKAGE.value:
cmd = """ogr2ogr -overwrite -f GPKG {export_path} PG:"host={host} port={port} user={username} dbname={db} password={password}" -sql @"{pg_sql_select}" -lco ENCODING=UTF-8 -progress""".format(
export_path=dump_temp_path,
host=db_items.get("host"),
port=db_items.get("port"),
username=db_items.get("user"),
db=db_items.get("dbname"),
password=db_items.get("password"),
pg_sql_select=query_path,
)
run_ogr2ogr_cmd(cmd)
# remove the query file; it is no longer needed
os.remove(query_path)

@staticmethod
Expand Down
12 changes: 4 additions & 8 deletions src/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,14 +75,6 @@ def get_bool_env_var(key, default=False):
"API_CONFIG", "LOG_LEVEL", fallback="debug"
)

ALLOW_BIND_ZIP_FILTER = os.environ.get("ALLOW_BIND_ZIP_FILTER") or config.get(
"API_CONFIG", "ALLOW_BIND_ZIP_FILTER", fallback=None
)

ENABLE_TILES = os.environ.get("ENABLE_TILES") or config.get(
"API_CONFIG", "ENABLE_TILES", fallback=None
)


def not_raises(func, *args, **kwargs):
try:
Expand Down Expand Up @@ -166,10 +158,14 @@ def not_raises(func, *args, **kwargs):
if not os.path.exists(EXPORT_PATH):
# Create the exports directory because it does not exist
os.makedirs(EXPORT_PATH)

ALLOW_BIND_ZIP_FILTER = get_bool_env_var(
"ALLOW_BIND_ZIP_FILTER",
config.getboolean("API_CONFIG", "ALLOW_BIND_ZIP_FILTER", fallback=False),
)
ENABLE_TILES = get_bool_env_var(
"ENABLE_TILES", config.getboolean("API_CONFIG", "ENABLE_TILES", fallback=False)
)

# check whether to use connection pooling or not
USE_CONNECTION_POOLING = get_bool_env_var(
Expand Down
68 changes: 36 additions & 32 deletions src/validation/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,10 @@ class RawDataCurrentParams(RawDataCurrentParamsBase):
default=True,
description="Attaches uid to exports by default , Only disable this if it is recurring export",
)
fgb_wrap_geoms: Optional[bool] = Field(
default=False,
description="Wraps all flatgeobuff output to geometrycollection geometry type",
)
if ALLOW_BIND_ZIP_FILTER:
bind_zip: Optional[bool] = True

Expand Down Expand Up @@ -292,22 +296,22 @@ class StatsRequestParams(BaseModel, GeometryValidatorMixin):
max_length=3,
example="NPL",
)
geometry: Optional[
Union[Polygon, MultiPolygon, Feature, FeatureCollection]
] = Field(
default=None,
example={
"type": "Polygon",
"coordinates": [
[
[83.96919250488281, 28.194446860487773],
[83.99751663208006, 28.194446860487773],
[83.99751663208006, 28.214869548073377],
[83.96919250488281, 28.214869548073377],
[83.96919250488281, 28.194446860487773],
]
],
},
geometry: Optional[Union[Polygon, MultiPolygon, Feature, FeatureCollection]] = (
Field(
default=None,
example={
"type": "Polygon",
"coordinates": [
[
[83.96919250488281, 28.194446860487773],
[83.99751663208006, 28.194446860487773],
[83.99751663208006, 28.214869548073377],
[83.96919250488281, 28.214869548073377],
[83.96919250488281, 28.194446860487773],
]
],
},
)
)

@validator("geometry", pre=True, always=True)
Expand Down Expand Up @@ -604,22 +608,22 @@ class DynamicCategoriesModel(BaseModel, GeometryValidatorMixin):
}
],
)
geometry: Optional[
Union[Polygon, MultiPolygon, Feature, FeatureCollection]
] = Field(
default=None,
example={
"type": "Polygon",
"coordinates": [
[
[83.96919250488281, 28.194446860487773],
[83.99751663208006, 28.194446860487773],
[83.99751663208006, 28.214869548073377],
[83.96919250488281, 28.214869548073377],
[83.96919250488281, 28.194446860487773],
]
],
},
geometry: Optional[Union[Polygon, MultiPolygon, Feature, FeatureCollection]] = (
Field(
default=None,
example={
"type": "Polygon",
"coordinates": [
[
[83.96919250488281, 28.194446860487773],
[83.99751663208006, 28.194446860487773],
[83.99751663208006, 28.214869548073377],
[83.96919250488281, 28.214869548073377],
[83.96919250488281, 28.194446860487773],
]
],
},
)
)

@validator("geometry", pre=True, always=True)
Expand Down
Loading

0 comments on commit 77c1359

Please sign in to comment.