diff --git a/.nojekyll b/.nojekyll new file mode 100644 index 00000000..e69de29b diff --git a/404.html b/404.html new file mode 100644 index 00000000..89200bef --- /dev/null +++ b/404.html @@ -0,0 +1,481 @@ + + + +
+ + + + + + + + + + + + + + + + +rio-cogeo can also be integrated directly in your custom script. See rio_cogeo.cogeo.cog_translate function.
+e.g:
+from rio_cogeo.cogeo import cog_translate
+
def _translate(src_path, dst_path, profile="webp", profile_options=None, **options):
    """Convert image to COG.

    Args:
        src_path: Input dataset, forwarded to ``cog_translate``.
        dst_path: Output dataset path, forwarded to ``cog_translate``.
        profile: Name of the COG profile looked up in ``cog_profiles``.
        profile_options: Optional mapping of creation options that override
            the selected profile. ``None`` means "no overrides".
        **options: Extra keyword arguments forwarded to ``cog_translate``.

    Returns:
        ``True`` once ``cog_translate`` has returned.
    """
    # Imported here so this snippet runs on its own: the example only
    # imports `cog_translate` at module level.
    from rio_cogeo.profiles import cog_profiles

    # Format creation option (see gdalwarp `-co` option)
    output_profile = cog_profiles.get(profile)
    output_profile.update(dict(BIGTIFF="IF_SAFER"))
    # `None` default instead of a shared mutable `{}` default argument
    output_profile.update(profile_options or {})

    # GDAL configuration options, handed to `cog_translate` through `config`
    config = dict(
        GDAL_NUM_THREADS="ALL_CPUS",
        GDAL_TIFF_INTERNAL_MASK=True,
        GDAL_TIFF_OVR_BLOCKSIZE="128",
    )

    cog_translate(
        src_path,
        dst_path,
        output_profile,
        config=config,
        in_memory=False,
        quiet=True,
        **options,
    )
    return True
+
import numpy
+
+import mercantile
+
+from rasterio.io import MemoryFile
+from rasterio.transform import from_bounds
+
+from rio_cogeo.cogeo import cog_translate
+from rio_cogeo.profiles import cog_profiles
+
# Extent of the example image: bounds of tile x=0, y=0, z=0
bounds = mercantile.bounds(mercantile.Tile(0, 0, 0))

# Rasterio uses numpy array of shape of `(bands, height, width)`
width = 1024
height = 1024
nbands = 3

# Random float32 pixels standing in for real data
img_array = numpy.random.rand(nbands, height, width).astype(numpy.float32)

src_transform = from_bounds(*bounds, width=width, height=height)

# Create GeoTIFF profile
src_profile = dict(
    driver="GTiff",
    dtype="float32",
    count=nbands,
    height=height,
    width=width,
    crs="epsg:4326",
    transform=src_transform,
)


with MemoryFile() as memfile:
    with memfile.open(**src_profile) as mem:
        # Populate the input file with numpy array
        mem.write(img_array)

        # Translate while `mem` is still open: the dataset object itself
        # is passed to `cog_translate` as the input.
        dst_profile = cog_profiles.get("deflate")
        cog_translate(
            mem,
            "my-output-cog.tif",
            dst_profile,
            in_memory=True,
            quiet=True,
        )
+
from rasterio.io import MemoryFile
+
+from rio_cogeo.cogeo import cog_translate
+from rio_cogeo.profiles import cog_profiles
+
+from boto3.session import Session as boto3_session
+
dst_profile = cog_profiles.get("deflate")

with MemoryFile() as mem_dst:
    # Important, we pass `mem_dst.name` as output dataset path
    cog_translate("my-input-file.tif", mem_dst.name, dst_profile, in_memory=True)

    # You can then use the memoryfile to do something else like
    # upload to AWS S3.
    # `boto3_session` is the Session *class*, so it has to be instantiated
    # before asking for a client (`client` is an instance method).
    client = boto3_session().client("s3")
    client.upload_fileobj(mem_dst, "my-bucket", "my-key")
+
Use Case: You may want to run your translation tasks in the background and keep +track of progress. To do so you can utilize an alternative text buffer and another +thread. By outputting the progress to a separate text buffer you can then track +the translation progress without blocking the program. +
from rio_cogeo.cogeo import cog_translate
+from rio_cogeo.profiles import cog_profiles
+
# GDAL settings forwarded to `cog_translate` through its `config` argument
config = dict(
    GDAL_NUM_THREADS="ALL_CPUS",
    GDAL_TIFF_INTERNAL_MASK=True,
    GDAL_TIFF_OVR_BLOCKSIZE="128",
)


with open("logfile.txt", "w+") as buffer:

    # Progress output buffer must be interactive, so make the file
    # report itself as a TTY.
    buffer.isatty = lambda: True

    # Progress is written to `buffer` (quiet=False) instead of the console
    cog_translate(
        "example-input.tif",
        "example-output.tif",
        cog_profiles.get("deflate"),
        progress_out=buffer,
        quiet=False,
        nodata=0,
        in_memory=False,
        config=config,
    )
+
Below is a snippet of code that lets you read how far along a +translation is from the text buffer.
+import re
+
def getPercentage(buffer: str) -> float:
    """Return the latest progress percentage found in *buffer* as a fraction.

    Args:
        buffer: Text captured from the progress output (e.g. the content of
            the progress log file).

    Returns:
        The last ``<digits>%`` token in *buffer* divided by 100 (``"50%"``
        gives ``0.5``), or ``0.0`` when no percentage has been written yet.
    """
    # Raw string avoids the invalid "\d" escape; `+` (not `*`) requires at
    # least one digit so a bare "%" is never parsed as a number.
    matches = re.findall(r"\d+%", buffer)
    if not matches:
        return 0.0
    return int(matches[-1].rstrip("%")) / 100
+
rio-cogeo provides a --web-optimized option which aims to create a web-tiling friendly COG.
+Output dataset features:
+Important
+Because it will certainly create a larger file (with padding tiles on the side of the file), a nodata value, an alpha band or an internal mask should +be present in the input dataset. If not the original data will be surrounded by black (0) data.
+By default rio cogeo will create a dataset with 512x512 internal tile size.
+This can be updated by passing --co BLOCKXSIZE=64 --co BLOCKYSIZE=64
options.
Web tiling optimization
+Creating a Web-Optimized COG means you'll get a file which is perfectly aligned (bounds and internal tiles) with the mercator grid, and whose resolutions (for the raw data and overviews) match the mercator zoom level resolutions. This reduces the number of GET requests a dynamic tiling service needs to make to create a map tile from your COG.
+If the input dataset is not aligned to the web mercator grid, the tiler will need +to fetch multiple internal tiles.
+By default rio cogeo will calculate the optimal overview level based on dataset +size and internal tile size (overviews should not be smaller than the internal tile +size, e.g. 512px). The overview level will be translated to decimation levels of +powers of two:
overview_level = 3
# One decimation factor per overview level: 2^1, 2^2, ..., 2^overview_level
overviews = [pow(2, level) for level in range(1, overview_level + 1)]
print(overviews)
+[2, 4, 8]
+
As described above, a decimation base of 2 is used by default. However you can provide a custom base, N > 1, with --decimation-base N. Optimal overviews are computed assuming a base 2 is used, so using --decimation-base also requires that --overview-level is provided. Similar to the default example, here are the overviews for base 3:
overview_level = 3
decimation_base = 3
# One decimation factor per overview level: base^1, ..., base^overview_level
overviews = [pow(decimation_base, level) for level in range(1, overview_level + 1)]
print(overviews)
+[3, 9, 27]
+
This is primarily useful when working with custom TileMatrixSets that also use a non-default decimation base.
+By default rio cogeo does NOT forward band metadata (e.g. statistics) to the output dataset.
+$ gdalinfo my_file.tif
+...
+Band 1 Block=576x1 Type=Float64, ColorInterp=Gray
+ NoData Value=999999986991104
+ Unit Type: mol mol-1
+ Metadata:
+ long_name=CO2 Dry-Air Column Average
+ missing_value=9.9999999e+14
+ NETCDF_DIM_time=0
+ NETCDF_VARNAME=XCO2MEAN
+ units=mol mol-1
+ _FillValue=9.9999999e+14
+
+$ rio cogeo create my_file.tif my_cog.tif --blocksize 256
+
+$ gdalinfo my_cog.tif
+...
+Band 1 Block=256x256 Type=Float64, ColorInterp=Gray
+ NoData Value=999999986991104
+ Overviews: 288x181
+
You can use --forward-band-tags
to forward the band metadata to the output dataset.
$ rio cogeo create my_file.tif my_cog.tif --blocksize 256 --forward-band-tags
+$ gdalinfo my_cog.tif
+...
+Band 1 Block=256x256 Type=Float64, ColorInterp=Gray
+ NoData Value=999999986991104
+ Overviews: 288x181
+ Metadata:
+ long_name=CO2 Dry-Air Column Average
+ missing_value=9.9999999e+14
+ NETCDF_DIM_time=0
+ NETCDF_VARNAME=XCO2MEAN
+ units=mol mol-1
+ _FillValue=9.9999999e+14
+
By default rio-cogeo will forward any nodata value or alpha channel to the +output COG.
+If your dataset type is Byte or UInt16, you could use an internal bit mask
+(with the --add-mask
option) to replace the Nodata value or Alpha band in
+output dataset (supported by most GDAL based backends).
Note: when adding a mask
with an input dataset having an alpha band you'll
+need to use the bidx
options to remove it from the output dataset.
# Replace the alpha band by an internal mask
+$ rio cogeo create mydataset_withalpha.tif mydataset_withmask.tif --cog-profile raw --add-mask --bidx 1,2,3
+
Important
+Using internal nodata value with lossy compression (webp
, jpeg
) is not
+recommended. Please use internal masking (or alpha band if using webp).
The rio-cogeo module adds a cogeo
sub-command to your rasterio (rio) CLI. Three (3) commands are then available: create
, info
and validate
.
$ rio cogeo --help
+ Usage: rio cogeo [OPTIONS] COMMAND [ARGS]...
+
+ Rasterio cogeo subcommands.
+
+ Options:
+ --version Show the version and exit.
+ --help Show this message and exit.
+
+ Commands:
+ create Create COGEO
+ info Lists information about a raster dataset.
+ validate Validate COGEO
+
$ rio cogeo create --help
+ Usage: rio cogeo create [OPTIONS] INPUT OUTPUT
+
+ Create Cloud Optimized Geotiff.
+
+ Options:
+ --bidx, -b Band indexes to copy.
+ --cog-profile, -p CloudOptimized GeoTIFF profile (default: deflate). [jpeg|webp|zstd|lzw|deflate|packbits|lzma|lerc|lerc_deflate|lerc_zstd|raw]
+ --nodata Set nodata masking values for input dataset.
+ --add-mask Force output dataset creation with an internal mask (convert alpha band or nodata to mask).
+ --blocksize Overwrite profile's tile size.
+ --dtype, -t Output data type. [ubyte|uint8|uint16|int16|uint32|int32|float32|float64]
+ --overview-level Overview level (if not provided, appropriate overview level will be selected until the smallest overview is smaller than the value of the internal blocksize)
+ --overview-resampling Overview creation resampling algorithm (default: nearest). [nearest|bilinear|cubic|cubic_spline|lanczos|average|mode|gauss]
+ --overview-blocksize Overview's internal tile size (default defined by GDAL_TIFF_OVR_BLOCKSIZE env or 128)
+ --web-optimized, -w Create COGEO optimized for Web.
+ --zoom-level-strategy Strategy to determine zoom level (default: auto). [lower|upper|auto]
+ --zoom-level Zoom level number for the highest resolution. If this option is specified, `--zoom-level-strategy` is ignored.
+ --aligned-levels Number of overview levels for which GeoTIFF tile and tiles defined in the tiling scheme match.
+ --resampling, -r Resampling algorithm (default: nearest). Will only be applied with the `--web-optimized` option. [nearest|bilinear|cubic|cubic_spline|lanczos|average|mode|max|min|med|q1|q3|sum]
+ --in-memory / --no-in-memory Force processing raster in memory / not in memory (default: process in memory if smaller than 120 million pixels)
+ --allow-intermediate-compression Allow intermediate file compression to reduce memory/disk footprint.
+ --forward-band-tags Forward band tags to output bands.
+ --forward-ns-tags Forward namespaced tags to output dataset.
+ --threads Number of worker threads for multi-threaded compression (default: ALL_CPUS)
+ --use-cog-driver Use GDAL COG Driver (require GDAL>=3.1).
+ --tms PATH Path to TileMatrixSet JSON file.
+ --co, --profile Driver specific creation options. See the documentation for the selected output driver for more information.
+ --config GDAL configuration options.
+ --quiet, -q Remove progressbar and other non-error output.
+ --help Show this message and exit.
+
$ rio cogeo validate --help
+Usage: rio cogeo validate [OPTIONS] INPUT
+
+ Validate Cloud Optimized Geotiff.
+
+Options:
+ --strict Treat warnings as errors.
+ --config NAME=VALUE GDAL configuration options.
+ --help Show this message and exit.
+
The strict
option will treat warnings (e.g. missing overviews) as errors.
Using the --config
option can be useful to restrict GDAL environment. By default GDAL will check for external files (such as overviews), which could make a COG invalid. To force GDAL to only consider the input file you can use --config GDAL_DISABLE_READDIR_ON_OPEN=EMPTY_DIR
.
e.g: +
$ rio cogeo validate s3://bucket/geo.tif --config GDAL_DISABLE_READDIR_ON_OPEN=EMPTY_DIR --config AWS_NO_SIGN_REQUEST=YES
+
(extended version of rio info
).
$ rio cogeo info --help
+Usage: rio cogeo info [OPTIONS] INPUT
+
+ Dataset info.
+
+Options:
+ --json Print as JSON.
+ --help Show this message and exit.
+
# Create a COGEO with DEFLATE compression (Using default `Deflate` profile)
+$ rio cogeo create mydataset.tif mydataset_jpeg.tif
+
+# Validate COGEO
+$ rio cogeo validate mydataset_jpeg.tif
+
+# Create a COGEO with JPEG profile and the first 3 bands of the data and add internal mask
+$ rio cogeo create mydataset.tif mydataset_jpeg.tif -b 1,2,3 --add-mask --cog-profile jpeg
+
+# List Raster info
+$ rio cogeo info mydataset_jpeg.tif
+Driver: GTiff
+File: mydataset_jpeg.tif
+COG: True
+Compression: DEFLATE
+ColorSpace: None
+
+Profile
+ Width: 10980
+ Height: 10980
+ Bands: 1
+ Tiled: True
+ Dtype: uint16
+ NoData: 0.0
+ Alpha Band: False
+ Internal Mask: False
+ Interleave: BAND
+ Colormap: False
+
+Geo
+ Crs: EPSG:32634
+ Origin: (699960.0, 3600000.0)
+ Resolution: (10.0, -10.0)
+ BoundingBox: (699960.0, 3490200.0, 809760.0, 3600000.0)
+ MinZoom: 10
+ MaxZoom: 19
+
+IFD
+ Id Size BlockSize Decimation
+ 0 10980x10980 1024x1024 0
+ 1 5490x5490 128x128 2
+ 2 2745x2745 128x128 4
+ 3 1373x1373 128x128 8
+ 4 687x687 128x128 16
+
Requirements
+$ pip install rio-cogeo
The COG Specification is pretty basic
+++A cloud optimized GeoTIFF is a regular GeoTIFF file, aimed at being hosted on a HTTP file server, whose internal organization is friendly for consumption by clients issuing HTTP GET range request ("bytes: start_offset-end_offset" HTTP header). +It contains at its beginning the metadata of the full resolution imagery, followed by the optional presence of overview metadata, and finally the imagery itself. To make it friendly with streaming and progressive rendering, we recommend starting with the imagery of the smallest overview and finishing with the imagery of the full resolution level.
+
Ref: github.com/cogeotiff/cog-spec/blob/master/spec.md
+In Short, the specification just means you MUST create a GeoTIFF with internal block (tile) and the header must be ordered.
+From a command line point of view, it just means you need to add --co TILED=TRUE
in a gdal_translate command.
The Natural Earth web site hosts really neat raster and vector datasets. Let's download a large scale raster image: www.naturalearthdata.com/downloads/50m-raster-data/50m-cross-blend-hypso/
+$ wget https://naciscdn.org/naturalearth/50m/raster/HYP_50M_SR.zip
+
Here is what we want to look at:
+$ rio cogeo info HYP_50M_SR.tif
+Driver: GTiff
+File: /Users/vincentsarago/Downloads/HYP_50M_SR/HYP_50M_SR.tif
+Compression: None
+ColorSpace: None
+
+Profile
+ Width: 10800
+ Height: 5400
+ Bands: 3
+ Tiled: False
+ Dtype: uint8
+ NoData: None
+ Alpha Band: False
+ Internal Mask: False
+ Interleave: PIXEL
+ ColorMap: False
+
+Geo
+ Crs: EPSG:4326
+ Origin: (-179.99999999999997, 90.0)
+ Resolution: (0.03333333333333, -0.03333333333333)
+ BoundingBox: (-179.99999999999997, -89.99999999998201, 179.99999999996405, 90.0)
+
+IFD
+ Id Size BlockSize Decimation
+ 0 10800x5400 10800x1 0
+
What we can see from the rio cogeo info output:
+Tiled: false
and BlockSize=10800x1
) With this information we already know the GeoTIFF is not a COG (no internal blocks), but let's confirm with the validation script.
+$ rio cogeo validate HYP_50M_SR.tif
+The following warnings were found:
+- The file is greater than 512xH or 512xW, it is recommended to include internal overviews
+
+The following errors were found:
+- The file is greater than 512xH or 512xW, but is not tiled
+- The offset of the main IFD should be 8 for ClassicTIFF or 16 for BigTIFF. It is 174982088 instead
+- The offset of the first block of the image should be after its IFD
+/Users/vincentsarago/Downloads/HYP_50M_SR/HYP_50M_SR.tif is NOT a valid cloud optimized GeoTIFF
+
As mentioned earlier, the validation script confirms the GeoTIFF is not internally tiled and doesn't have overviews.
+Creating a valid Cloud Optimized GeoTIFF is not just about creating internal tiles and/or internal overviews. The file's internal structure has to be specific and requires a complete copy of the file, which is what rio-cogeo does internally.
+$ rio cogeo create HYP_50M_SR.tif HYP_50M_SR_COG.tif
+Reading input: /Users/vincentsarago/Downloads/HYP_50M_SR/HYP_50M_SR.tif
+ [####################################] 100%
+Adding overviews...
+Updating dataset tags...
+Writing output to: /Users/vincentsarago/Downloads/HYP_50M_SR/HYP_50M_SR_COG.tif
+
You could get the same COG with GDAL commands
+$ gdal_translate HYP_50M_SR.tif tmp.tif -co TILED=YES -co COMPRESS=DEFLATE
+$ gdaladdo -r nearest tmp.tif 2 4 8 16 32
+$ gdal_translate tmp.tif HYP_50M_SR_COG.tif -co TILED=YES -co COMPRESS=DEFLATE -co COPY_SRC_OVERVIEWS=YES
+
By default rio-cogeo
will create a COG with 512x512 blocksize (for the raw resolution) and use DEFLATE compression to reduce file size.
$ rio cogeo info HYP_50M_SR_COG.tif
+Driver: GTiff
+File: /Users/vincentsarago/Downloads/HYP_50M_SR/HYP_50M_SR_COG.tif
+Compression: DEFLATE
+ColorSpace: None
+
+Profile
+ Width: 10800
+ Height: 5400
+ Bands: 3
+ Tiled: True
+ Dtype: uint8
+ NoData: None
+ Alpha Band: False
+ Internal Mask: False
+ Interleave: PIXEL
+ ColorMap: False
+
+Geo
+ Crs: EPSG:4326
+ Origin: (-179.99999999999997, 90.0)
+ Resolution: (0.03333333333333001, -0.03333333333333001)
+ BoundingBox: (-179.99999999999997, -89.99999999998204, 179.9999999999641, 90.0)
+
+IFD
+ Id Size BlockSize Decimation
+ 0 10800x5400 512x512 0
+ 1 5400x2700 128x128 2
+ 2 2700x1350 128x128 4
+ 3 1350x675 128x128 8
+ 4 675x338 128x128 16
+
The importance of the compression
+$ ls -lah
+-rw-r--r--@ 1 youpi staff 167M Oct 18 2014 HYP_50M_SR.tif
+-rw-r--r-- 1 youpi staff 58M Jun 12 14:56 HYP_50M_SR_COG.tif
+
By using rio-cogeo
, we are not only creating a valid COG with internal tiling but we are also adding internal overviews (which let us get previews of the raw resolution with few GET requests).
Even with the addition of 4 levels of overviews (see IFD section in previous rio cogeo info
output), we managed to reduce the file size by a factor of about 3 (167MB → 58MB), and this is because rio cogeo applies Deflate compression by default to the COG.
More Magic ?
+As seen in the first rio cogeo info
output, the data has 3 bands (RGB) and is of Uint8 data type. Because of this configuration, we can use even more efficient compression like JPEG or WEBP.
$ rio cogeo create HYP_50M_SR.tif HYP_50M_SR_COG_jpeg.tif -p jpeg
+Reading input: /Users/vincentsarago/Downloads/HYP_50M_SR/HYP_50M_SR.tif
+ [####################################] 100%
+Adding overviews...
+Updating dataset tags...
+Writing output to: /Users/vincentsarago/Downloads/HYP_50M_SR/HYP_50M_SR_COG_jpeg.tif
+
+$ ls -lah
+-rw-r--r--@ 1 vincentsarago staff 167M Oct 18 2014 HYP_50M_SR.tif
+-rw-r--r-- 1 vincentsarago staff 58M Jun 12 14:56 HYP_50M_SR_COG.tif
+-rw-r--r-- 1 vincentsarago staff 4.8M Jun 15 11:08 HYP_50M_SR_COG_jpeg.tif
+
Now, our output file is only 4.8Mb, which is only ~3% of the original size 😱.
+Note:
+You can either load the COG in QGIS or use our plugin (rio-viz) to load it in a web browser.
+$ pip install rio-viz
+$ rio viz HYP_50M_SR_COG.tif
+