From 5fd1b4784a21cc948be410565ca5e525dfbf0c7f Mon Sep 17 00:00:00 2001 From: Brodrick Date: Wed, 25 Mar 2020 08:50:28 -0700 Subject: [PATCH] added install file, basic file checking, started multi-raster functions --- emit_utils/file_checks.py | 101 ++++++++++++++++++++++++++++++++ emit_utils/multi_raster_info.py | 62 ++++++++++++++++++++ setup.py | 18 ++++++ 3 files changed, 181 insertions(+) create mode 100644 emit_utils/file_checks.py create mode 100644 emit_utils/multi_raster_info.py create mode 100644 setup.py diff --git a/emit_utils/file_checks.py b/emit_utils/file_checks.py new file mode 100644 index 0000000..ecfda61 --- /dev/null +++ b/emit_utils/file_checks.py @@ -0,0 +1,101 @@ +""" +This code performs simple file checks, with particular consideration of geospatial files. + +Author: Philip G. Brodrick, philip.brodrick@jpl.nasa.gov +""" + +import os +import gdal +import logging +from typing import List + + +def check_files_exist(file_list: List): + """ Check if files exist on the system. + Args: + file_list: array-like of files to check + Returns: + None + """ + anybad = False + for file in file_list: + if os.path.isfile(file) is False: + logging.error('File: {} does not exist'.format(file)) + anybad = True + + if anybad: + FileNotFoundError('Files missing, check logs for details') + + +def check_raster_drivers(file_list: List): + """ Check if files have a recognized gdal raster driver (e.g., can be opened). + Args: + file_list: array-like of files to check + Returns: + None + """ + anybad = False + for file in file_list: + if gdal.Open(file, gdal.GA_ReadOnly) is None: + logging.error('Input GLT file: {} not a recognized raster format'.format(file)) + anybad = True + if anybad: + FileNotFoundError('Files not in recognized raster format, check logs for details') + + +def check_same_raster_projections(file_list: List): + """ Check that files have the same projection. Assumes input files exist and are rasters. + Args: + file_list: array-like of files to check + Returns: + None + """ + anybad = False + base_projection = gdal.Open(file_list[0], gdal.GA_ReadOnly).GetGCPSpatialRef() + for file in file_list: + dataset = gdal.Open(file, gdal.GA_ReadOnly) + if dataset.GetGCPSpatialRef() != base_projection: + logging.error('Projection in file {} differs from projection in file {}'.format(file, file_list[0])) + anybad = True + if anybad: + AttributeError('Files have mismatched projections, check logs for details') + + +def check_same_raster_resolutions(file_list: List, fractional_tolerance: float = 0.0001): + """ Check that files have the same resolution. Assumes input files exist and are rasters. + Args: + file_list: array-like of files to check + fractional_tolerance: relative tolerance deemed acceptable for resolution mismatch + Returns: + None + """ + anybad = False + base_trans = gdal.Open(file_list[0], gdal.GA_ReadOnly).GetGeoTranform() + for file in file_list: + transform = gdal.Open(file, gdal.GA_ReadOnly).GetGeoTransform() + if abs((transform[1] - base_trans[1])/base_trans[1]) < fractional_tolerance and \ + abs((transform[5] - base_trans[5]) / base_trans[5]) < fractional_tolerance: + logging.error('Resolution in file {} differs from resolution in file {}'.format(file, file_list[0])) + anybad = True + if anybad: + AttributeError('Files have mismatched resolutions, check logs for details') + + +def check_raster_files(file_list: List, fractional_tolerance: float = 0.0001, map_space: bool = False): + """ Check that files exist, are openable by gdal, and have the same projections + and resolutions (if necessary) + Args: + file_list: array-like of files to check + fractional_tolerance: relative tolerance deemed acceptable for resolution mismatch + map_space: + Returns: + None + """ + + check_files_exist(file_list) + check_raster_drivers(file_list) + if map_space: + check_same_raster_projections(file_list) + check_same_raster_resolutions(file_list, fractional_tolerance=fractional_tolerance) + + diff --git a/emit_utils/multi_raster_info.py b/emit_utils/multi_raster_info.py new file mode 100644 index 0000000..f4d98f5 --- /dev/null +++ b/emit_utils/multi_raster_info.py @@ -0,0 +1,62 @@ +""" +This code does some useful manipulations on multiple raster files. + +Author: Philip G. Brodrick, philip.brodrick@jpl.nasa.gov +""" + + +import gdal +from typing import List +import numpy as np + + +def get_bounding_extent(file_list: List, return_pixel_offsets=False, return_spatial_offsets=False): + """ + Get the outer bounded extent of a list of files, with options to also return pixel and + spatial offsets of each file. Pixel offsets assume everything is on the same grid. + Args: + file_list: array-like input of geospatial files + return_pixel_offsets: flag indicating if per-file pixel offsets should be returned + return_spatial_offsets: flag indicating if per-file spatial offsets should be returned + Returns: + x_min, y_max, x_max, y_min, [offset_px_list (x,y tuples)], [offset_spatial_list (x,y tuples)] + """ + + geotransforms = [] + extents = [] + for file in file_list: + dataset = gdal.Open(file, gdal.GA_ReadOnly) + geotransforms.append(dataset.GetGeoTransform()) + extents.append((dataset.RasterXSize, dataset.RasterYSize)) + + # find bounding x and y coordinate locations + min_x = np.nanmin([x[0] for x in geotransforms]) + max_x = np.nanmin([x[0] + x[1]*xs[0] for x, xs in zip(geotransforms, extents)]) + max_y = np.nanmax([x[3] for x in geotransforms]) + min_y = np.nanmin([x[3] + x[5]*xs[1] for x, xs in zip(geotransforms, extents)]) + + return_set = min_x, max_y, max_x, min_y + if return_pixel_offsets is False and return_spatial_offsets is False: + return return_set + + px_offsets = [] + map_offsets = [] + for trans in geotransforms: + x_offset_px = int(round((trans[0] - min_x)/trans[1])) + x_offset_map = trans[0] - min_x + + y_offset_px = int(round((trans[3] - max_y)/trans[1])) + y_offset_map = max_y - trans[3] # report in positive displacement units, arbitrary choice + + px_offsets.append((x_offset_px,y_offset_px)) + map_offsets.append((x_offset_map,y_offset_map)) + + if return_pixel_offsets: + return_set += px_offsets + + if return_spatial_offsets: + return_set += map_offsets + + return return_set + + diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..0c2b717 --- /dev/null +++ b/setup.py @@ -0,0 +1,18 @@ +""" +Core utility package to support EMIT data processing. + +Author: Philip G. Brodrick, philip.brodrick@jpl.nasa.gov +""" + +from setuptools import setup, find_packages + +setup(name='emit_utils', + packages=find_packages(), + include_package_data=True, + version='0.1.0', + install_requires=['gdal>=2.0'], + python_requires='>=3', + platforms='any', + classifiers=['Programming Language :: Python :: 3', + 'License :: OSI Approved :: Apache Software License', + 'Operating System :: OS Independent'])