diff --git a/LICENSE b/LICENSE index 81a75826e..0dd8f3f8c 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ This software is licensed under the Apache 2 license, quoted below. -Copyright 2017 Astraea. Inc. +Copyright 2017-2018 Astraea. Inc. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of diff --git a/README.md b/README.md index 8f97baf05..269166f7b 100644 --- a/README.md +++ b/README.md @@ -11,12 +11,13 @@ Please see the [Getting Started](http://rasterframes.io/getting-started.html) se ## Documentation +* [Giter8 Template](https://github.com/s22s/raster-frames.g8) (i.e. `sbt new s22s/raster-frames.g8`) * [Users' Manual](http://rasterframes.io/) * [API Documentation](http://rasterframes.io/latest/api/index.html) -* [List of available UDFs](http://rasterframes.io/latest/api/index.html#astraea.spark.rasterframes.ColumnFunctions) +* [List of available UDFs](http://rasterframes.io/latest/api/index.html#astraea.spark.rasterframes.RasterFunctions) ## Copyright and License -RasterFrames is released under the Apache 2.0 License, copyright Astraea, Inc. 2017. +RasterFrames is released under the Apache 2.0 License, copyright Astraea, Inc. 2017-2018. diff --git a/build.sbt b/build.sbt index e40706bd1..b7d645e3f 100644 --- a/build.sbt +++ b/build.sbt @@ -1,4 +1,5 @@ addCommandAlias("makeSite", "docs/makeSite") +addCommandAlias("console", "datasource/console") lazy val root = project .in(file(".")) @@ -18,27 +19,4 @@ lazy val docs = project lazy val bench = project .dependsOn(core) -initialCommands in console := """ - |import astraea.spark.rasterframes._ - |import geotrellis.raster._ - |import geotrellis.spark.io.kryo.KryoRegistrator - |import org.apache.spark.serializer.KryoSerializer - |import org.apache.spark.sql._ - |import org.apache.spark.sql.functions._ - |implicit val spark = SparkSession.builder() - | .master("local[*]") - | .appName(getClass.getName) - | .config("spark.serializer", classOf[KryoSerializer].getName) - | .config("spark.kryoserializer.buffer.max", "500m") - | .config("spark.kryo.registrationRequired", "false") - | .config("spark.kryo.registrator", classOf[KryoRegistrator].getName) - | .getOrCreate() - | .withRasterFrames - |spark.sparkContext.setLogLevel("ERROR") - |import spark.implicits._ - | -""".stripMargin -cleanupCommands in console := """ - |spark.stop() -""".stripMargin diff --git a/core/src/main/scala/astraea/spark/rasterframes/extensions/DataFrameMethods.scala b/core/src/main/scala/astraea/spark/rasterframes/extensions/DataFrameMethods.scala index b9e2ce4f3..2e86fddaf 100644 --- a/core/src/main/scala/astraea/spark/rasterframes/extensions/DataFrameMethods.scala +++ b/core/src/main/scala/astraea/spark/rasterframes/extensions/DataFrameMethods.scala @@ -16,44 +16,44 @@ package astraea.spark.rasterframes.extensions +import astraea.spark.rasterframes.StandardColumns._ +import astraea.spark.rasterframes.util._ +import astraea.spark.rasterframes.{MetadataKeys, RasterFrame} +import geotrellis.raster.Tile import geotrellis.spark.io._ import geotrellis.spark.{SpaceTimeKey, SpatialComponent, SpatialKey, TemporalKey, TileLayerMetadata} import geotrellis.util.MethodExtensions import org.apache.spark.sql.catalyst.expressions.Attribute -import org.apache.spark.sql.gt._ -import org.apache.spark.sql.types.MetadataBuilder -import org.apache.spark.sql.{Column, DataFrame} +import org.apache.spark.sql.functions._ +import org.apache.spark.sql.gt.types.TileUDT +import org.apache.spark.sql.types.{MetadataBuilder, StructField} +import org.apache.spark.sql.{Column, DataFrame, TypedColumn} import spray.json.JsonFormat -import astraea.spark.rasterframes.util._ -import astraea.spark.rasterframes.{MetadataKeys, RasterFrame} -import scala.util.Try +import scala.util.Try /** * Extension methods over [[DataFrame]]. * * @since 7/18/17 */ -trait DataFrameMethods extends MethodExtensions[DataFrame] with MetadataKeys { - import Implicits.WithMetadataBuilderMethods - import Implicits.WithMetadataMethods - import Implicits.WithRasterFrameMethods - import Implicits.WithDataFrameMethods +trait DataFrameMethods[DF <: DataFrame] extends MethodExtensions[DF] with MetadataKeys { + import Implicits.{WithDataFrameMethods, WithMetadataBuilderMethods, WithMetadataMethods, WithRasterFrameMethods} private def selector(column: Column) = (attr: Attribute) ⇒ attr.name == column.columnName || attr.semanticEquals(column.expr) /** Map over the Attribute representation of Columns, modifying the one matching `column` with `op`. */ - private[astraea] def mapColumnAttribute(column: Column, op: Attribute ⇒ Attribute): DataFrame = { + private[astraea] def mapColumnAttribute(column: Column, op: Attribute ⇒ Attribute): DF = { val analyzed = self.queryExecution.analyzed.output val selects = selector(column) val attrs = analyzed.map { attr ⇒ if(selects(attr)) op(attr) else attr } - self.select(attrs.map(a ⇒ new Column(a)): _*) + self.select(attrs.map(a ⇒ new Column(a)): _*).asInstanceOf[DF] } - private[astraea] def addColumnMetadata(column: Column, op: MetadataBuilder ⇒ MetadataBuilder): DataFrame = { + private[astraea] def addColumnMetadata(column: Column, op: MetadataBuilder ⇒ MetadataBuilder): DF = { mapColumnAttribute(column, attr ⇒ { val md = new MetadataBuilder().withMetadata(attr.metadata) attr.withMetadata(op(md).build) @@ -67,13 +67,13 @@ trait DataFrameMethods extends MethodExtensions[DataFrame] with MetadataKeys { private[astraea] def setSpatialColumnRole[K: SpatialComponent: JsonFormat]( - column: Column, md: TileLayerMetadata[K]) = + column: Column, md: TileLayerMetadata[K]): DF = addColumnMetadata(column, _.attachContext(md.asColumnMetadata).tagSpatialKey ) private[astraea] - def setTemporalColumnRole(column: Column) = + def setTemporalColumnRole(column: Column): DF = addColumnMetadata(column, _.tagTemporalKey) /** Get the role tag the column plays in the RasterFrame, if any. */ @@ -81,11 +81,47 @@ trait DataFrameMethods extends MethodExtensions[DataFrame] with MetadataKeys { def getColumnRole(column: Column): Option[String] = fetchMetadataValue(column, _.metadata.getString(SPATIAL_ROLE_KEY)) + /** Get the names of the columns that are of type `Tile` */ + def tileColumns: Seq[TypedColumn[Any, Tile]] = + self.schema.fields + .filter(_.dataType.typeName.equalsIgnoreCase(TileUDT.typeName)) + .map(f ⇒ col(f.name).as[Tile]) + + /** Get the spatial column. */ + def spatialKeyColumn: Option[TypedColumn[Any, SpatialKey]] = { + val key = findSpatialKeyField + key + .map(_.name) + .map(col(_).as[SpatialKey]) + } + + /** Get the temporal column, if any. */ + def temporalKeyColumn: Option[TypedColumn[Any, TemporalKey]] = { + val key = findTemporalKeyField + key.map(_.name).map(col(_).as[TemporalKey]) + } + + /** Find the field tagged with the requested `role` */ + private[rasterframes] def findRoleField(role: String): Option[StructField] = + self.schema.fields.find( + f ⇒ + f.metadata.contains(SPATIAL_ROLE_KEY) && + f.metadata.getString(SPATIAL_ROLE_KEY) == role + ) + + /** The spatial key is the first one found with context metadata attached to it. */ + private[rasterframes] def findSpatialKeyField: Option[StructField] = + findRoleField(SPATIAL_KEY_COLUMN.columnName) + + /** The temporal key is the first one found with the temporal tag. */ + private[rasterframes] def findTemporalKeyField: Option[StructField] = + findRoleField(TEMPORAL_KEY_COLUMN.columnName) + /** Renames all columns such that they start with the given prefix string. * Useful for preparing dataframes for joins where duplicate names may arise. */ - def withPrefixedColumnNames(prefix: String): DataFrame = - self.columns.foldLeft(self)((df, c) ⇒ df.withColumnRenamed(c, s"$prefix$c")) + def withPrefixedColumnNames(prefix: String): DF = + self.columns.foldLeft(self)((df, c) ⇒ df.withColumnRenamed(c, s"$prefix$c").asInstanceOf[DF]) /** Converts this DataFrame to a RasterFrame after ensuring it has: * diff --git a/core/src/main/scala/astraea/spark/rasterframes/extensions/Implicits.scala b/core/src/main/scala/astraea/spark/rasterframes/extensions/Implicits.scala index 747ff058e..36e819d9c 100644 --- a/core/src/main/scala/astraea/spark/rasterframes/extensions/Implicits.scala +++ b/core/src/main/scala/astraea/spark/rasterframes/extensions/Implicits.scala @@ -44,7 +44,7 @@ trait Implicits { implicit class WithProjectedRasterMethods(val self: ProjectedRaster[Tile]) extends ProjectedRasterMethods - implicit class WithDataFrameMethods(val self: DataFrame) extends DataFrameMethods + implicit class WithDataFrameMethods[D <: DataFrame](val self: D) extends DataFrameMethods[D] implicit class WithRasterFrameMethods(val self: RasterFrame) extends RasterFrameMethods diff --git a/core/src/main/scala/astraea/spark/rasterframes/extensions/RFSpatialColumnMethods.scala b/core/src/main/scala/astraea/spark/rasterframes/extensions/RFSpatialColumnMethods.scala index b698e2255..a3f76d638 100644 --- a/core/src/main/scala/astraea/spark/rasterframes/extensions/RFSpatialColumnMethods.scala +++ b/core/src/main/scala/astraea/spark/rasterframes/extensions/RFSpatialColumnMethods.scala @@ -103,7 +103,6 @@ trait RFSpatialColumnMethods extends MethodExtensions[RasterFrame] with Standard case rf ⇒ rf.certify } } - } object RFSpatialColumnMethods { diff --git a/core/src/main/scala/astraea/spark/rasterframes/extensions/RasterFrameMethods.scala b/core/src/main/scala/astraea/spark/rasterframes/extensions/RasterFrameMethods.scala index 7218a415d..64fcacf78 100644 --- a/core/src/main/scala/astraea/spark/rasterframes/extensions/RasterFrameMethods.scala +++ b/core/src/main/scala/astraea/spark/rasterframes/extensions/RasterFrameMethods.scala @@ -16,13 +16,13 @@ package astraea.spark.rasterframes.extensions -import java.sql.Timestamp import java.time.ZonedDateTime +import astraea.spark.rasterframes.util._ import astraea.spark.rasterframes.{MetadataKeys, RasterFrame} import geotrellis.proj4.CRS import geotrellis.raster.resample.{Bilinear, ResampleMethod} -import geotrellis.raster.{MultibandTile, ProjectedRaster, Tile, TileLayout} +import geotrellis.raster.{CellGrid, MultibandTile, ProjectedRaster, Tile, TileLayout} import geotrellis.spark._ import geotrellis.spark.io._ import geotrellis.spark.tiling.{LayoutDefinition, Tiler} @@ -32,11 +32,8 @@ import org.apache.spark.annotation.Experimental import org.apache.spark.rdd.RDD import org.apache.spark.sql._ import org.apache.spark.sql.functions._ -import org.apache.spark.sql.gt.types.TileUDT -import org.apache.spark.sql.types.{Metadata, StructField, TimestampType} +import org.apache.spark.sql.types.{Metadata, TimestampType} import spray.json._ -import astraea.spark.rasterframes.util._ -import astraea.spark.rasterframes.encoders.StandardEncoders._ import scala.reflect.runtime.universe._ @@ -46,57 +43,36 @@ import scala.reflect.runtime.universe._ */ trait RasterFrameMethods extends MethodExtensions[RasterFrame] with RFSpatialColumnMethods with MetadataKeys with LazyLogging { - import Implicits.WithDataFrameMethods - import Implicits.WithRasterFrameMethods + import Implicits.{WithDataFrameMethods, WithRasterFrameMethods} private val _stableDF = self import _stableDF.sqlContext.implicits._ - /** Get the names of the columns that are of type `Tile` */ - def tileColumns: Seq[TypedColumn[Any, Tile]] = - self.schema.fields - .filter(_.dataType.typeName.equalsIgnoreCase(TileUDT.typeName)) - .map(f ⇒ col(f.name).as[Tile]) + + /** + * A convenience over `DataFrame.withColumnRenamed` whereby the `RasterFrame` type is maintained. + */ + def withRFColumnRenamed(existingName: String, newName: String): RasterFrame = + (self: DataFrame).withColumnRenamed(existingName, newName).certify /** Get the spatial column. */ def spatialKeyColumn: TypedColumn[Any, SpatialKey] = { - val key = findSpatialKeyField + val key = self.findSpatialKeyField key .map(_.name) .map(col(_).as[SpatialKey]) .getOrElse(throw new IllegalArgumentException("All RasterFrames must have a column tagged with context")) } - /** Get the temporal column, if any. */ - def temporalKeyColumn: Option[TypedColumn[Any, TemporalKey]] = { - val key = findTemporalKeyField - key.map(_.name).map(col(_).as[TemporalKey]) - } - - private[rasterframes] def findRoleField(role: String): Option[StructField] = - self.schema.fields.find( - f ⇒ - f.metadata.contains(SPATIAL_ROLE_KEY) && - f.metadata.getString(SPATIAL_ROLE_KEY) == role - ) - - /** The spatial key is the first one found with context metadata attached to it. */ - private[rasterframes] def findSpatialKeyField: Option[StructField] = - findRoleField(SPATIAL_KEY_COLUMN.columnName) - - /** The temporal key is the first one found with the temporal tag. */ - private[rasterframes] def findTemporalKeyField: Option[StructField] = - findRoleField(TEMPORAL_KEY_COLUMN.columnName) - /** * Reassemble the [[TileLayerMetadata]] record from DataFrame metadata. */ def tileLayerMetadata: Either[TileLayerMetadata[SpatialKey], TileLayerMetadata[SpaceTimeKey]] = { - val spatialMD = findSpatialKeyField + val spatialMD = self.findSpatialKeyField .map(_.metadata) .getOrElse(throw new IllegalArgumentException(s"RasterFrame operation requsted on non-RasterFrame: $self")) - if (findTemporalKeyField.nonEmpty) + if (self.findTemporalKeyField.nonEmpty) Right(extract[TileLayerMetadata[SpaceTimeKey]](CONTEXT_METADATA_KEY)(spatialMD)) else Left(extract[TileLayerMetadata[SpatialKey]](CONTEXT_METADATA_KEY)(spatialMD)) @@ -107,7 +83,7 @@ trait RasterFrameMethods extends MethodExtensions[RasterFrame] /** Add a temporal key to the RasterFrame, assigning the same temporal key to all rows. */ def addTemporalComponent(value: TemporalKey): RasterFrame = { - require(temporalKeyColumn.isEmpty, "RasterFrame already has a temporal component") + require(self.temporalKeyColumn.isEmpty, "RasterFrame already has a temporal component") val tlm = tileLayerMetadata.left.get val newTlm = tlm.map(k ⇒ SpaceTimeKey(k, value)) @@ -204,8 +180,8 @@ trait RasterFrameMethods extends MethodExtensions[RasterFrame] */ def clipLayerExtent: RasterFrame = { val metadata = tileLayerMetadata - val extent = metadata.fold(_.extent, _.extent) - val layout = metadata.fold(_.layout, _.layout) + val extent = metadata.widen.extent + val layout = metadata.widen.layout val trans = layout.mapTransform def updateBounds[T: SpatialComponent: Boundable: JsonFormat: TypeTag](tlm: TileLayerMetadata[T], @@ -216,16 +192,16 @@ trait RasterFrameMethods extends MethodExtensions[RasterFrame] val gridExtent = trans(keyBounds.toGridBounds()) val newExtent = gridExtent.intersection(extent).getOrElse(gridExtent) - self.setSpatialColumnRole(spatialKeyColumn, tlm.copy(extent = newExtent, bounds = keyBounds)) + self.setSpatialColumnRole(self.spatialKeyColumn, tlm.copy(extent = newExtent, bounds = keyBounds)) } val df = metadata.fold( - tlm ⇒ updateBounds(tlm, self.select(spatialKeyColumn)), + tlm ⇒ updateBounds(tlm, self.select(self.spatialKeyColumn)), tlm ⇒ { updateBounds( tlm, self - .select(spatialKeyColumn, temporalKeyColumn.get) + .select(self.spatialKeyColumn, self.temporalKeyColumn.get) .map { case (s, t) ⇒ SpaceTimeKey(s, t) } ) } @@ -240,10 +216,10 @@ trait RasterFrameMethods extends MethodExtensions[RasterFrame] */ def toTileLayerRDD(tileCol: Column): Either[TileLayerRDD[SpatialKey], TileLayerRDD[SpaceTimeKey]] = tileLayerMetadata.fold( - tlm ⇒ Left(ContextRDD(self.select(spatialKeyColumn, tileCol.as[Tile]).rdd, tlm)), + tlm ⇒ Left(ContextRDD(self.select(self.spatialKeyColumn, tileCol.as[Tile]).rdd, tlm)), tlm ⇒ { val rdd = self - .select(spatialKeyColumn, temporalKeyColumn.get, tileCol.as[Tile]) + .select(self.spatialKeyColumn, self.temporalKeyColumn.get, tileCol.as[Tile]) .rdd .map { case (sk, tk, v) ⇒ (SpaceTimeKey(sk, tk), v) } Right(ContextRDD(rdd, tlm)) @@ -252,10 +228,14 @@ trait RasterFrameMethods extends MethodExtensions[RasterFrame] /** Convert all the tile columns in a Rasterrame to a GeoTrellis [[MultibandTileLayerRDD]] */ def toMultibandTileLayerRDD: Either[MultibandTileLayerRDD[SpatialKey], MultibandTileLayerRDD[SpaceTimeKey]] = + toMultibandTileLayerRDD(self.tileColumns: _*) + + /** Convert the specified tile columns in a Rasterrame to a GeoTrellis [[MultibandTileLayerRDD]] */ + def toMultibandTileLayerRDD(tileCols: Column*): Either[MultibandTileLayerRDD[SpatialKey], MultibandTileLayerRDD[SpaceTimeKey]] = tileLayerMetadata.fold( tlm ⇒ { val rdd = self - .select(spatialKeyColumn, array(self.tileColumns: _*)).as[(SpatialKey, Array[Tile])] + .select(self.spatialKeyColumn, array(tileCols: _*)).as[(SpatialKey, Array[Tile])] .rdd .map { case (sk, tiles) ⇒ (sk, MultibandTile(tiles)) @@ -264,7 +244,7 @@ trait RasterFrameMethods extends MethodExtensions[RasterFrame] }, tlm ⇒ { val rdd = self - .select(spatialKeyColumn, temporalKeyColumn.get, array(self.tileColumns: _*)).as[(SpatialKey, TemporalKey, Array[Tile])] + .select(self.spatialKeyColumn, self.temporalKeyColumn.get, array(tileCols: _*)).as[(SpatialKey, TemporalKey, Array[Tile])] .rdd .map { case (sk, tk, tiles) ⇒ (SpaceTimeKey(sk, tk), MultibandTile(tiles)) } Right(ContextRDD(rdd, tlm)) @@ -275,8 +255,50 @@ trait RasterFrameMethods extends MethodExtensions[RasterFrame] private[rasterframes] def extract[M: JsonFormat](metadataKey: String)(md: Metadata) = md.getMetadata(metadataKey).json.parseJson.convertTo[M] - /** Convert the tiles in the RasterFrame into a single raster. For RasterFrames keyed with temporal keys, this - * will merge based */ + // TODO: Take care of DRY below +// private def rasterize[T <: CellGrid: TypeTag]( +// tileCols: Seq[Column], +// rasterCols: Int, +// rasterRows: Int, +// resampler: ResampleMethod): ProjectedRaster[T] = { +// +// val clipped = clipLayerExtent +// +// val md = clipped.tileLayerMetadata.widen +// val newLayout = LayoutDefinition(md.extent, TileLayout(1, 1, rasterCols, rasterRows)) +// +// val trans = md.mapTransform +// +// //val cellType = rdd.first()._2.cellType +// val keyBounds = Bounds(SpatialKey(0, 0), SpatialKey(0, 0)) +// val newLayerMetadata = +// md.copy(layout = newLayout, bounds = keyBounds) +// +// +// val newLayer = typeOf[T] match { +// case tpe if tpe <:< typeOf[Tile] ⇒ +// val r = clipped.toTileLayerRDD(tileCols.head) +// .fold(identity, _.map { case (stk, t) ⇒ (stk.spatialKey, t) }) // <-- Drops the temporal key outright +// .map { case (key, tile) ⇒ (ProjectedExtent(trans(key), md.crs), tile) } +// ContextRDD(r, md) +// .tileToLayout(newLayerMetadata, Tiler.Options(resampler)) +// case tpe if tpe <:< typeOf[MultibandTile] ⇒ +// val r = clipped.toMultibandTileLayerRDD(tileCols: _*) +// .fold(identity, _.map { case (stk, t) ⇒ (stk.spatialKey, t) }) // <-- Drops the temporal key outright +// .map { case (key, tile) ⇒ (ProjectedExtent(trans(key), md.crs), tile) } +// ContextRDD(r, md) +// .tileToLayout(newLayerMetadata, Tiler.Options(resampler)) +// } +// +// val stitchedTile = newLayer.stitch() +// +// val croppedTile = stitchedTile.crop(rasterCols, rasterRows) +// +// ProjectedRaster(croppedTile, md.extent, md.crs) +// } + + /** Convert the tiles in the RasterFrame into a single raster. For RasterFrames keyed with temporal keys, they + * will be merge undeterministically. */ def toRaster(tileCol: Column, rasterCols: Int, rasterRows: Int, @@ -286,10 +308,10 @@ trait RasterFrameMethods extends MethodExtensions[RasterFrame] val md = clipped.tileLayerMetadata.widen val trans = md.mapTransform - val keyCol = clipped.spatialKeyColumn val newLayout = LayoutDefinition(md.extent, TileLayout(1, 1, rasterCols, rasterRows)) - val rdd: RDD[(SpatialKey, Tile)] = clipped.select(keyCol, tileCol).as[(SpatialKey, Tile)].rdd + val rdd = clipped.toTileLayerRDD(tileCol) + .fold(identity, _.map{ case(stk, t) ⇒ (stk.spatialKey, t) }) // <-- Drops the temporal key outright val cellType = rdd.first()._2.cellType @@ -310,4 +332,39 @@ trait RasterFrameMethods extends MethodExtensions[RasterFrame] ProjectedRaster(croppedTile, md.extent, md.crs) } + /** Convert the Red, Green & Blue assigned tiles in the RasterFrame into a single color composite raster. + * For RasterFrames keyed with temporal keys, they will be merged underterministically. */ + def toMultibandRaster( + tileCols: Seq[Column], + rasterCols: Int, + rasterRows: Int, + resampler: ResampleMethod = Bilinear): ProjectedRaster[MultibandTile] = { + + val clipped = clipLayerExtent + + val md = clipped.tileLayerMetadata.widen + val trans = md.mapTransform + val newLayout = LayoutDefinition(md.extent, TileLayout(1, 1, rasterCols, rasterRows)) + + val rdd = clipped.toMultibandTileLayerRDD(tileCols: _*) + .fold(identity, _.map{ case(stk, t) ⇒ (stk.spatialKey, t)}) // <-- Drops the temporal key outright + + val cellType = rdd.first()._2.cellType + + val newLayerMetadata = + md.copy(layout = newLayout, bounds = Bounds(SpatialKey(0, 0), SpatialKey(0, 0)), cellType = cellType) + + val newLayer = rdd + .map { + case (key, tile) ⇒ + (ProjectedExtent(trans(key), md.crs), tile) + } + .tileToLayout(newLayerMetadata, Tiler.Options(resampler)) + + val stitchedTile = newLayer.stitch() + + val croppedTile = stitchedTile.crop(rasterCols, rasterRows) + + ProjectedRaster(croppedTile, md.extent, md.crs) + } } diff --git a/core/src/main/scala/astraea/spark/rasterframes/util/MultibandRender.scala b/core/src/main/scala/astraea/spark/rasterframes/util/MultibandRender.scala new file mode 100644 index 000000000..fdaf70b7d --- /dev/null +++ b/core/src/main/scala/astraea/spark/rasterframes/util/MultibandRender.scala @@ -0,0 +1,120 @@ +/* + * This software is licensed under the Apache 2 license, quoted below. + * + * Copyright 2018 Astraea. Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * [http://www.apache.org/licenses/LICENSE-2.0] + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + * + * + */ + +package astraea.spark.rasterframes.util + +import geotrellis.raster._ +import geotrellis.raster.render.Png + +/** + * Rework of process courtesy of @lossyrob for creating natural color RGB images in GeoTrellis. + * + * Source: https://goo.gl/9ewJCG + * + * @since 3/27/18 + */ +object MultibandRender { + object CellTransforms { + def clamp(min: Int, max: Int)(z: Int) = { + if(isData(z)) { if(z > max) { max } else if(z < min) { min } else { z } } + else { z } + } + + val clampByte = clamp(0, 255) _ + + def brightnessCorrect(brightness: Int)(v: Int): Int = + if(v > 0) { v + brightness } + else { v } + + def contrastCorrect(contrast: Int)(v: Int): Int = { + val contrastFactor = (259 * (contrast + 255)) / (255 * (259 - contrast)) + (contrastFactor * (v - 128)) + 128 + } + + def gammaCorrect(gamma: Double)(v: Int): Int = { + val gammaCorrection = 1 / gamma + (255 * math.pow(v / 255.0, gammaCorrection)).toInt + } + } + import CellTransforms._ + + trait Profile { + /** Value from -255 to 255 */ + val brightness: Int = 0 + /** Value from -255 to 255 */ + val contrast: Int = 0 + /** 0.01 to 7.99 */ + val gamma: Double = 1.0 + + /** Get the red band. */ + def red(mb: MultibandTile): Tile = mb.band(0) + /** Get the green band. */ + def green(mb: MultibandTile): Tile = mb.band(1) + /** Get the blue band. */ + def blue(mb: MultibandTile): Tile = mb.band(2) + + /** Convert the tile to an Int-based cell type. */ + def normalizeCellType(tile: Tile): Tile = tile.convert(IntCellType) + + /** Convert tile such that cells values fall between 0 and 255. */ + def compressRange(tile: Tile): Tile = tile + + /** Apply color correction so it "looks nice". */ + def colorAdjust(tile: Tile): Tile = { + val pipeline = + brightnessCorrect(brightness) _ andThen + clampByte andThen + gammaCorrect(gamma) andThen + clampByte andThen + contrastCorrect(contrast) andThen + clampByte + + normalizeCellType(tile).map(pipeline) + } + + val applyAdjustment = compressRange _ andThen colorAdjust + } + case object Default extends Profile + + case object Landsat8NaturalColor extends Profile { + // @lossyrob magic numbers: "Fiddled with until visually it looked ok. ¯\_(ツ)_/¯" + override val brightness = 15 + override val contrast = 30 + override val gamma = 0.8 + val (clampMin, clampMax) = (4000, 15176) + + override def compressRange(tile: Tile): Tile = { + val clamper = clamp(clampMin, clampMax) _ + tile.map(clamper).normalize(clampMin, clampMax, 0, 255) + } + } + + case object NAIPNaturalColor extends Profile { + override val gamma = 1.4 + override def compressRange(tile: Tile): Tile = tile.rescale(0, 255) + } + + def rgbComposite(tile: MultibandTile, profile: Profile): Png = { + val red = profile.applyAdjustment(profile.red(tile)) + val green = profile.applyAdjustment(profile.green(tile)) + val blue = profile.applyAdjustment(profile.blue(tile)) + ArrayMultibandTile(red, green, blue).renderPng + } +} diff --git a/core/src/test/resources/NAIP-VA-b1.tiff b/core/src/test/resources/NAIP-VA-b1.tiff new file mode 100644 index 000000000..65140ed09 Binary files /dev/null and b/core/src/test/resources/NAIP-VA-b1.tiff differ diff --git a/core/src/test/resources/NAIP-VA-b2.tiff b/core/src/test/resources/NAIP-VA-b2.tiff new file mode 100644 index 000000000..3e46fd32f Binary files /dev/null and b/core/src/test/resources/NAIP-VA-b2.tiff differ diff --git a/core/src/test/resources/NAIP-VA-b3.tiff b/core/src/test/resources/NAIP-VA-b3.tiff new file mode 100644 index 000000000..ad107c38c Binary files /dev/null and b/core/src/test/resources/NAIP-VA-b3.tiff differ diff --git a/core/src/test/resources/NAIP-VA-b4.tiff b/core/src/test/resources/NAIP-VA-b4.tiff new file mode 100644 index 000000000..d9d9eaeb0 Binary files /dev/null and b/core/src/test/resources/NAIP-VA-b4.tiff differ diff --git a/core/src/test/scala/astraea/spark/rasterframes/ExplodeSpec.scala b/core/src/test/scala/astraea/spark/rasterframes/ExplodeSpec.scala index 1a7346f98..fb08104f0 100644 --- a/core/src/test/scala/astraea/spark/rasterframes/ExplodeSpec.scala +++ b/core/src/test/scala/astraea/spark/rasterframes/ExplodeSpec.scala @@ -21,7 +21,6 @@ package astraea.spark.rasterframes import geotrellis.raster._ import geotrellis.raster.resample.NearestNeighbor -import org.apache.spark.sql.functions._ /** diff --git a/core/src/test/scala/astraea/spark/rasterframes/ExtensionMethodSpec.scala b/core/src/test/scala/astraea/spark/rasterframes/ExtensionMethodSpec.scala new file mode 100644 index 000000000..1834f1fa2 --- /dev/null +++ b/core/src/test/scala/astraea/spark/rasterframes/ExtensionMethodSpec.scala @@ -0,0 +1,50 @@ +/* + * This software is licensed under the Apache 2 license, quoted below. + * + * Copyright 2018 Astraea. Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * [http://www.apache.org/licenses/LICENSE-2.0] + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + * + * + */ + +package astraea.spark.rasterframes + +/** + * Tests miscellaneous extension methods. + * + * @since 3/20/18 + */ +//noinspection ScalaUnusedSymbol +class ExtensionMethodSpec extends TestEnvironment with TestData { + lazy val rf = sampleTileLayerRDD.toRF + + describe("DataFrame exention methods") { + it("should maintain original type") { + val df = rf.withPrefixedColumnNames("_foo_") + "val rf2: RasterFrame = df" should compile + } + it("should provide tagged column access") { + val df = rf.drop("tile") + "val Some(col) = df.spatialKeyColumn" should compile + } + } + describe("RasterFrame exention methods") { + it("should provide spatial key column") { + noException should be thrownBy { + rf.spatialKeyColumn + } + "val Some(col) = rf.spatialKeyColumn" shouldNot compile + } + } +} diff --git a/core/src/test/scala/astraea/spark/rasterframes/RasterFrameSpec.scala b/core/src/test/scala/astraea/spark/rasterframes/RasterFrameSpec.scala index 7850bea6d..18fd4b761 100644 --- a/core/src/test/scala/astraea/spark/rasterframes/RasterFrameSpec.scala +++ b/core/src/test/scala/astraea/spark/rasterframes/RasterFrameSpec.scala @@ -6,8 +6,8 @@ import java.sql.Timestamp import java.time.ZonedDateTime import geotrellis.proj4.LatLng -import geotrellis.raster.render.{ColorMap, ColorRamp} -import geotrellis.raster.{ProjectedRaster, Tile, TileFeature, TileLayout} +import geotrellis.raster.render.{ColorMap, ColorRamp, ColorRamps} +import geotrellis.raster.{IntCellType, ProjectedRaster, Tile, TileFeature, TileLayout} import geotrellis.spark._ import geotrellis.spark.io._ import geotrellis.spark.tiling._ @@ -45,7 +45,8 @@ class RasterFrameSpec extends TestEnvironment with MetadataKeys val df1 = baseDF.withPrefixedColumnNames("ONE_") val df2 = baseDF.withPrefixedColumnNames("TWO_") - + val spark = df1.sparkSession + import spark.implicits._ assert(df1.columns.forall(_.startsWith("ONE_"))) assert(df2.columns.forall(_.startsWith("TWO_"))) assert(df1.join(df2, $"ONE_int" === $"TWO_int").columns === df1.columns ++ df2.columns) @@ -82,8 +83,6 @@ class RasterFrameSpec extends TestEnvironment with MetadataKeys val rf = tileLayerRDD.toRF - //rf.printSchema() - //rf.show() try { assert(rf.tileColumns.nonEmpty) assert(rf.spatialKeyColumn.columnName === "spatial_key") @@ -271,11 +270,9 @@ class RasterFrameSpec extends TestEnvironment with MetadataKeys it("should rasterize with a spatiotemporal key") { val rf = TestData.randomSpatioTemporalTileLayerRDD(20, 20, 2, 2).toRF - val md = rf.schema.fields(0).metadata - - //println(rf.extract[TileLayerMetadata[SpaceTimeKey]](CONTEXT_METADATA_KEY)(md)) - - rf.toRaster($"tile", 128, 128) + noException shouldBe thrownBy { + rf.toRaster($"tile", 128, 128) + } } it("should maintain metadata after all spatial join operations") { @@ -290,6 +287,35 @@ class RasterFrameSpec extends TestEnvironment with MetadataKeys } } + it("should rasterize multiband") { + withClue("Landsat") { + val blue = TestData.l8Sample(1).projectedRaster.toRF.withRFColumnRenamed("tile", "blue") + val green = TestData.l8Sample(2).projectedRaster.toRF.withRFColumnRenamed("tile", "green") + val red = TestData.l8Sample(3).projectedRaster.toRF.withRFColumnRenamed("tile", "red") + + val joined = blue.spatialJoin(green).spatialJoin(red) + + noException shouldBe thrownBy { + val raster = joined.toMultibandRaster(Seq($"red", $"green", $"blue"), 128, 128) + val png = MultibandRender.rgbComposite(raster.tile, MultibandRender.Landsat8NaturalColor) + //png.write(s"target/${getClass.getSimpleName}.png") + } + } + withClue("NAIP") { + val red = TestData.naipSample(1).projectedRaster.toRF.withRFColumnRenamed("tile", "red") + val green = TestData.naipSample(2).projectedRaster.toRF.withRFColumnRenamed("tile", "green") + val blue = TestData.naipSample(3).projectedRaster.toRF.withRFColumnRenamed("tile", "blue") + val joined = blue.spatialJoin(green).spatialJoin(red) + joined.printSchema + + noException shouldBe thrownBy { + val raster = joined.toMultibandRaster(Seq($"red", $"green", $"blue"), 256, 256) + val png = MultibandRender.rgbComposite(raster.tile, MultibandRender.NAIPNaturalColor) + png.write(s"target/${getClass.getSimpleName}.png") + } + } + } + it("should restitch to raster") { // 774 × 500 val praster: ProjectedRaster[Tile] = sampleGeoTiff.projectedRaster diff --git a/core/src/test/scala/astraea/spark/rasterframes/TestData.scala b/core/src/test/scala/astraea/spark/rasterframes/TestData.scala index 75f3242c0..ade97090b 100644 --- a/core/src/test/scala/astraea/spark/rasterframes/TestData.scala +++ b/core/src/test/scala/astraea/spark/rasterframes/TestData.scala @@ -102,6 +102,11 @@ trait TestData { } def l8Labels = readSingleband("L8-Labels-Elkton-VA.tiff") + def naipSample(band: Int) = { + require((1 to 4).contains(band), "Invalid band number") + readSingleband(s"NAIP-VA-b$band.tiff") + } + def sampleTileLayerRDD(implicit spark: SparkSession): TileLayerRDD[SpatialKey] = { val raster = sampleGeoTiff.projectedRaster.reproject(LatLng) val layout = LayoutDefinition(LatLng.worldExtent, TileLayout(36, 18, 128, 128)) diff --git a/datasource/build.sbt b/datasource/build.sbt index 8e89238e6..41c933ff9 100644 --- a/datasource/build.sbt +++ b/datasource/build.sbt @@ -9,3 +9,31 @@ libraryDependencies ++= Seq( // Run generateDocs to help convert examples to tut docs. //docsMap := Map(baseDirectory.value / "src" / "test" -> target.value / "literator" ) + + +initialCommands in console := """ + |import astraea.spark.rasterframes._ + |import geotrellis.raster._ + |import geotrellis.spark.io.kryo.KryoRegistrator + |import org.apache.spark.serializer.KryoSerializer + |import org.apache.spark.sql._ + |import org.apache.spark.sql.functions._ + |import astraea.spark.rasterframes.datasource.geotrellis._ + |import astraea.spark.rasterframes.datasource.geotiff._ + |implicit val spark = SparkSession.builder() + | .master("local[*]") + | .appName(getClass.getName) + | .config("spark.serializer", classOf[KryoSerializer].getName) + | .config("spark.kryoserializer.buffer.max", "500m") + | .config("spark.kryo.registrationRequired", "false") + | .config("spark.kryo.registrator", classOf[KryoRegistrator].getName) + | .getOrCreate() + | .withRasterFrames + |spark.sparkContext.setLogLevel("ERROR") + |import spark.implicits._ + | +""".stripMargin + +cleanupCommands in console := """ + |spark.stop() + """.stripMargin \ No newline at end of file diff --git a/datasource/src/main/scala/astraea/spark/rasterframes/datasource/geotiff/DefaultSource.scala b/datasource/src/main/scala/astraea/spark/rasterframes/datasource/geotiff/DefaultSource.scala index d5f9f4b94..31b75f4bc 100644 --- a/datasource/src/main/scala/astraea/spark/rasterframes/datasource/geotiff/DefaultSource.scala +++ b/datasource/src/main/scala/astraea/spark/rasterframes/datasource/geotiff/DefaultSource.scala @@ -24,6 +24,7 @@ import java.net.URI import org.apache.spark.annotation.Experimental import org.apache.spark.sql.SQLContext import org.apache.spark.sql.sources.{DataSourceRegister, RelationProvider} +import astraea.spark.rasterframes._ /** * @@ -36,6 +37,7 @@ class DefaultSource extends DataSourceRegister with RelationProvider { def createRelation(sqlContext: SQLContext, parameters: Map[String, String]) = { require(parameters.contains("path"), "'path' parameter required.") val uri: URI = URI.create(parameters("path")) + sqlContext.withRasterFrames GeoTiffRelation(sqlContext, uri) } } diff --git a/datasource/src/main/scala/astraea/spark/rasterframes/datasource/geotiff/GeoTiffRelation.scala b/datasource/src/main/scala/astraea/spark/rasterframes/datasource/geotiff/GeoTiffRelation.scala index d0c9bfde5..1d14b7ec8 100644 --- a/datasource/src/main/scala/astraea/spark/rasterframes/datasource/geotiff/GeoTiffRelation.scala +++ b/datasource/src/main/scala/astraea/spark/rasterframes/datasource/geotiff/GeoTiffRelation.scala @@ -37,6 +37,8 @@ import org.apache.spark.sql.sources._ import org.apache.spark.sql.types._ import org.apache.spark.sql.{Row, SQLContext} import astraea.spark.rasterframes.util._ +import geotrellis.raster.TileLayout +import geotrellis.raster.io.geotiff.MultibandGeoTiff /** * Spark SQL data source over a single GeoTiff file. Works best with CoG compliant ones. @@ -52,8 +54,27 @@ case class GeoTiffRelation(sqlContext: SQLContext, uri: URI) extends BaseRelatio ) } + val MAX_SIZE = 256 + def defaultLayout(cols: Int, rows: Int): TileLayout = { + def divs(cells: Int) = { + val layoutDivs = math.ceil(cells / MAX_SIZE.toFloat) + val tileDivs = math.ceil(cells / layoutDivs) + (layoutDivs.toInt, tileDivs.toInt) + } + val (layoutCols, tileCols) = divs(rows) + val (layoutRows, tileRows) = divs(rows) + TileLayout(layoutCols, layoutRows, tileCols, tileRows) + } + lazy val tileLayerMetadata: TileLayerMetadata[SpatialKey] = { - val layout = info.segmentLayout.tileLayout + val layout = if(!info.segmentLayout.isTiled) { + val width = info.segmentLayout.totalCols + val height = info.segmentLayout.totalRows + defaultLayout(width, height) + } + else { + info.segmentLayout.tileLayout + } val extent = info.extent val crs = info.crs val cellType = info.cellType @@ -62,6 +83,7 @@ case class GeoTiffRelation(sqlContext: SQLContext, uri: URI) extends BaseRelatio SpatialKey(layout.layoutCols - 1, layout.layoutRows - 1) ) TileLayerMetadata(cellType, LayoutDefinition(extent, layout), extent, crs, bounds) + } def schema: StructType = { @@ -70,8 +92,6 @@ case class GeoTiffRelation(sqlContext: SQLContext, uri: URI) extends BaseRelatio .attachContext(tileLayerMetadata.asColumnMetadata) .tagSpatialKey.build - val extentSchema = ExpressionEncoder[Extent]().schema - val baseName = TILE_COLUMN.columnName val tileCols = (if (info.bandCount == 1) Seq(baseName) else { @@ -82,7 +102,7 @@ case class GeoTiffRelation(sqlContext: SQLContext, uri: URI) extends BaseRelatio StructType(Seq( StructField(SPATIAL_KEY_COLUMN.columnName, skSchema, nullable = false, skMetadata), - StructField(BOUNDS_COLUMN.columnName, extentSchema, nullable = false), + StructField(BOUNDS_COLUMN.columnName, org.apache.spark.sql.jts.JTSTypes.PolygonTypeInstance, nullable = true), StructField(METADATA_COLUMN.columnName, DataTypes.createMapType(StringType, StringType, false) ) @@ -97,23 +117,41 @@ case class GeoTiffRelation(sqlContext: SQLContext, uri: URI) extends BaseRelatio val columnIndexes = requiredColumns.map(schema.fieldIndex) val tlm = tileLayerMetadata - val mapTransform = tlm.mapTransform + val trans = tlm.mapTransform val metadata = info.tags.headTags - // TODO: Figure out how to do tile filtering via the range reader. - // Something with geotrellis.spark.io.GeoTiffInfoReader#windowsByPartition? - HadoopGeoTiffRDD.spatialMultiband(new Path(uri), HadoopGeoTiffRDD.Options.DEFAULT) - .map { case (pe, tiles) ⇒ - // NB: I think it's safe to take the min coord of the - // transform result because the layout is directly from the TIFF - val gb = mapTransform.extentToBounds(pe.extent) - val entries = columnIndexes.map { - case 0 ⇒ SpatialKey(gb.colMin, gb.rowMin) - case 1 ⇒ pe.extent - case 2 ⇒ metadata - case n ⇒ tiles.band(n - 3) + if(info.segmentLayout.isTiled) { + // TODO: Figure out how to do tile filtering via the range reader. + // Something with geotrellis.spark.io.GeoTiffInfoReader#windowsByPartition? + HadoopGeoTiffRDD.spatialMultiband(new Path(uri), HadoopGeoTiffRDD.Options.DEFAULT) + .map { case (pe, tiles) ⇒ + // NB: I think it's safe to take the min coord of the + // transform result because the layout is directly from the TIFF + val gb = trans.extentToBounds(pe.extent) + val entries = columnIndexes.map { + case 0 ⇒ SpatialKey(gb.colMin, gb.rowMin) + case 1 ⇒ pe.extent.jtsGeom + case 2 ⇒ metadata + case n ⇒ tiles.band(n - 3) + } + Row(entries: _*) + } + } + else { + logger.warn("GeoTIFF is not already tiled. In-memory read required: " + uri) + val geotiff = HadoopGeoTiffReader.readMultiband(new Path(uri)) + val rdd = sqlContext.sparkContext.makeRDD(Seq((geotiff.projectedExtent, geotiff.tile))) + + rdd.tileToLayout(tlm) + .map { case (sk, tiles) ⇒ + val entries = columnIndexes.map { + case 0 ⇒ sk + case 1 ⇒ trans.keyToExtent(sk).jtsGeom + case 2 ⇒ metadata + case n ⇒ tiles.band(n - 3) + } + Row(entries: _*) } - Row(entries: _*) - } + } } } diff --git a/datasource/src/main/scala/astraea/spark/rasterframes/datasource/geotrellis/DefaultSource.scala b/datasource/src/main/scala/astraea/spark/rasterframes/datasource/geotrellis/DefaultSource.scala index d4e57364a..92137ecb3 100644 --- a/datasource/src/main/scala/astraea/spark/rasterframes/datasource/geotrellis/DefaultSource.scala +++ b/datasource/src/main/scala/astraea/spark/rasterframes/datasource/geotrellis/DefaultSource.scala @@ -55,6 +55,8 @@ class DefaultSource extends DataSourceRegister with RelationProvider with Creata require(parameters.contains("layer"), "'layer' parameter for raster layer name required.") require(parameters.contains("zoom"), "'zoom' parameter for raster layer zoom level required.") + sqlContext.withRasterFrames + registerOptimization(sqlContext, SpatialFilterPushdownRules) val uri: URI = URI.create(parameters("path")) diff --git a/datasource/src/main/scala/astraea/spark/rasterframes/datasource/geotrellis/GeoTrellisRelation.scala b/datasource/src/main/scala/astraea/spark/rasterframes/datasource/geotrellis/GeoTrellisRelation.scala index 2c125c863..95599f3c1 100644 --- a/datasource/src/main/scala/astraea/spark/rasterframes/datasource/geotrellis/GeoTrellisRelation.scala +++ b/datasource/src/main/scala/astraea/spark/rasterframes/datasource/geotrellis/GeoTrellisRelation.scala @@ -177,7 +177,7 @@ case class GeoTrellisRelation(sqlContext: SQLContext, ) } - val extentField = StructField(Cols.EX, org.apache.spark.sql.jts.JTSTypes.PolygonTypeInstance, false) + val extentField = StructField(Cols.EX, org.apache.spark.sql.jts.JTSTypes.PolygonTypeInstance, true) StructType((keyFields :+ extentField) ++ tileFields) } diff --git a/datasource/src/test/scala/astraea/spark/rasterframes/datasource/geotiff/GeoTiffDataSourceSpec.scala b/datasource/src/test/scala/astraea/spark/rasterframes/datasource/geotiff/GeoTiffDataSourceSpec.scala index 8e57cad95..00e8b5d4a 100644 --- a/datasource/src/test/scala/astraea/spark/rasterframes/datasource/geotiff/GeoTiffDataSourceSpec.scala +++ b/datasource/src/test/scala/astraea/spark/rasterframes/datasource/geotiff/GeoTiffDataSourceSpec.scala @@ -28,6 +28,7 @@ class GeoTiffDataSourceSpec with IntelliJPresentationCompilerHack { val cogPath = getClass.getResource("/LC08_RGB_Norfolk_COG.tiff").toURI + val nonCogPath = getClass.getResource("/L8-B8-Robinson-IL.tiff").toURI describe("GeoTiff reading") { @@ -39,6 +40,46 @@ class GeoTiffDataSourceSpec assert(rf.count() > 10) } + it("should lay out tiles correctly"){ + + val rf = spark.read + .geotiff + .loadRF(cogPath) + + val tlm = rf.tileLayerMetadata.left.get + val gb = tlm.gridBounds + assert(gb.colMax > gb.colMin) + assert(gb.rowMax > gb.rowMin) + } + + it("should lay out tiles correctly for non-tiled tif") { + val rf = spark.read + .geotiff + .loadRF(nonCogPath) + + println(rf.count()) + rf.show(false) + + assert(rf.count() > 1) + + import spark.implicits._ + import org.apache.spark.sql.functions._ + logger.info( + rf.agg( + min(col("spatial_key.row")) as "rowmin", + max(col("spatial_key.row")) as "rowmax", + min(col("spatial_key.col")) as "colmin", + max(col("spatial_key.col")) as "colmax" + + ).first.toSeq.toString() + ) + val tlm = rf.tileLayerMetadata.left.get + val gb = tlm.gridBounds + assert(gb.rowMax > gb.rowMin) + assert(gb.colMax > gb.colMin) + + } + it("should write RF to parquet") { val rf = spark.read .geotiff diff --git a/docs/src/main/tut/apps/geotrellis-ops.md b/docs/src/main/tut/apps/geotrellis-ops.md index 589affb45..22ce905e3 100644 --- a/docs/src/main/tut/apps/geotrellis-ops.md +++ b/docs/src/main/tut/apps/geotrellis-ops.md @@ -32,8 +32,7 @@ Here's an example downsampling a tile and rendering each tile as a matrix of num ```tut val downsample = udf((t: Tile) => t.resample(4, 4)) -val render = udf((t: Tile) => "\n" + t.asciiDraw() + "\n") -val downsampled = rf.select(render(downsample($"tile")) as "minime") +val downsampled = rf.select(renderAscii(downsample($"tile")) as "minime") downsampled.show(5, false) ``` diff --git a/docs/src/main/tut/release-notes.md b/docs/src/main/tut/release-notes.md index ffe8c0f1e..451955129 100644 --- a/docs/src/main/tut/release-notes.md +++ b/docs/src/main/tut/release-notes.md @@ -1,6 +1,18 @@ # Release Notes -## 0.6.0 +## 0.6.x + +### 0.6.1 + +* Added support for reading striped GeoTiffs (#64). +* Moved extension methods associated with querying tagged columns to `DataFrameMethods` for supporting + temporal and spatial columns on non-RasterFrame DataFrames. +* GeoTIFF and GeoTrellis DataSources automatically initialize RasterFrames. +* Added `RasterFrame.toMultibandRaster`. +* Added utility for rendering multiband tile as RGB composite PNG. +* Added `RasterFrame.withRFColumnRenamed` to lessen boilerplate in maintaining `RasterFrame` type tag. + +### 0.6.0 * Upgraded to Spark 2.2.1. Added `VersionShims` to allow for Spark 2.1.x backwards compatibility. * Introduced separate `rasterframes-datasource` library for hosting sources from which to read RasterFrames. diff --git a/src/main/scala/astraea/spark/rasterframes/RFSpatialColumnMethods.scala b/src/main/scala/astraea/spark/rasterframes/RFSpatialColumnMethods.scala deleted file mode 100644 index 96a3b1671..000000000 --- a/src/main/scala/astraea/spark/rasterframes/RFSpatialColumnMethods.scala +++ /dev/null @@ -1,107 +0,0 @@ -/* - * This software is licensed under the Apache 2 license, quoted below. - * - * Copyright 2017 Astraea, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); you may not - * use this file except in compliance with the License. You may obtain a copy of - * the License at - * - * [http://www.apache.org/licenses/LICENSE-2.0] - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations under - * the License. - * - */ - -package astraea.spark.rasterframes - -import geotrellis.proj4.LatLng -import geotrellis.spark.SpatialKey -import geotrellis.spark.tiling.MapKeyTransform -import geotrellis.util.MethodExtensions -import geotrellis.vector.Extent -import org.apache.spark.sql.Row -import org.apache.spark.sql.functions.{asc, udf} -import org.apache.spark.sql.types.{DoubleType, StructField, StructType} -import org.locationtech.geomesa.curve.Z2SFC - -/** - * RasterFrame extension methods associated with adding spatially descriptive columns. - * - * @author sfitch - * @since 12/15/17 - */ -trait RFSpatialColumnMethods extends MethodExtensions[RasterFrame] { - /** Returns the key-space to map-space coordinate transform. */ - def mapTransform: MapKeyTransform = self.tileLayerMetadata.widen.mapTransform - - private def keyCol2Extent: Row ⇒ Extent = { - val transform = self.sparkSession.sparkContext.broadcast(mapTransform) - (r: Row) ⇒ transform.value.keyToExtent(SpatialKey(r.getInt(0), r.getInt(1))) - } - - private def keyCol2LatLng: Row ⇒ (Double, Double) = { - val transform = self.sparkSession.sparkContext.broadcast(mapTransform) - val crs = self.tileLayerMetadata.widen.crs - (r: Row) ⇒ { - val center = transform.value.keyToExtent(SpatialKey(r.getInt(0), r.getInt(1))).center.reproject(crs, LatLng) - (center.x, center.y) - } - } - - /** - * Append a column containing the extent of the row's spatial key. - * Coordinates are in native CRS. - * @param colName name of column to append. Defaults to "extent" - * @return updated RasterFrame - */ - def withExtent(colName: String = "extent"): RasterFrame = { - val key2Extent = udf(keyCol2Extent) - self.withColumn(colName, key2Extent(self.spatialKeyColumn)).certify - } - - /** - * Append a column containing the center of the row's spatial key. - * Coordinate is in native CRS. - * @param colName name of column to append. Defaults to "center" - * @return updated RasterFrame - */ - def withCenter(colName: String = "center"): RasterFrame = { - val key2Center = udf(keyCol2Extent andThen (_.center) andThen (c ⇒ (c.x, c.y))) - self.withColumn(colName, key2Center(self.spatialKeyColumn).cast(RFSpatialColumnMethods.PointStructType)).certify - } - - /** - * Append a column containing the center of the row's spatial key. - * Coordinate is in (longitude, latitude) (EPSG:4326). - * @param colName name of column to append. Defaults to "center" - * @return updated RasterFrame - */ - def withCenterLatLng(colName: String = "center"): RasterFrame = { - val key2Center = udf(keyCol2LatLng) - self.withColumn(colName, key2Center(self.spatialKeyColumn).cast(RFSpatialColumnMethods.LngLatStructType)).certify - } - - /** - * Appends a spatial index column - * @param colName name of new column to create. Defaults to `index` - * @param applyOrdering if true, adds `.orderBy(asc(colName))` to result. Defaults to `true` - * @return RasterFrame with index column. - */ - def withSpatialIndex(colName: String = "spatial_index", applyOrdering: Boolean = true): RasterFrame = { - val zindex = udf(keyCol2LatLng andThen (p ⇒ Z2SFC.index(p._1, p._2).z)) - self.withColumn(colName, zindex(self.spatialKeyColumn)) match { - case rf if applyOrdering ⇒ rf.orderBy(asc(colName)).certify - case rf ⇒ rf.certify - } - } -} - -object RFSpatialColumnMethods { - private val PointStructType = StructType(Seq(StructField("x", DoubleType), StructField("y", DoubleType))) - private val LngLatStructType = StructType(Seq(StructField("longitude", DoubleType), StructField("latitude", DoubleType))) -} diff --git a/src/main/scala/astraea/spark/rasterframes/expressions/package.scala b/src/main/scala/astraea/spark/rasterframes/expressions/package.scala deleted file mode 100644 index 9137c79f0..000000000 --- a/src/main/scala/astraea/spark/rasterframes/expressions/package.scala +++ /dev/null @@ -1,88 +0,0 @@ -/* - * This software is licensed under the Apache 2 license, quoted below. - * - * Copyright 2017 Astraea, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); you may not - * use this file except in compliance with the License. You may obtain a copy of - * the License at - * - * [http://www.apache.org/licenses/LICENSE-2.0] - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations under - * the License. - * - */ - -package astraea.spark.rasterframes - -import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.catalyst.analysis.TypeCheckResult -import org.apache.spark.sql.catalyst.analysis.TypeCheckResult.{TypeCheckFailure, TypeCheckSuccess} -import org.apache.spark.sql.catalyst.expressions._ -import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, ExprCode} -import org.apache.spark.sql.gt.InternalRowTile -import org.apache.spark.sql.gt.types.TileUDT -import org.apache.spark.sql.types._ - -/** - * Module of Catalyst expressions for efficiently working with tiles. - * - * @author sfitch - * @since 10/10/17 - */ -package object expressions { - import InternalRowTile.C._ - private def row(input: Any) = input.asInstanceOf[InternalRow] - - protected trait RequiresTile { self: UnaryExpression ⇒ - abstract override def checkInputDataTypes(): TypeCheckResult = { - if(child.dataType.isInstanceOf[TileUDT]) TypeCheckSuccess - else TypeCheckFailure( - s"Expected '${TileUDT.typeName}' but received '${child.dataType.simpleString}'" - ) - } - } - - /** Extract a Tile's cell type */ - case class CellType(child: Expression) extends UnaryExpression with RequiresTile { - - def dataType: DataType = StringType - - override protected def nullSafeEval(input: Any): Any = - row(input).getUTF8String(CELL_TYPE) - - protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = - defineCodeGen(ctx, ev, c ⇒ s"$c.getUTF8String($CELL_TYPE);") - } - - /** Extract a Tile's dimensions */ - case class Dimensions(child: Expression) extends UnaryExpression with RequiresTile { - def dataType = StructType(Seq( - StructField("cols", ShortType), - StructField("rows", ShortType) - )) - - override protected def nullSafeEval(input: Any): Any = { - val r = row(input) - val cols = r.getShort(COLS) - val rows = r.getShort(ROWS) - InternalRow(cols, rows) - } - - protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { - val cols = ctx.freshName("cols") - val rows = ctx.freshName("rows") - nullSafeCodeGen(ctx, ev, eval ⇒ - s""" - final short $cols = $eval.getShort($COLS); - final short $rows = $eval.getShort($ROWS); - ${ev.value} = new GenericInternalRow(new Object[] { $cols, $rows }); - """ - ) - } - } -} diff --git a/src/main/scala/org/apache/spark/sql/gt/InternalRowTile.scala b/src/main/scala/org/apache/spark/sql/gt/InternalRowTile.scala deleted file mode 100644 index 6c03b2cfc..000000000 --- a/src/main/scala/org/apache/spark/sql/gt/InternalRowTile.scala +++ /dev/null @@ -1,274 +0,0 @@ -/* - * This software is licensed under the Apache 2 license, quoted below. - * - * Copyright 2017 Astraea, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); you may not - * use this file except in compliance with the License. You may obtain a copy of - * the License at - * - * [http://www.apache.org/licenses/LICENSE-2.0] - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations under - * the License. - * - */ - -package org.apache.spark.sql.gt - -import java.nio.ByteBuffer - -import geotrellis.raster._ -import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.types._ -import org.apache.spark.unsafe.types.UTF8String - -/** - * Wrapper around a `Tile` encoded in a Catalyst `InternalRow`, for the purpose - * of providing compatible semantics over common operations. - * - * @groupname COPIES Memory Copying - * @groupdesc COPIES Requires creating an intermediate copy of - * the complete `Tile` contents, and should be avoided. - * - * @author sfitch - * @since 11/29/17 - */ -class InternalRowTile(mem: InternalRow) extends ArrayTile { - import InternalRowTile._ - import InternalRowTile.C._ - - /** Retrieve the cell type from the internal encoding. */ - val cellType: CellType = CellType.fromName(mem.getString(CELL_TYPE)) - - /** Retrieve the number of columns from the internal encoding. */ - val cols: Int = mem.getShort(COLS) - - /** Retrieve the number of rows from the internal encoding. */ - val rows: Int = mem.getShort(ROWS) - - /** Get the internally encoded tile data cells. */ - lazy val toBytes: Array[Byte] = mem.getBinary(DATA) - - private lazy val toByteBuffer: ByteBuffer = { - val data = toBytes - if(data.length < cols * rows && cellType.name != "bool") { - // Handling constant tiles like this is inefficient and ugly. All the edge - // cases associated with them create too much undue complexity for - // something that's unlikely to be - // used much in production to warrant handling them specially. - // If a more efficient handling is necessary, consider a flag in - // the UDT struct. - ByteBuffer.wrap(toArrayTile.toBytes()) - } else ByteBuffer.wrap(data) - } - - /** Reads the cell value at the given index as an Int. */ - def apply(i: Int): Int = cellReader(i) - - /** Reads the cell value at the given index as a Double. */ - def applyDouble(i: Int): Double = cellReader.applyDouble(i) - - /** @group COPIES */ - override def toArrayTile: ArrayTile = { - val data = toBytes - if(data.length < cols * rows && cellType.name != "bool") { - val ctile = InternalRowTile.constantTileFromBytes(data, cellType, cols, rows) - val atile = ctile.toArrayTile() - atile - } - else - ArrayTile.fromBytes(data, cellType, cols, rows) - } - - /** @group COPIES */ - def mutable: MutableArrayTile = toArrayTile().mutable - - /** @group COPIES */ - def interpretAs(newCellType: CellType): Tile = - toArrayTile().interpretAs(newCellType) - - /** @group COPIES */ - def withNoData(noDataValue: Option[Double]): Tile = - toArrayTile().withNoData(noDataValue) - - /** @group COPIES */ - def copy = new InternalRowTile(mem.copy) - - private lazy val cellReader: CellReader = { - cellType match { - case ct: ByteUserDefinedNoDataCellType ⇒ - ByteUDNDCellReader(this, ct.noDataValue) - case ct: UByteUserDefinedNoDataCellType ⇒ - UByteUDNDCellReader(this, ct.noDataValue) - case ct: ShortUserDefinedNoDataCellType ⇒ - ShortUDNDCellReader(this, ct.noDataValue) - case ct: UShortUserDefinedNoDataCellType ⇒ - UShortUDNDCellReader(this, ct.noDataValue) - case ct: IntUserDefinedNoDataCellType ⇒ - IntUDNDCellReader(this, ct.noDataValue) - case ct: FloatUserDefinedNoDataCellType ⇒ - FloatUDNDCellReader(this, ct.noDataValue) - case ct: DoubleUserDefinedNoDataCellType ⇒ - DoubleUDNDCellReader(this, ct.noDataValue) - case _: BitCells ⇒ BitCellReader(this) - case _: ByteCells ⇒ ByteCellReader(this) - case _: UByteCells ⇒ UByteCellReader(this) - case _: ShortCells ⇒ ShortCellReader(this) - case _: UShortCells ⇒ UShortCellReader(this) - case _: IntCells ⇒ IntCellReader(this) - case _: FloatCells ⇒ FloatCellReader(this) - case _: DoubleCells ⇒ DoubleCellReader(this) - } - } -} - -object InternalRowTile { - object C { - val CELL_TYPE = 0 - val COLS = 1 - val ROWS = 2 - val DATA = 3 - } - - val schema = StructType(Seq( - StructField("cellType", StringType, false), - StructField("cols", ShortType, false), - StructField("rows", ShortType, false), - StructField("data", BinaryType, false) - )) - - /** - * Constructor. - * @param row Catalyst internal format conforming to `shema` - * @return row wrapper - */ - def apply(row: InternalRow): InternalRowTile = new InternalRowTile(row) - - /** - * Convenience extractor for converting a `Tile` to an `InternalRow`. - * - * @param tile tile to convert - * @return Catalyst internal representation. - */ - def unapply(tile: Tile): Option[InternalRow] = Some( - InternalRow( - UTF8String.fromString(tile.cellType.name), - tile.cols.toShort, - tile.rows.toShort, - tile.toBytes) - ) - - sealed trait CellReader { - def apply(index: Int): Int - def applyDouble(index: Int): Double - } - - case class BitCellReader(t: InternalRowTile) extends CellReader { - def apply(i: Int): Int = - (t.toByteBuffer.get(i >> 3) >> (i & 7)) & 1 // See BitArrayTile.apply - def applyDouble(i: Int): Double = apply(i).toDouble - } - - case class ByteCellReader(t: InternalRowTile) extends CellReader { - def apply(i: Int): Int = b2i(t.toByteBuffer.get(i)) - def applyDouble(i: Int): Double = b2d(t.toByteBuffer.get(i)) - } - - case class ByteUDNDCellReader(t: InternalRowTile, userDefinedByteNoDataValue: Byte) - extends CellReader with UserDefinedByteNoDataConversions { - def apply(i: Int): Int = udb2i(t.toByteBuffer.get(i)) - def applyDouble(i: Int): Double = udb2d(t.toByteBuffer.get(i)) - } - - case class UByteCellReader(t: InternalRowTile) extends CellReader { - def apply(i: Int): Int = ub2i(t.toByteBuffer.get(i)) - def applyDouble(i: Int): Double = ub2d(t.toByteBuffer.get(i)) - } - - case class UByteUDNDCellReader(t: InternalRowTile, userDefinedByteNoDataValue: Byte) - extends CellReader with UserDefinedByteNoDataConversions { - def apply(i: Int): Int = udub2i(t.toByteBuffer.get(i)) - def applyDouble(i: Int): Double = udub2d(t.toByteBuffer.get(i)) - } - - case class ShortCellReader(t: InternalRowTile) extends CellReader { - def apply(i: Int): Int = s2i(t.toByteBuffer.asShortBuffer().get(i)) - def applyDouble(i: Int): Double = s2d(t.toByteBuffer.asShortBuffer().get(i)) - } - - case class ShortUDNDCellReader(t: InternalRowTile, userDefinedShortNoDataValue: Short) - extends CellReader with UserDefinedShortNoDataConversions { - def apply(i: Int): Int = uds2i(t.toByteBuffer.asShortBuffer().get(i)) - def applyDouble(i: Int): Double = uds2d(t.toByteBuffer.asShortBuffer().get(i)) - } - - case class UShortCellReader(t: InternalRowTile) extends CellReader { - def apply(i: Int): Int = us2i(t.toByteBuffer.asShortBuffer().get(i)) - def applyDouble(i: Int): Double = us2d(t.toByteBuffer.asShortBuffer().get(i)) - } - - case class UShortUDNDCellReader(t: InternalRowTile, userDefinedShortNoDataValue: Short) - extends CellReader with UserDefinedShortNoDataConversions { - def apply(i: Int): Int = udus2i(t.toByteBuffer.asShortBuffer().get(i)) - def applyDouble(i: Int): Double = udus2d(t.toByteBuffer.asShortBuffer().get(i)) - } - - case class IntCellReader(t: InternalRowTile) extends CellReader { - def apply(i: Int): Int = t.toByteBuffer.asIntBuffer().get(i) - def applyDouble(i: Int): Double = i2d(t.toByteBuffer.asIntBuffer().get(i)) - } - - case class IntUDNDCellReader(t: InternalRowTile, userDefinedIntNoDataValue: Int) - extends CellReader with UserDefinedIntNoDataConversions { - def apply(i: Int): Int = udi2i(t.toByteBuffer.asIntBuffer().get(i)) - def applyDouble(i: Int): Double = udi2d(t.toByteBuffer.asIntBuffer().get(i)) - } - - case class FloatCellReader(t: InternalRowTile) extends CellReader { - def apply(i: Int): Int = f2i(t.toByteBuffer.asFloatBuffer().get(i)) - def applyDouble(i: Int): Double = f2d(t.toByteBuffer.asFloatBuffer().get(i)) - } - - case class FloatUDNDCellReader(t: InternalRowTile, userDefinedFloatNoDataValue: Float) - extends CellReader with UserDefinedFloatNoDataConversions{ - def apply(i: Int): Int = udf2i(t.toByteBuffer.asFloatBuffer().get(i)) - def applyDouble(i: Int): Double = udf2d(t.toByteBuffer.asFloatBuffer().get(i)) - } - - case class DoubleCellReader(t: InternalRowTile) extends CellReader { - def apply(i: Int): Int = d2i(t.toByteBuffer.asDoubleBuffer().get(i)) - def applyDouble(i: Int): Double = t.toByteBuffer.asDoubleBuffer().get(i) - } - - case class DoubleUDNDCellReader(t: InternalRowTile, userDefinedDoubleNoDataValue: Double) - extends CellReader with UserDefinedDoubleNoDataConversions{ - def apply(i: Int): Int = udd2i(t.toByteBuffer.asDoubleBuffer().get(i)) - def applyDouble(i: Int): Double = udd2d(t.toByteBuffer.asDoubleBuffer().get(i)) - } - - // Temporary, until GeoTrellis 1.2 - // See https://github.com/locationtech/geotrellis/pull/2401 - private[gt] def constantTileFromBytes(bytes: Array[Byte], t: CellType, cols: Int, rows: Int): ConstantTile = - t match { - case _: BitCells => - BitConstantTile(BitArrayTile.fromBytes(bytes, 1, 1).array(0), cols, rows) - case ct: ByteCells => - ByteConstantTile(ByteArrayTile.fromBytes(bytes, 1, 1, ct).array(0), cols, rows, ct) - case ct: UByteCells => - UByteConstantTile(UByteArrayTile.fromBytes(bytes, 1, 1, ct).array(0), cols, rows, ct) - case ct: ShortCells => - ShortConstantTile(ShortArrayTile.fromBytes(bytes, 1, 1, ct).array(0), cols, rows, ct) - case ct: UShortCells => - UShortConstantTile(UShortArrayTile.fromBytes(bytes, 1, 1, ct).array(0), cols, rows, ct) - case ct: IntCells => - IntConstantTile(IntArrayTile.fromBytes(bytes, 1, 1, ct).array(0), cols, rows, ct) - case ct: FloatCells => - FloatConstantTile(FloatArrayTile.fromBytes(bytes, 1, 1, ct).array(0), cols, rows, ct) - case ct: DoubleCells => - DoubleConstantTile(DoubleArrayTile.fromBytes(bytes, 1, 1, ct).array(0), cols, rows, ct) - } -} diff --git a/src/main/scala/org/apache/spark/sql/gt/types/TileUDT.scala b/src/main/scala/org/apache/spark/sql/gt/types/TileUDT.scala deleted file mode 100644 index 9b054cb07..000000000 --- a/src/main/scala/org/apache/spark/sql/gt/types/TileUDT.scala +++ /dev/null @@ -1,61 +0,0 @@ -/* - * This software is licensed under the Apache 2 license, quoted below. - * - * Copyright 2017 Astraea, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); you may not - * use this file except in compliance with the License. You may obtain a copy of - * the License at - * - * [http://www.apache.org/licenses/LICENSE-2.0] - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations under - * the License. - * - */ - -package org.apache.spark.sql.gt.types - - -import geotrellis.raster._ -import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.gt.InternalRowTile -import org.apache.spark.sql.types.{DataType, _} - -/** - * UDT for singleband tiles. - * - * @author sfitch - * @since 5/11/17 - */ -class TileUDT extends UserDefinedType[Tile] { - override def typeName = "rf_tile" - - def sqlType = InternalRowTile.schema - - override def serialize(obj: Tile): InternalRow = { - Option(obj) - .map { case InternalRowTile(row) ⇒ row } - .orNull - } - - override def deserialize(datum: Any): Tile = { - Option(datum) - .collect { case row: InternalRow ⇒ InternalRowTile(row).toArrayTile } - .orNull - } - - def userClass: Class[Tile] = classOf[Tile] - - private[sql] override def acceptsType(dataType: DataType) = dataType match { - case _: TileUDT ⇒ true - case _ ⇒ super.acceptsType(dataType) - } -} - -case object TileUDT extends TileUDT { - UDTRegistration.register(classOf[Tile].getName, classOf[TileUDT].getName) -} diff --git a/src/main/tut/release-notes.md b/src/main/tut/release-notes.md deleted file mode 100644 index c26510dce..000000000 --- a/src/main/tut/release-notes.md +++ /dev/null @@ -1,66 +0,0 @@ -# Release Notes -## 0.5.11 - -* Significant performance improvement in `explodeTiles` (1-2 orders of magnitude). See [#38](https://github.com/s22s/raster-frames/issues/38) -* Fixed bugs in `NoData` handling when converting to `Double` tiles. - -## 0.5.12 - -* Added `withSpatialIndex` to introduce a column assigning a z-curve index value based on the tile's centroid in EPSG:4326. -* Added column-appending convenience methods: `withExtent`, `withCenter`, `withCenterLatLng` -* Documented example of creating a GeoTrellis layer from a RasterFrame. -* Added Spark 2.2.0 forward-compatibility -* Upgraded to GeoTrellis 1.2.0-RC2 - -## 0.5.11 - -* Significant performance improvement in `explodeTiles` (1-2 orders of magnitude). See [#38](https://github.com/s22s/raster-frames/issues/38) -* Fixed bugs in `NoData` handling when converting to `Double` tiles. - -## 0.5.10 - -* Upgraded to shapeless 2.3.2 -* Fixed [#36](https://github.com/s22s/raster-frames/issues/36), [#37](https://github.com/s22s/raster-frames/issues/37) - -## 0.5.9 - -* Ported to sbt 1.0.3 -* Added sbt-generated `astraea.spark.rasterframes.RFBuildInfo` -* Fixed bug in computing `aggMean` when one or more tiles are `null` -* Deprecated `rfIinit` in favor of `SparkSession.withRasterFrames` or `SQLContext.withRasterFrames` extension methods - -## 0.5.8 - -* Upgraded to GeoTrellis 1.2.0-RC1 -* Added [`REPLsent`-based](https://github.com/marconilanna/REPLesent) tour of RasterFrames -* Moved Giter8 template to separate repository `s22s/raster-frames.g8` due to sbt limitations -* Updated _Getting Started_ to reference new Giter8 repo -* Changed SQL function name `rf_stats` and `rf_histogram` to `rf_aggStats` and `rf_aggHistogram` - for consistency with DataFrames API - -## 0.5.7 - -* Created faster implementation of aggregate statistics. -* Fixed bug in deserialization of `TileUDT`s originating from `ConstantTile`s -* Fixed bug in serialization of `NoDataFilter` within SparkML pipeline -* Refactoring of UDF organization -* Various documentation tweaks and updates -* Added Giter8 template - -## 0.5.6 - -* `TileUDF`s are encoded using directly into Catalyst--without Kryo--resulting in an insane - decrease in serialization time for small tiles (`int8`, <= 128²), and pretty awesome speedup for - all other cell types other than `float32` (marginal slowing). While not measured, memory - footprint is expected to have gone down. - - -## 0.5.5 - -* `aggStats` and `tileMean` functions rewritten to compute simple statistics directly rather than using `StreamingHistogram` -* `tileHistogramDouble` and `tileStatsDouble` were replaced by `tileHistogram` and `tileStats` -* Added `tileSum`, `tileMin` and `tileMax` functions -* Added `aggMean`, `aggDataCells` and `aggNoDataCells` aggregate functions. -* Added `localAggDataCells` and `localAggNoDataCells` cell-local (tile generating) fuctions -* Added `tileToArray` and `arrayToTile` -* Overflow fix in `LocalStatsAggregateFunction` diff --git a/src/test/scala/astraea/spark/rasterframes/SpatialKeySpec.scala b/src/test/scala/astraea/spark/rasterframes/SpatialKeySpec.scala deleted file mode 100644 index 463dde3fb..000000000 --- a/src/test/scala/astraea/spark/rasterframes/SpatialKeySpec.scala +++ /dev/null @@ -1,68 +0,0 @@ -/* - * This software is licensed under the Apache 2 license, quoted below. - * - * Copyright 2017 Astraea, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); you may not - * use this file except in compliance with the License. You may obtain a copy of - * the License at - * - * [http://www.apache.org/licenses/LICENSE-2.0] - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations under - * the License. - * - */ - -package astraea.spark.rasterframes - -import geotrellis.proj4.LatLng -import geotrellis.vector.{Extent, Point} -import org.locationtech.geomesa.curve.{Z2SFC, Z3SFC} - -/** - * Test rig associated with spatial key related extension methods - * - * @author sfitch - * @since 12/15/17 - */ -class SpatialKeySpec extends TestEnvironment with TestData { - // This is to avoid an IntelliJ error - protected def withFixture(test: Any) = ??? - import spark.implicits._ - - describe("Spatial key conversions") { - val raster = sampleGeoTiff.projectedRaster - // Create a raster frame with a single row - val rf = raster.toRF(raster.tile.cols, raster.tile.rows) - - it("should add an extent column") { - val expected = raster.extent - val result = rf.withExtent().select($"extent".as[Extent]).first - assert(result === expected) - } - - it("should add a center value") { - val expected = raster.extent.center - val result = rf.withCenter().select($"center".as[(Double, Double)]).first - assert(Point(result._1, result._2) === expected) - } - - it("should add a center lat/lng value") { - val expected = raster.extent.center.reproject(raster.crs, LatLng) - val result = rf.withCenterLatLng().select($"center".as[(Double, Double)]).first - assert( Point(result._1, result._2) === expected) - } - - it("should add a z-index value") { - val center = raster.extent.center.reproject(raster.crs, LatLng) - val expected = Z2SFC.index(center.x, center.y).z - val result = rf.withSpatialIndex().select($"spatial_index".as[Long]).first - assert(result === expected) - } - } - -} diff --git a/src/test/scala/astraea/spark/rasterframes/TileUDTSpec.scala b/src/test/scala/astraea/spark/rasterframes/TileUDTSpec.scala deleted file mode 100644 index e14ea0568..000000000 --- a/src/test/scala/astraea/spark/rasterframes/TileUDTSpec.scala +++ /dev/null @@ -1,97 +0,0 @@ - - -/* - * This software is licensed under the Apache 2 license, quoted below. - * - * Copyright 2017 Astraea, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); you may not - * use this file except in compliance with the License. You may obtain a copy of - * the License at - * - * [http://www.apache.org/licenses/LICENSE-2.0] - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations under - * the License. - * - */ - -package astraea.spark.rasterframes - -import geotrellis.raster.{ByteArrayTile, Tile} -import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder -import org.apache.spark.sql.gt.{InternalRowTile, types} -import org.apache.spark.sql.gt.types.TileUDT -import org.scalatest.Inspectors - -/** - * RasterFrame test rig. - * - * @author sfitch - * @since 7/10/17 - */ -class TileUDTSpec extends TestEnvironment with TestData with Inspectors { - // This is to avoid an IntelliJ error - protected def withFixture(test: Any) = ??? - import TestData.randomTile - - spark.version - val tileEncoder: ExpressionEncoder[Tile] = ExpressionEncoder() - - describe("TileUDT") { - val tileSizes = Seq(2, 64, 128, 222, 511) - val cellTypes = types.cellTypes().filter(_ != "bool") - - def forEveryConfig(test: (Tile) ⇒ Unit): Unit = { - forEvery(tileSizes.combinations(2).toSeq) { case Seq(cols, rows) ⇒ - forEvery(cellTypes) { ct ⇒ - val tile = randomTile(cols, rows, ct) - test(tile) - } - } - } - - it("should (de)serialize tile") { - forEveryConfig { tile ⇒ - val row = TileUDT.serialize(tile) - val tileAgain = TileUDT.deserialize(row) - assert(tileAgain === tile) - } - } - - it("should (en/de)code tile") { - forEveryConfig { tile ⇒ - val row = tileEncoder.toRow(tile) - assert(!row.isNullAt(0)) - val tileAgain = TileUDT.deserialize(row.getStruct(0, TileUDT.sqlType.size)) - assert(tileAgain === tile) - } - } - - it("should extract properties") { - forEveryConfig { tile ⇒ - val row = TileUDT.serialize(tile) - val wrapper = new InternalRowTile(row) - assert(wrapper.cols === tile.cols) - assert(wrapper.rows === tile.rows) - assert(wrapper.cellType === tile.cellType) - } - } - - it("should directly extract cells") { - forEveryConfig { tile ⇒ - val row = TileUDT.serialize(tile) - val wrapper = new InternalRowTile(row) - val (cols,rows) = wrapper.dimensions - val indexes = Seq((0, 0), (cols - 1, rows - 1), (cols/2, rows/2), (1, 1)) - forAll(indexes) { case (c, r) ⇒ - assert(wrapper.get(c, r) === tile.get(c, r)) - assert(wrapper.getDouble(c, r) === tile.getDouble(c, r)) - } - } - } - } -} diff --git a/version.sbt b/version.sbt index 0512f1612..421f10f23 100644 --- a/version.sbt +++ b/version.sbt @@ -1 +1 @@ -version in ThisBuild := "0.6.0" +version in ThisBuild := "0.6.1"