Enforce NoScalaDoc rule in scalastyle checks #449

Merged · 8 commits · Jul 28, 2020
Changes from 6 commits
18 changes: 9 additions & 9 deletions scalastyle-config.xml
@@ -96,19 +96,19 @@ You can also disable only one rule, by specifying its rule id, as specified in:
<check level="error" class="org.scalastyle.scalariform.NotImplementedErrorUsage"
enabled="true"/>

<!-- This project uses Javadoc rather than Scaladoc so scaladoc checks are disabled -->
<check enabled="false" class="org.scalastyle.scalariform.ScalaDocChecker" level="warning"/>

<!-- ================================================================================ -->
<!-- rules we'd like to enforce, but haven't cleaned up the codebase yet -->
<!-- ================================================================================ -->

<check customId="NoScalaDoc" level="error" class="org.scalastyle.file.RegexChecker"
enabled="false">
enabled="true">
<parameters>
<parameter name="regex">(?m)^(\s*)/[*][*].*$(\r|)\n^\1 [*]</parameter>
<parameter name="regex">(?m)^(\s*)/[*][*].*$(\r|)\n^\1 [*]</parameter>
</parameters>
<customMessage>Use Javadoc style indentation for multiline comments</customMessage>
</check>

<!-- ================================================================================ -->
<!-- rules we'd like to enforce, but haven't cleaned up the codebase yet -->
<!-- ================================================================================ -->

<!-- This project uses Javadoc rather than Scaladoc so scaladoc checks are disabled -->
<check enabled="false" class="org.scalastyle.scalariform.ScalaDocChecker" level="warning"/>

</scalastyle>
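
For reference, here is a small illustration of the two comment styles involved (my own example, not part of the diff). With the NoScalaDoc check enabled, Scaladoc-style indentation, where the continuation asterisks line up under the second asterisk of the opener, is reported as an error, while the Javadoc style requested by the rule's custom message passes:

// Flagged by the NoScalaDoc check: Scaladoc-style indentation
// (continuation '*' aligned under the second '*' of the opener).
/** Adds two integers.
  *
  * @param a first operand
  * @param b second operand
  */
def add(a: Int, b: Int): Int = a + b

// Accepted: Javadoc-style indentation
// (continuation '*' one space in, under the first '*' of the opener).
/**
 * Adds two integers.
 *
 * @param a first operand
 * @param b second operand
 */
def plus(a: Int, b: Int): Int = a + b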
18 changes: 9 additions & 9 deletions sql-plugin/src/main/scala/ai/rapids/cudf/CudaUtil.scala
@@ -18,15 +18,15 @@ package ai.rapids.cudf

object CudaUtil {
/**
* Copy from `src` buffer, starting at `srcOffset`,
* to a destination buffer `dst` starting at `dstOffset`,
* `length` bytes, in the default stream.
* @param src source buffer
* @param srcOffset source offset
* @param dst destination buffer
* @param dstOffset destination offset
* @param length amount to copy
*/
* Copy from `src` buffer, starting at `srcOffset`,
* to a destination buffer `dst` starting at `dstOffset`,
* `length` bytes, in the default stream.
* @param src source buffer
* @param srcOffset source offset
* @param dst destination buffer
* @param dstOffset destination offset
* @param length amount to copy
*/
def copy(src: MemoryBuffer, srcOffset: Long, dst: MemoryBuffer,
dstOffset: Long, length: Long): Unit = {
Cuda.memcpy(
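
For context, a minimal usage sketch of the CudaUtil.copy helper documented above. This is illustrative only and not code from this PR; it assumes the cudf Java API's HostMemoryBuffer.allocate and DeviceMemoryBuffer.allocate for creating the two buffers.

import ai.rapids.cudf.{CudaUtil, DeviceMemoryBuffer, HostMemoryBuffer}

// Copy 256 bytes from the start of a host buffer to the start of a device
// buffer on the default stream, then release both buffers.
val host = HostMemoryBuffer.allocate(256)
val device = DeviceMemoryBuffer.allocate(256)
try {
  CudaUtil.copy(host, 0, device, 0, 256)
} finally {
  device.close()
  host.close()
}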
@@ -25,10 +25,10 @@ import org.apache.spark.sql.types.StructType
import org.apache.spark.sql.vectorized.ColumnarBatch

/**
* A wrapper reader that always appends partition values to the ColumnarBatch produced by the input
* reader `fileReader`. Each scalar value is splatted to a column with the same number of
* rows as the batch returned by the reader.
*/
* A wrapper reader that always appends partition values to the ColumnarBatch produced by the input
* reader `fileReader`. Each scalar value is splatted to a column with the same number of
* rows as the batch returned by the reader.
*/
class ColumnarPartitionReaderWithPartitionValues(
fileReader: PartitionReader[ColumnarBatch],
partitionValues: Array[Scalar]) extends PartitionReader[ColumnarBatch] {
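
A sketch of how the wrapper documented above might be wired up, matching the two-argument constructor shown in the diff. Illustrative only: `baseReader` is a hypothetical existing PartitionReader[ColumnarBatch], and the partition values are built with the cudf Scalar factory methods.

import ai.rapids.cudf.Scalar
import org.apache.spark.sql.connector.read.PartitionReader
import org.apache.spark.sql.vectorized.ColumnarBatch

// Append two partition-column values (e.g. year=2020, month=7) to every batch
// returned by the underlying file reader. The wrapper splats each scalar into
// a full column sized to match the batch it wraps.
def withPartitionValues(
    baseReader: PartitionReader[ColumnarBatch]): PartitionReader[ColumnarBatch] = {
  val partValues: Array[Scalar] = Array(Scalar.fromInt(2020), Scalar.fromInt(7))
  new ColumnarPartitionReaderWithPartitionValues(baseReader, partValues)
}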
140 changes: 70 additions & 70 deletions sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuOrcScan.scala
@@ -179,14 +179,14 @@ case class GpuOrcPartitionReaderFactory(

object GpuOrcPartitionReader {
/**
* This class describes a stripe that will appear in the ORC output memory file.
*
* @param infoBuilder builder for output stripe info that has been populated with
* all fields except those that can only be known when the file
* is being written (e.g.: file offset, compressed footer length)
* @param footer stripe footer
* @param inputDataRanges input file ranges (based at file offset 0) of stripe data
*/
* This class describes a stripe that will appear in the ORC output memory file.
*
* @param infoBuilder builder for output stripe info that has been populated with
* all fields except those that can only be known when the file
* is being written (e.g.: file offset, compressed footer length)
* @param footer stripe footer
* @param inputDataRanges input file ranges (based at file offset 0) of stripe data
*/
private case class OrcOutputStripe(
infoBuilder: OrcProto.StripeInformation.Builder,
footer: OrcProto.StripeFooter,
@@ -200,32 +200,32 @@ object GpuOrcPartitionReader {
OrcProto.Stream.Kind.ROW_INDEX)

/**
* This class holds fields needed to read and iterate over the OrcFile
*
* @param updatedReadSchema read schema mapped to the file's field names
* @param evolution ORC SchemaEvolution
* @param dataReader ORC DataReader
* @param orcReader ORC Input File Reader
* @param blockIterator An iterator over the ORC output stripes
*/
* This class holds fields needed to read and iterate over the OrcFile
*
* @param updatedReadSchema read schema mapped to the file's field names
* @param evolution ORC SchemaEvolution
* @param dataReader ORC DataReader
* @param orcReader ORC Input File Reader
* @param blockIterator An iterator over the ORC output stripes
*/
private case class OrcPartitionReaderContext(updatedReadSchema: TypeDescription,
evolution: SchemaEvolution, dataReader: DataReader, orcReader: Reader,
blockIterator: BufferedIterator[OrcOutputStripe])
}

/**
* A PartitionReader that reads an ORC file split on the GPU.
*
* Efficiently reading an ORC split on the GPU requires rebuilding the ORC file
* in memory such that only relevant data is present in the memory file.
* This avoids sending unnecessary data to the GPU and saves GPU memory.
*
* @param conf Hadoop configuration
* @param partFile file split to read
* @param dataSchema Spark schema of the file
* @param readDataSchema Spark schema of what will be read from the file
* @param debugDumpPrefix path prefix for dumping the memory file or null
*/
* A PartitionReader that reads an ORC file split on the GPU.
*
* Efficiently reading an ORC split on the GPU requires rebuilding the ORC file
* in memory such that only relevant data is present in the memory file.
* This avoids sending unnecessary data to the GPU and saves GPU memory.
*
* @param conf Hadoop configuration
* @param partFile file split to read
* @param dataSchema Spark schema of the file
* @param readDataSchema Spark schema of what will be read from the file
* @param debugDumpPrefix path prefix for dumping the memory file or null
*/
class GpuOrcPartitionReader(
conf: Configuration,
partFile: PartitionedFile,
@@ -319,13 +319,13 @@ class GpuOrcPartitionReader(
}

/**
* Build an integer array that maps the original ORC file's column IDs
* to column IDs in the memory file. Columns that are not present in
* the memory file will have a mapping of -1.
*
* @param evolution ORC SchemaEvolution
* @return column mapping array
*/
* Build an integer array that maps the original ORC file's column IDs
* to column IDs in the memory file. Columns that are not present in
* the memory file will have a mapping of -1.
*
* @param evolution ORC SchemaEvolution
* @return column mapping array
*/
private def columnRemap(evolution: SchemaEvolution): Array[Int] = {
val fileIncluded = evolution.getFileIncluded
if (fileIncluded != null) {
@@ -346,17 +346,17 @@ }
}

/**
* Build the output stripe descriptors for what will appear in the ORC memory file.
*
* @param stripes descriptors for the ORC input stripes, filtered to what is in the split
* @param evolution ORC SchemaEvolution
* @param sargApp ORC search argument applier
* @param sargColumns mapping of ORC search argument columns
* @param ignoreNonUtf8BloomFilter true if bloom filters other than UTF8 should be ignored
* @param writerVersion writer version from the original ORC input file
* @param dataReader ORC DataReader
* @return output stripes descriptors
*/
* Build the output stripe descriptors for what will appear in the ORC memory file.
*
* @param stripes descriptors for the ORC input stripes, filtered to what is in the split
* @param evolution ORC SchemaEvolution
* @param sargApp ORC search argument applier
* @param sargColumns mapping of ORC search argument columns
* @param ignoreNonUtf8BloomFilter true if bloom filters other than UTF8 should be ignored
* @param writerVersion writer version from the original ORC input file
* @param dataReader ORC DataReader
* @return output stripes descriptors
*/
private def buildOutputStripes(
stripes: Seq[StripeInformation],
evolution: SchemaEvolution,
@@ -392,14 +392,14 @@ }
}

/**
* Build the output stripe descriptor for a corresponding input stripe
* that should be copied to the ORC memory file.
*
* @param inputStripe input stripe descriptor
* @param inputFooter input stripe footer
* @param columnMapping mapping of input column IDs to output column IDs
* @return output stripe descriptor
*/
* Build the output stripe descriptor for a corresponding input stripe
* that should be copied to the ORC memory file.
*
* @param inputStripe input stripe descriptor
* @param inputFooter input stripe footer
* @param columnMapping mapping of input column IDs to output column IDs
* @return output stripe descriptor
*/
private def buildOutputStripe(
inputStripe: StripeInformation,
inputFooter: OrcProto.StripeFooter,
@@ -564,13 +564,13 @@ }
}

/**
* Check if the read schema is compatible with the file schema.
*
* @param fileSchema input file's ORC schema
* @param readSchema ORC schema for what will be read
* @param isCaseAware true if field names are case-sensitive
* @return read schema mapped to the file's field names
*/
* Check if the read schema is compatible with the file schema.
*
* @param fileSchema input file's ORC schema
* @param readSchema ORC schema for what will be read
* @param isCaseAware true if field names are case-sensitive
* @return read schema mapped to the file's field names
*/
private def checkSchemaCompatibility(
fileSchema: TypeDescription,
readSchema: TypeDescription,
@@ -602,15 +602,15 @@ }
}

/**
* Build an ORC search argument applier that can filter input file splits
* when predicate push-down filters have been specified.
*
* @param orcReader ORC input file reader
* @param readerOpts ORC reader options
* @param evolution ORC SchemaEvolution
* @param useUTCTimestamp true if timestamps are UTC
* @return the search argument applier and search argument column mapping
*/
* Build an ORC search argument applier that can filter input file splits
* when predicate push-down filters have been specified.
*
* @param orcReader ORC input file reader
* @param readerOpts ORC reader options
* @param evolution ORC SchemaEvolution
* @param useUTCTimestamp true if timestamps are UTC
* @return the search argument applier and search argument column mapping
*/
private def getSearchApplier(
orcReader: Reader,
readerOpts: Reader.Options,
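
As a standalone illustration of the mapping described in the columnRemap doc comment above (my own sketch, not the plugin's code): columns that are present in the memory file receive consecutive new IDs in file order, while absent columns map to -1.

// Included columns get consecutive new IDs in file order; excluded columns
// map to -1, mirroring the columnRemap doc comment above.
def remapColumnIds(fileIncluded: Array[Boolean]): Array[Int] = {
  var nextId = 0
  fileIncluded.map { included =>
    if (included) {
      val id = nextId
      nextId += 1
      id
    } else {
      -1
    }
  }
}

// Example: remapColumnIds(Array(true, false, true, true)) yields Array(0, -1, 1, 2)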
@@ -247,22 +247,22 @@ case class GpuParquetPartitionReaderFactory(
}

/**
* A PartitionReader that reads a Parquet file split on the GPU.
*
* Efficiently reading a Parquet split on the GPU requires re-constructing the Parquet file
* in memory that contains just the column chunks that are needed. This avoids sending
* unnecessary data to the GPU and saves GPU memory.
*
* @param conf the Hadoop configuration
* @param split the file split to read
* @param filePath the path to the Parquet file
* @param clippedBlocks the block metadata from the original Parquet file that has been clipped
* to only contain the column chunks to be read
* @param clippedParquetSchema the Parquet schema from the original Parquet file that has been
* clipped to contain only the columns to be read
* @param readDataSchema the Spark schema describing what will be read
* @param debugDumpPrefix a path prefix to use for dumping the fabricated Parquet data or null
*/
* A PartitionReader that reads a Parquet file split on the GPU.
*
* Efficiently reading a Parquet split on the GPU requires re-constructing the Parquet file
* in memory that contains just the column chunks that are needed. This avoids sending
* unnecessary data to the GPU and saves GPU memory.
*
* @param conf the Hadoop configuration
* @param split the file split to read
* @param filePath the path to the Parquet file
* @param clippedBlocks the block metadata from the original Parquet file that has been clipped
* to only contain the column chunks to be read
* @param clippedParquetSchema the Parquet schema from the original Parquet file that has been
* clipped to contain only the columns to be read
* @param readDataSchema the Spark schema describing what will be read
* @param debugDumpPrefix a path prefix to use for dumping the fabricated Parquet data or null
*/
class ParquetPartitionReader(
conf: Configuration,
split: PartitionedFile,
@@ -391,15 +391,15 @@ }
}

/**
* Copies the data corresponding to the clipped blocks in the original file and compute the
* block metadata for the output. The output blocks will contain the same column chunk
* metadata but with the file offsets updated to reflect the new position of the column data
* as written to the output.
*
* @param in the input stream for the original Parquet file
* @param out the output stream to receive the data
* @return updated block metadata corresponding to the output
*/
* Copies the data corresponding to the clipped blocks in the original file and compute the
* block metadata for the output. The output blocks will contain the same column chunk
* metadata but with the file offsets updated to reflect the new position of the column data
* as written to the output.
*
* @param in the input stream for the original Parquet file
* @param out the output stream to receive the data
* @return updated block metadata corresponding to the output
*/
private def copyBlocksData(
in: FSDataInputStream,
out: HostMemoryOutputStream,
@@ -626,12 +626,12 @@ object ParquetPartitionReader {
private case class CopyRange(offset: Long, length: Long)

/**
* Build a new BlockMetaData
*
* @param rowCount the number of rows in this block
* @param columns the new column chunks to reference in the new BlockMetaData
* @return the new BlockMetaData
*/
* Build a new BlockMetaData
*
* @param rowCount the number of rows in this block
* @param columns the new column chunks to reference in the new BlockMetaData
* @return the new BlockMetaData
*/
private def newParquetBlock(
rowCount: Long,
columns: Seq[ColumnChunkMetaData]): BlockMetaData = {
@@ -649,14 +649,14 @@ }
}

/**
* Trim block metadata to contain only the column chunks that occur in the specified columns.
* The column chunks that are returned are preserved verbatim
* (i.e.: file offsets remain unchanged).
*
* @param columnPaths the paths of columns to preserve
* @param blocks the block metadata from the original Parquet file
* @return the updated block metadata with undesired column chunks removed
*/
* Trim block metadata to contain only the column chunks that occur in the specified columns.
* The column chunks that are returned are preserved verbatim
* (i.e.: file offsets remain unchanged).
*
* @param columnPaths the paths of columns to preserve
* @param blocks the block metadata from the original Parquet file
* @return the updated block metadata with undesired column chunks removed
*/
private[spark] def clipBlocks(columnPaths: Seq[ColumnPath],
blocks: Seq[BlockMetaData]): Seq[BlockMetaData] = {
val pathSet = columnPaths.toSet
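
The clipBlocks doc comment above describes trimming Parquet block metadata down to a requested set of column paths while carrying the surviving chunks over verbatim. A rough sketch of that idea against the parquet-mr metadata classes (illustrative, not the plugin's exact implementation):

import org.apache.parquet.hadoop.metadata.{BlockMetaData, ColumnPath}
import scala.collection.JavaConverters._

// Keep only the column chunks whose paths appear in columnPaths. Chunks are
// copied over as-is, so their original file offsets remain unchanged.
def clip(columnPaths: Seq[ColumnPath], blocks: Seq[BlockMetaData]): Seq[BlockMetaData] = {
  val pathSet = columnPaths.toSet
  blocks.map { block =>
    val clipped = new BlockMetaData()
    clipped.setRowCount(block.getRowCount)
    block.getColumns.asScala
      .filter(chunk => pathSet.contains(chunk.getPath))
      .foreach(chunk => clipped.addColumn(chunk))
    clipped
  }
}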