Enforce NoScalaDoc rule in scalastyle checks (NVIDIA#449)
* fix rule

* fix violations

* fix violations

* fix violations

* remove comment

* re-order rules, add comment back

* fix violations in tests module
andygrove authored Jul 28, 2020
1 parent cd283fb commit 9f947b7
Showing 30 changed files with 1,127 additions and 1,127 deletions.
18 changes: 9 additions & 9 deletions scalastyle-config.xml
@@ -96,19 +96,19 @@ You can also disable only one rule, by specifying its rule id, as specified in:
  <check level="error" class="org.scalastyle.scalariform.NotImplementedErrorUsage"
         enabled="true"/>

-  <!-- This project uses Javadoc rather than Scaladoc so scaladoc checks are disabled -->
-  <check enabled="false" class="org.scalastyle.scalariform.ScalaDocChecker" level="warning"/>
-
-  <!-- ================================================================================ -->
-  <!-- rules we'd like to enforce, but haven't cleaned up the codebase yet -->
-  <!-- ================================================================================ -->
-
  <check customId="NoScalaDoc" level="error" class="org.scalastyle.file.RegexChecker"
-         enabled="false">
+         enabled="true">
    <parameters>
-    <parameter name="regex">(?m)^(\s*)/[*][*].*$(\r|)\n^\1 [*]</parameter>
+    <parameter name="regex">(?m)^(\s*)/[*][*].*$(\r|)\n^\1  [*]</parameter>
    </parameters>
    <customMessage>Use Javadoc style indentation for multiline comments</customMessage>
  </check>

+  <!-- ================================================================================ -->
+  <!-- rules we'd like to enforce, but haven't cleaned up the codebase yet -->
+  <!-- ================================================================================ -->
+
+  <!-- This project uses Javadoc rather than Scaladoc so scaladoc checks are disabled -->
+  <check enabled="false" class="org.scalastyle.scalariform.ScalaDocChecker" level="warning"/>
+
</scalastyle>
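
For context, a minimal sketch of the two comment layouts the re-enabled NoScalaDoc rule distinguishes; the object and method names below are illustrative only and are not part of the change:

object CommentStyleExample {
  // Scaladoc-style indentation: the continuation asterisks are indented two
  // spaces past the opening /**. This is the layout the rule rejects.
  /**
    * Adds two integers (would be flagged by NoScalaDoc).
    */
  def scalaDocStyle(a: Int, b: Int): Int = a + b

  // Javadoc-style indentation: the continuation asterisks sit one space in,
  // directly under the first asterisk of /**. This layout passes the check.
  /**
   * Adds two integers (accepted).
   */
  def javaDocStyle(a: Int, b: Int): Int = a + b
}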
18 changes: 9 additions & 9 deletions sql-plugin/src/main/scala/ai/rapids/cudf/CudaUtil.scala
@@ -18,15 +18,15 @@ package ai.rapids.cudf

object CudaUtil {
  /**
-    * Copy from `src` buffer, starting at `srcOffset`,
-    * to a destination buffer `dst` starting at `dstOffset`,
-    * `length` bytes, in the default stream.
-    * @param src source buffer
-    * @param srcOffset source offset
-    * @param dst destination buffer
-    * @param dstOffset destination offset
-    * @param length amount to copy
-    */
+   * Copy from `src` buffer, starting at `srcOffset`,
+   * to a destination buffer `dst` starting at `dstOffset`,
+   * `length` bytes, in the default stream.
+   * @param src source buffer
+   * @param srcOffset source offset
+   * @param dst destination buffer
+   * @param dstOffset destination offset
+   * @param length amount to copy
+   */
  def copy(src: MemoryBuffer, srcOffset: Long, dst: MemoryBuffer,
      dstOffset: Long, length: Long): Unit = {
    Cuda.memcpy(
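
As a quick illustration of the API documented in this hunk, a hedged usage sketch of CudaUtil.copy; it assumes the cudf Java bindings and a visible GPU at runtime, and the buffer sizes and offsets are arbitrary:

import ai.rapids.cudf.{CudaUtil, DeviceMemoryBuffer}

object CudaUtilCopyExample {
  def main(args: Array[String]): Unit = {
    // Two device buffers; the 1 KiB size is chosen only for illustration.
    val src = DeviceMemoryBuffer.allocate(1024)
    val dst = DeviceMemoryBuffer.allocate(1024)
    try {
      // Copy 256 bytes from offset 0 of src to offset 128 of dst,
      // on the default CUDA stream as the comment above describes.
      CudaUtil.copy(src, 0, dst, 128, 256)
    } finally {
      dst.close()
      src.close()
    }
  }
}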
@@ -25,10 +25,10 @@ import org.apache.spark.sql.types.StructType
import org.apache.spark.sql.vectorized.ColumnarBatch

/**
-  * A wrapper reader that always appends partition values to the ColumnarBatch produced by the input
-  * reader `fileReader`. Each scalar value is splatted to a column with the same number of
-  * rows as the batch returned by the reader.
-  */
+ * A wrapper reader that always appends partition values to the ColumnarBatch produced by the input
+ * reader `fileReader`. Each scalar value is splatted to a column with the same number of
+ * rows as the batch returned by the reader.
+ */
class ColumnarPartitionReaderWithPartitionValues(
    fileReader: PartitionReader[ColumnarBatch],
    partitionValues: Array[Scalar]) extends PartitionReader[ColumnarBatch] {
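
The "splatted" wording in the comment above means expanding a single scalar into a column with one entry per row. A minimal sketch of that idea using the cudf Java API directly; the object name and values are illustrative, and a GPU is assumed at runtime:

import ai.rapids.cudf.{ColumnVector, Scalar}

object SplatPartitionValueExample {
  def main(args: Array[String]): Unit = {
    val numRows = 4
    // A single partition value, e.g. a year column from a Hive-style partition.
    val partValue: Scalar = Scalar.fromInt(2020)
    // Expand ("splat") the scalar into a column with one entry per batch row.
    val splatted: ColumnVector = ColumnVector.fromScalar(partValue, numRows)
    try {
      assert(splatted.getRowCount == numRows)
    } finally {
      splatted.close()
      partValue.close()
    }
  }
}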
140 changes: 70 additions & 70 deletions sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuOrcScan.scala
@@ -179,14 +179,14 @@ case class GpuOrcPartitionReaderFactory(

object GpuOrcPartitionReader {
  /**
-    * This class describes a stripe that will appear in the ORC output memory file.
-    *
-    * @param infoBuilder builder for output stripe info that has been populated with
-    *                     all fields except those that can only be known when the file
-    *                     is being written (e.g.: file offset, compressed footer length)
-    * @param footer stripe footer
-    * @param inputDataRanges input file ranges (based at file offset 0) of stripe data
-    */
+   * This class describes a stripe that will appear in the ORC output memory file.
+   *
+   * @param infoBuilder builder for output stripe info that has been populated with
+   *                    all fields except those that can only be known when the file
+   *                    is being written (e.g.: file offset, compressed footer length)
+   * @param footer stripe footer
+   * @param inputDataRanges input file ranges (based at file offset 0) of stripe data
+   */
  private case class OrcOutputStripe(
      infoBuilder: OrcProto.StripeInformation.Builder,
      footer: OrcProto.StripeFooter,
@@ -200,32 +200,32 @@ object GpuOrcPartitionReader {
    OrcProto.Stream.Kind.ROW_INDEX)

  /**
-    * This class holds fields needed to read and iterate over the OrcFile
-    *
-    * @param updatedReadSchema read schema mapped to the file's field names
-    * @param evolution ORC SchemaEvolution
-    * @param dataReader ORC DataReader
-    * @param orcReader ORC Input File Reader
-    * @param blockIterator An iterator over the ORC output stripes
-    */
+   * This class holds fields needed to read and iterate over the OrcFile
+   *
+   * @param updatedReadSchema read schema mapped to the file's field names
+   * @param evolution ORC SchemaEvolution
+   * @param dataReader ORC DataReader
+   * @param orcReader ORC Input File Reader
+   * @param blockIterator An iterator over the ORC output stripes
+   */
  private case class OrcPartitionReaderContext(updatedReadSchema: TypeDescription,
      evolution: SchemaEvolution, dataReader: DataReader, orcReader: Reader,
      blockIterator: BufferedIterator[OrcOutputStripe])
}

/**
-  * A PartitionReader that reads an ORC file split on the GPU.
-  *
-  * Efficiently reading an ORC split on the GPU requires rebuilding the ORC file
-  * in memory such that only relevant data is present in the memory file.
-  * This avoids sending unnecessary data to the GPU and saves GPU memory.
-  *
-  * @param conf Hadoop configuration
-  * @param partFile file split to read
-  * @param dataSchema Spark schema of the file
-  * @param readDataSchema Spark schema of what will be read from the file
-  * @param debugDumpPrefix path prefix for dumping the memory file or null
-  */
+ * A PartitionReader that reads an ORC file split on the GPU.
+ *
+ * Efficiently reading an ORC split on the GPU requires rebuilding the ORC file
+ * in memory such that only relevant data is present in the memory file.
+ * This avoids sending unnecessary data to the GPU and saves GPU memory.
+ *
+ * @param conf Hadoop configuration
+ * @param partFile file split to read
+ * @param dataSchema Spark schema of the file
+ * @param readDataSchema Spark schema of what will be read from the file
+ * @param debugDumpPrefix path prefix for dumping the memory file or null
+ */
class GpuOrcPartitionReader(
    conf: Configuration,
    partFile: PartitionedFile,
Expand Down Expand Up @@ -319,13 +319,13 @@ class GpuOrcPartitionReader(
}

/**
* Build an integer array that maps the original ORC file's column IDs
* to column IDs in the memory file. Columns that are not present in
* the memory file will have a mapping of -1.
*
* @param evolution ORC SchemaEvolution
* @return column mapping array
*/
* Build an integer array that maps the original ORC file's column IDs
* to column IDs in the memory file. Columns that are not present in
* the memory file will have a mapping of -1.
*
* @param evolution ORC SchemaEvolution
* @return column mapping array
*/
private def columnRemap(evolution: SchemaEvolution): Array[Int] = {
val fileIncluded = evolution.getFileIncluded
if (fileIncluded != null) {
@@ -346,17 +346,17 @@
  }

  /**
-    * Build the output stripe descriptors for what will appear in the ORC memory file.
-    *
-    * @param stripes descriptors for the ORC input stripes, filtered to what is in the split
-    * @param evolution ORC SchemaEvolution
-    * @param sargApp ORC search argument applier
-    * @param sargColumns mapping of ORC search argument columns
-    * @param ignoreNonUtf8BloomFilter true if bloom filters other than UTF8 should be ignored
-    * @param writerVersion writer version from the original ORC input file
-    * @param dataReader ORC DataReader
-    * @return output stripes descriptors
-    */
+   * Build the output stripe descriptors for what will appear in the ORC memory file.
+   *
+   * @param stripes descriptors for the ORC input stripes, filtered to what is in the split
+   * @param evolution ORC SchemaEvolution
+   * @param sargApp ORC search argument applier
+   * @param sargColumns mapping of ORC search argument columns
+   * @param ignoreNonUtf8BloomFilter true if bloom filters other than UTF8 should be ignored
+   * @param writerVersion writer version from the original ORC input file
+   * @param dataReader ORC DataReader
+   * @return output stripes descriptors
+   */
  private def buildOutputStripes(
      stripes: Seq[StripeInformation],
      evolution: SchemaEvolution,
Expand Down Expand Up @@ -392,14 +392,14 @@ class GpuOrcPartitionReader(
}

/**
* Build the output stripe descriptor for a corresponding input stripe
* that should be copied to the ORC memory file.
*
* @param inputStripe input stripe descriptor
* @param inputFooter input stripe footer
* @param columnMapping mapping of input column IDs to output column IDs
* @return output stripe descriptor
*/
* Build the output stripe descriptor for a corresponding input stripe
* that should be copied to the ORC memory file.
*
* @param inputStripe input stripe descriptor
* @param inputFooter input stripe footer
* @param columnMapping mapping of input column IDs to output column IDs
* @return output stripe descriptor
*/
private def buildOutputStripe(
inputStripe: StripeInformation,
inputFooter: OrcProto.StripeFooter,
Expand Down Expand Up @@ -564,13 +564,13 @@ class GpuOrcPartitionReader(
}

/**
* Check if the read schema is compatible with the file schema.
*
* @param fileSchema input file's ORC schema
* @param readSchema ORC schema for what will be read
* @param isCaseAware true if field names are case-sensitive
* @return read schema mapped to the file's field names
*/
* Check if the read schema is compatible with the file schema.
*
* @param fileSchema input file's ORC schema
* @param readSchema ORC schema for what will be read
* @param isCaseAware true if field names are case-sensitive
* @return read schema mapped to the file's field names
*/
private def checkSchemaCompatibility(
fileSchema: TypeDescription,
readSchema: TypeDescription,
Expand Down Expand Up @@ -602,15 +602,15 @@ class GpuOrcPartitionReader(
}

/**
* Build an ORC search argument applier that can filter input file splits
* when predicate push-down filters have been specified.
*
* @param orcReader ORC input file reader
* @param readerOpts ORC reader options
* @param evolution ORC SchemaEvolution
* @param useUTCTimestamp true if timestamps are UTC
* @return the search argument applier and search argument column mapping
*/
* Build an ORC search argument applier that can filter input file splits
* when predicate push-down filters have been specified.
*
* @param orcReader ORC input file reader
* @param readerOpts ORC reader options
* @param evolution ORC SchemaEvolution
* @param useUTCTimestamp true if timestamps are UTC
* @return the search argument applier and search argument column mapping
*/
private def getSearchApplier(
orcReader: Reader,
readerOpts: Reader.Options,
@@ -288,22 +288,22 @@ case class GpuParquetPartitionReaderFactory(
}

/**
-  * A PartitionReader that reads a Parquet file split on the GPU.
-  *
-  * Efficiently reading a Parquet split on the GPU requires re-constructing the Parquet file
-  * in memory that contains just the column chunks that are needed. This avoids sending
-  * unnecessary data to the GPU and saves GPU memory.
-  *
-  * @param conf the Hadoop configuration
-  * @param split the file split to read
-  * @param filePath the path to the Parquet file
-  * @param clippedBlocks the block metadata from the original Parquet file that has been clipped
-  *                      to only contain the column chunks to be read
-  * @param clippedParquetSchema the Parquet schema from the original Parquet file that has been
-  *                             clipped to contain only the columns to be read
-  * @param readDataSchema the Spark schema describing what will be read
-  * @param debugDumpPrefix a path prefix to use for dumping the fabricated Parquet data or null
-  */
+ * A PartitionReader that reads a Parquet file split on the GPU.
+ *
+ * Efficiently reading a Parquet split on the GPU requires re-constructing the Parquet file
+ * in memory that contains just the column chunks that are needed. This avoids sending
+ * unnecessary data to the GPU and saves GPU memory.
+ *
+ * @param conf the Hadoop configuration
+ * @param split the file split to read
+ * @param filePath the path to the Parquet file
+ * @param clippedBlocks the block metadata from the original Parquet file that has been clipped
+ *                      to only contain the column chunks to be read
+ * @param clippedParquetSchema the Parquet schema from the original Parquet file that has been
+ *                             clipped to contain only the columns to be read
+ * @param readDataSchema the Spark schema describing what will be read
+ * @param debugDumpPrefix a path prefix to use for dumping the fabricated Parquet data or null
+ */
class ParquetPartitionReader(
    conf: Configuration,
    split: PartitionedFile,
Expand Down Expand Up @@ -433,15 +433,15 @@ class ParquetPartitionReader(
}

/**
* Copies the data corresponding to the clipped blocks in the original file and compute the
* block metadata for the output. The output blocks will contain the same column chunk
* metadata but with the file offsets updated to reflect the new position of the column data
* as written to the output.
*
* @param in the input stream for the original Parquet file
* @param out the output stream to receive the data
* @return updated block metadata corresponding to the output
*/
* Copies the data corresponding to the clipped blocks in the original file and compute the
* block metadata for the output. The output blocks will contain the same column chunk
* metadata but with the file offsets updated to reflect the new position of the column data
* as written to the output.
*
* @param in the input stream for the original Parquet file
* @param out the output stream to receive the data
* @return updated block metadata corresponding to the output
*/
private def copyBlocksData(
in: FSDataInputStream,
out: HostMemoryOutputStream,
Expand Down Expand Up @@ -675,12 +675,12 @@ object ParquetPartitionReader {
private case class CopyRange(offset: Long, length: Long)

/**
* Build a new BlockMetaData
*
* @param rowCount the number of rows in this block
* @param columns the new column chunks to reference in the new BlockMetaData
* @return the new BlockMetaData
*/
* Build a new BlockMetaData
*
* @param rowCount the number of rows in this block
* @param columns the new column chunks to reference in the new BlockMetaData
* @return the new BlockMetaData
*/
private def newParquetBlock(
rowCount: Long,
columns: Seq[ColumnChunkMetaData]): BlockMetaData = {
@@ -698,14 +698,14 @@
  }

  /**
-    * Trim block metadata to contain only the column chunks that occur in the specified columns.
-    * The column chunks that are returned are preserved verbatim
-    * (i.e.: file offsets remain unchanged).
-    *
-    * @param columnPaths the paths of columns to preserve
-    * @param blocks the block metadata from the original Parquet file
-    * @return the updated block metadata with undesired column chunks removed
-    */
+   * Trim block metadata to contain only the column chunks that occur in the specified columns.
+   * The column chunks that are returned are preserved verbatim
+   * (i.e.: file offsets remain unchanged).
+   *
+   * @param columnPaths the paths of columns to preserve
+   * @param blocks the block metadata from the original Parquet file
+   * @return the updated block metadata with undesired column chunks removed
+   */
  private[spark] def clipBlocks(columnPaths: Seq[ColumnPath],
      blocks: Seq[BlockMetaData]): Seq[BlockMetaData] = {
    val pathSet = columnPaths.toSet