feat: initial formatting

vibhatha · May 25, 2024 · 0b980df · 0b980df
1 parent 5d0431b
commit 0b980df
Show file tree

Hide file tree

Showing 54 changed files with 1,266 additions and 1,137 deletions.
diff --git a/java/algorithm/pom.xml b/java/algorithm/pom.xml
@@ -21,7 +21,7 @@
   <description>(Experimental/Contrib) A collection of algorithms for working with ValueVectors.</description>
 
   <properties>
-    <spotless.version>2.42.0</spotless.version>
+    <spotless.version>2.30.0</spotless.version>
   </properties>
 
   <dependencies>
@@ -53,6 +53,16 @@
   </dependencies>
 
   <build>
+    <plugins>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-checkstyle-plugin</artifactId>
+        <version>3.1.0</version>
+        <configuration>
+          <skip>true</skip>
+        </configuration>
+      </plugin>
+    </plugins>
   </build>
 
   <profiles>
@@ -92,7 +102,7 @@
               </formats>
               <java>
                 <googleJavaFormat>
-                  <version>1.9</version>
+                  <version>1.17.0</version>
                   <style>GOOGLE</style>
                 </googleJavaFormat>
               </java>
@@ -114,6 +124,7 @@
               <execution>
                 <id>spotless-check</id>
                 <goals>
+                  <goal>apply</goal>
                   <goal>check</goal>
                 </goals>
                 <phase>validate</phase>

diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/deduplicate/DeduplicationUtils.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/deduplicate/DeduplicationUtils.java
@@ -26,18 +26,18 @@
 import org.apache.arrow.vector.compare.RangeEqualsVisitor;
 import org.apache.arrow.vector.util.DataSizeRoundingUtil;
 
-/**
- * Utilities for vector deduplication.
- */
+/** Utilities for vector deduplication. */
 class DeduplicationUtils {
 
   /**
    * Gets the start positions of the first distinct values in a vector.
+   *
    * @param vector the target vector.
    * @param runStarts the bit set to hold the start positions.
    * @param <V> vector type.
    */
-  public static <V extends ValueVector> void populateRunStartIndicators(V vector, ArrowBuf runStarts) {
+  public static <V extends ValueVector> void populateRunStartIndicators(
+      V vector, ArrowBuf runStarts) {
     int bufSize = DataSizeRoundingUtil.divideBy8Ceil(vector.getValueCount());
     Preconditions.checkArgument(runStarts.capacity() >= bufSize);
     runStarts.setZero(0, bufSize);
@@ -55,6 +55,7 @@ public static <V extends ValueVector> void populateRunStartIndicators(V vector,
 
   /**
    * Gets the run lengths, given the start positions.
+   *
    * @param runStarts the bit set for start positions.
    * @param runLengths the run length vector to populate.
    * @param valueCount the number of values in the bit set.
@@ -76,15 +77,15 @@ public static void populateRunLengths(ArrowBuf runStarts, IntVector runLengths,
   }
 
   /**
-   * Gets distinct values from the input vector by removing adjacent
-   * duplicated values.
+   * Gets distinct values from the input vector by removing adjacent duplicated values.
+   *
    * @param indicators the bit set containing the start positions of distinct values.
    * @param inputVector the input vector.
    * @param outputVector the output vector.
    * @param <V> vector type.
    */
   public static <V extends ValueVector> void populateDeduplicatedValues(
-          ArrowBuf indicators, V inputVector, V outputVector) {
+      ArrowBuf indicators, V inputVector, V outputVector) {
     int dstIdx = 0;
     for (int srcIdx = 0; srcIdx < inputVector.getValueCount(); srcIdx++) {
       if (BitVectorHelper.get(indicators, srcIdx) != 0) {

diff --git a/...algorithm/src/main/java/org/apache/arrow/algorithm/deduplicate/VectorRunDeduplicator.java b/...algorithm/src/main/java/org/apache/arrow/algorithm/deduplicate/VectorRunDeduplicator.java
@@ -26,29 +26,28 @@
 import org.apache.arrow.vector.util.DataSizeRoundingUtil;
 
 /**
- * Remove adjacent equal elements from a vector.
- * If the vector is sorted, it removes all duplicated values in the vector.
+ * Remove adjacent equal elements from a vector. If the vector is sorted, it removes all duplicated
+ * values in the vector.
+ *
  * @param <V> vector type.
  */
 public class VectorRunDeduplicator<V extends ValueVector> implements AutoCloseable {
 
   /**
-   * Bit set for distinct values.
-   * If the value at some index is not equal to the previous value,
-   * its bit is set to 1, otherwise its bit is set to 0.
+   * Bit set for distinct values. If the value at some index is not equal to the previous value, its
+   * bit is set to 1, otherwise its bit is set to 0.
    */
   private ArrowBuf distinctValueBuffer;
 
-  /**
-   * The vector to deduplicate.
-   */
+  /** The vector to deduplicate. */
   private final V vector;
 
   private final BufferAllocator allocator;
 
   /**
    * Constructs a vector run deduplicator for a given vector.
-   * @param vector the vector to deduplicate.  Ownership is NOT taken.
+   *
+   * @param vector the vector to deduplicate. Ownership is NOT taken.
    * @param allocator the allocator used for allocating buffers for start indices.
    */
   public VectorRunDeduplicator(V vector, BufferAllocator allocator) {
@@ -65,17 +64,20 @@ private void createDistinctValueBuffer() {
 
   /**
    * Gets the number of values which are different from their predecessor.
+   *
    * @return the run count.
    */
   public int getRunCount() {
     if (distinctValueBuffer == null) {
       createDistinctValueBuffer();
     }
-    return vector.getValueCount() - BitVectorHelper.getNullCount(distinctValueBuffer, vector.getValueCount());
+    return vector.getValueCount()
+        - BitVectorHelper.getNullCount(distinctValueBuffer, vector.getValueCount());
   }
 
   /**
    * Gets the vector with deduplicated adjacent values removed.
+   *
    * @param outVector the output vector.
    */
   public void populateDeduplicatedValues(V outVector) {
@@ -88,14 +90,16 @@ public void populateDeduplicatedValues(V outVector) {
 
   /**
    * Gets the length of each distinct value.
+   *
    * @param lengthVector the vector for holding length values.
    */
   public void populateRunLengths(IntVector lengthVector) {
     if (distinctValueBuffer == null) {
       createDistinctValueBuffer();
     }
 
-    DeduplicationUtils.populateRunLengths(distinctValueBuffer, lengthVector, vector.getValueCount());
+    DeduplicationUtils.populateRunLengths(
+        distinctValueBuffer, lengthVector, vector.getValueCount());
   }
 
   @Override

diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/DictionaryBuilder.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/DictionaryBuilder.java
@@ -20,27 +20,26 @@
 import org.apache.arrow.vector.ValueVector;
 
 /**
- * A dictionary builder is intended for the scenario frequently encountered in practice:
- * the dictionary is not known a priori, so it is generated dynamically.
- * In particular, when a new value arrives, it is tested to check if it is already
- * in the dictionary. If so, it is simply neglected, otherwise, it is added to the dictionary.
- * <p>
- *   The dictionary builder is intended to build a single dictionary.
- *   So it cannot be used for different dictionaries.
- * </p>
+ * A dictionary builder is intended for the scenario frequently encountered in practice: the
+ * dictionary is not known a priori, so it is generated dynamically. In particular, when a new value
+ * arrives, it is tested to check if it is already in the dictionary. If so, it is simply ignored;
+ * otherwise, it is added to the dictionary.
+ *
+ * <p>The dictionary builder is intended to build a single dictionary. So it cannot be used for
+ * different dictionaries.
+ *
  * <p>Below gives the sample code for using the dictionary builder
+ *
  * <pre>{@code
  * DictionaryBuilder dictionaryBuilder = ...
  * ...
  * dictionaryBuild.addValue(newValue);
  * ...
  * }</pre>
- * </p>
- * <p>
- *   With the above code, the dictionary vector will be populated,
- *   and it can be retrieved by the {@link DictionaryBuilder#getDictionary()} method.
- *   After that, dictionary encoding can proceed with the populated dictionary..
- * </p>
+ *
+ * <p>With the above code, the dictionary vector will be populated, and it can be retrieved by the
+ * {@link DictionaryBuilder#getDictionary()} method. After that, dictionary encoding can proceed
+ * with the populated dictionary.
  *
  * @param <V> the dictionary vector type.
  */
@@ -58,7 +57,7 @@ public interface DictionaryBuilder<V extends ValueVector> {
    * Try to add an element from the target vector to the dictionary.
    *
    * @param targetVector the target vector containing new element.
-   * @param targetIndex  the index of the new element in the target vector.
+   * @param targetIndex the index of the new element in the target vector.
    * @return the index of the new element in the dictionary.
    */
   int addValue(V targetVector, int targetIndex);

diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/DictionaryEncoder.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/DictionaryEncoder.java
@@ -22,18 +22,20 @@
 
 /**
  * A dictionary encoder translates one vector into another one based on a dictionary vector.
- * According to Arrow specification, the encoded vector must be an integer based vector, which
- * is the index of the original vector element in the dictionary.
+ * According to Arrow specification, the encoded vector must be an integer based vector, which is
+ * the index of the original vector element in the dictionary.
+ *
  * @param <E> type of the encoded vector.
  * @param <D> type of the vector to encode. It is also the type of the dictionary vector.
  */
 public interface DictionaryEncoder<E extends BaseIntVector, D extends ValueVector> {
 
   /**
    * Translates an input vector into an output vector.
+   *
    * @param input the input vector.
-   * @param output the output vector. Note that it must be in a fresh state. At least,
-   *     all its validity bits should be clear.
+   * @param output the output vector. Note that it must be in a fresh state. At least, all its
+   *     validity bits should be clear.
    */
   void encode(D input, E output);
 }
diff --git a/.../src/main/java/org/apache/arrow/algorithm/dictionary/HashTableBasedDictionaryBuilder.java b/.../src/main/java/org/apache/arrow/algorithm/dictionary/HashTableBasedDictionaryBuilder.java
@@ -18,45 +18,36 @@
 package org.apache.arrow.algorithm.dictionary;
 
 import java.util.HashMap;
-
 import org.apache.arrow.memory.util.ArrowBufPointer;
 import org.apache.arrow.memory.util.hash.ArrowBufHasher;
 import org.apache.arrow.memory.util.hash.SimpleHasher;
 import org.apache.arrow.vector.ElementAddressableVector;
 
 /**
- * This class builds the dictionary based on a hash table.
- * Each add operation can be finished in O(1) time,
- * where n is the current dictionary size.
+ * This class builds the dictionary based on a hash table. Each add operation can be finished in
+ * O(1) time, regardless of the current dictionary size.
  *
  * @param <V> the dictionary vector type.
  */
-public class HashTableBasedDictionaryBuilder<V extends ElementAddressableVector> implements DictionaryBuilder<V> {
+public class HashTableBasedDictionaryBuilder<V extends ElementAddressableVector>
+    implements DictionaryBuilder<V> {
 
-  /**
-   * The dictionary to be built.
-   */
+  /** The dictionary to be built. */
   private final V dictionary;
 
-  /**
-   * If null should be encoded.
-   */
+  /** If null should be encoded. */
   private final boolean encodeNull;
 
   /**
-   * The hash map for distinct dictionary entries.
-   * The key is the pointer to the dictionary element, whereas the value is the index in the dictionary.
+   * The hash map for distinct dictionary entries. The key is the pointer to the dictionary element,
+   * whereas the value is the index in the dictionary.
    */
   private HashMap<ArrowBufPointer, Integer> hashMap = new HashMap<>();
 
-  /**
-   * The hasher used for calculating the hash code.
-   */
+  /** The hasher used for calculating the hash code. */
   private final ArrowBufHasher hasher;
 
-  /**
-   * Next pointer to try to add to the hash table.
-   */
+  /** Next pointer to try to add to the hash table. */
   private ArrowBufPointer nextPointer;
 
   /**
@@ -83,7 +74,7 @@ public HashTableBasedDictionaryBuilder(V dictionary, boolean encodeNull) {
    *
    * @param dictionary the dictionary to populate.
    * @param encodeNull if null values should be added to the dictionary.
-   * @param hasher     the hasher used to compute the hash code.
+   * @param hasher the hasher used to compute the hash code.
    */
   public HashTableBasedDictionaryBuilder(V dictionary, boolean encodeNull, ArrowBufHasher hasher) {
     this.dictionary = dictionary;
@@ -125,7 +116,7 @@ public int addValues(V targetVector) {
    * Try to add an element from the target vector to the dictionary.
    *
    * @param targetVector the target vector containing new element.
-   * @param targetIndex  the index of the new element in the target vector.
+   * @param targetIndex the index of the new element in the target vector.
    * @return the index of the new element in the dictionary.
    */
   @Override