HBASE-27314 Make index block be customized and configured #4763

Merged: 1 commit, Sep 8, 2022
@@ -26,6 +26,7 @@
import org.apache.hadoop.hbase.exceptions.HBaseException;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.io.encoding.IndexBlockEncoding;
import org.apache.hadoop.hbase.regionserver.BloomType;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.PrettyPrinter.Unit;
@@ -338,6 +339,11 @@ public DataBlockEncoding getDataBlockEncoding() {
return delegatee.getDataBlockEncoding();
}

@Override
public IndexBlockEncoding getIndexBlockEncoding() {
return delegatee.getIndexBlockEncoding();
}

/**
* Set data block encoding algorithm used in block cache.
* @param value What kind of data block encoding will be used.
@@ -24,6 +24,7 @@
import org.apache.hadoop.hbase.MemoryCompactionPolicy;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.io.encoding.IndexBlockEncoding;
import org.apache.hadoop.hbase.regionserver.BloomType;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.yetus.audience.InterfaceAudience;
@@ -107,6 +108,9 @@ public interface ColumnFamilyDescriptor {
/** Returns the data block encoding algorithm used in block cache and optionally on disk */
DataBlockEncoding getDataBlockEncoding();

/** Returns the index block encoding algorithm used in block cache and optionally on disk */
IndexBlockEncoding getIndexBlockEncoding();

/** Returns the raw crypto key attribute for the family, or null if not set */
byte[] getEncryptionKey();

@@ -31,6 +31,7 @@
import org.apache.hadoop.hbase.exceptions.HBaseException;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.io.encoding.IndexBlockEncoding;
import org.apache.hadoop.hbase.regionserver.BloomType;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.PrettyPrinter;
@@ -84,6 +85,10 @@ public class ColumnFamilyDescriptorBuilder {
public static final String DATA_BLOCK_ENCODING = "DATA_BLOCK_ENCODING";
private static final Bytes DATA_BLOCK_ENCODING_BYTES =
new Bytes(Bytes.toBytes(DATA_BLOCK_ENCODING));
@InterfaceAudience.Private
public static final String INDEX_BLOCK_ENCODING = "INDEX_BLOCK_ENCODING";
private static final Bytes INDEX_BLOCK_ENCODING_BYTES =
new Bytes(Bytes.toBytes(INDEX_BLOCK_ENCODING));
/**
* Key for the BLOCKCACHE attribute. A more exact name would be CACHE_DATA_ON_READ because this
* flag sets whether or not we cache DATA blocks. We always cache INDEX and BLOOM blocks; caching
@@ -199,6 +204,11 @@ public class ColumnFamilyDescriptorBuilder {
*/
public static final DataBlockEncoding DEFAULT_DATA_BLOCK_ENCODING = DataBlockEncoding.NONE;

/**
* Default index block encoding algorithm.
*/
public static final IndexBlockEncoding DEFAULT_INDEX_BLOCK_ENCODING = IndexBlockEncoding.NONE;

/**
* Default number of versions of a record to keep.
*/
@@ -301,6 +311,7 @@ public static Map<String, String> getDefaultValues() {
DEFAULT_VALUES.put(BLOCKCACHE, String.valueOf(DEFAULT_BLOCKCACHE));
DEFAULT_VALUES.put(KEEP_DELETED_CELLS, String.valueOf(DEFAULT_KEEP_DELETED));
DEFAULT_VALUES.put(DATA_BLOCK_ENCODING, String.valueOf(DEFAULT_DATA_BLOCK_ENCODING));
DEFAULT_VALUES.put(INDEX_BLOCK_ENCODING, String.valueOf(DEFAULT_INDEX_BLOCK_ENCODING));
// Do NOT add this key/value by default. NEW_VERSION_BEHAVIOR is NOT defined in hbase1 so
// it is not possible to make an hbase1 HCD the same as an hbase2 HCD and so the replication
// compare of schemas will fail. It is OK not adding the below to the initial map because of
@@ -501,6 +512,11 @@ public ColumnFamilyDescriptorBuilder setDataBlockEncoding(DataBlockEncoding valu
return this;
}

public ColumnFamilyDescriptorBuilder setIndexBlockEncoding(IndexBlockEncoding value) {
desc.setIndexBlockEncoding(value);
return this;
}
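
For context, a minimal usage sketch of the new builder method; the wrapper class and family name below are illustrative, not part of this patch:

import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
import org.apache.hadoop.hbase.io.encoding.IndexBlockEncoding;
import org.apache.hadoop.hbase.util.Bytes;

public class IndexBlockEncodingUsage {
  public static void main(String[] args) {
    // INDEX_BLOCK_ENCODING is the new column family attribute added by this
    // patch; it defaults to IndexBlockEncoding.NONE when not set explicitly.
    ColumnFamilyDescriptor cfd = ColumnFamilyDescriptorBuilder.newBuilder(Bytes.toBytes("cf"))
      .setIndexBlockEncoding(IndexBlockEncoding.NONE)
      .build();
    System.out.println(cfd.getIndexBlockEncoding()); // prints NONE
  }
}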

public ColumnFamilyDescriptorBuilder setEncryptionKey(final byte[] value) {
desc.setEncryptionKey(value);
return this;
@@ -832,6 +848,22 @@ public ModifyableColumnFamilyDescriptor setDataBlockEncoding(DataBlockEncoding t
type == null ? DataBlockEncoding.NONE.name() : type.name());
}

@Override
public IndexBlockEncoding getIndexBlockEncoding() {
return getStringOrDefault(INDEX_BLOCK_ENCODING_BYTES,
n -> IndexBlockEncoding.valueOf(n.toUpperCase()), IndexBlockEncoding.NONE);
}

/**
* Set index block encoding algorithm used in block cache.
* @param type What kind of index block encoding will be used.
* @return this (for chained invocation)
*/
public ModifyableColumnFamilyDescriptor setIndexBlockEncoding(IndexBlockEncoding type) {
return setValue(INDEX_BLOCK_ENCODING_BYTES,
type == null ? IndexBlockEncoding.NONE.name() : type.name());
}

/**
* Set whether the tags should be compressed along with DataBlockEncoding. When no
* DataBlockEncoding is used, this has no effect.
* @return this (for chained
@@ -31,6 +31,7 @@
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.io.encoding.IndexBlockEncoding;
import org.apache.hadoop.hbase.regionserver.BloomType;
import org.apache.hadoop.hbase.testclassification.MiscTests;
import org.apache.hadoop.hbase.testclassification.SmallTests;
@@ -224,7 +225,7 @@ public void testSetBlocksize() throws HBaseException {
@Test
public void testDefaultBuilder() {
final Map<String, String> defaultValueMap = ColumnFamilyDescriptorBuilder.getDefaultValues();
assertEquals(defaultValueMap.size(), 11);
assertEquals(defaultValueMap.size(), 12);
assertEquals(defaultValueMap.get(ColumnFamilyDescriptorBuilder.BLOOMFILTER),
BloomType.ROW.toString());
assertEquals(defaultValueMap.get(ColumnFamilyDescriptorBuilder.REPLICATION_SCOPE), "0");
@@ -244,6 +245,8 @@ public void testDefaultBuilder() {
KeepDeletedCells.FALSE.toString());
assertEquals(defaultValueMap.get(ColumnFamilyDescriptorBuilder.DATA_BLOCK_ENCODING),
DataBlockEncoding.NONE.toString());
assertEquals(defaultValueMap.get(ColumnFamilyDescriptorBuilder.INDEX_BLOCK_ENCODING),
IndexBlockEncoding.NONE.toString());
}

@Test
@@ -0,0 +1,121 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.io.encoding;

import java.io.IOException;
import java.io.OutputStream;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.yetus.audience.InterfaceAudience;

/**
* Provide access to all index block encoding algorithms. All of the algorithms are required to
* have a unique id which should <b>NEVER</b> be changed. If you want to add a new algorithm/version,
* assign it a new id. Announce the new id in the HBase mailing list to prevent collisions.
*/
@InterfaceAudience.Public
public enum IndexBlockEncoding {

/** Disable index block encoding. */
NONE(0, null),
// id 1 is reserved for the PREFIX_TREE algorithm to be added later
PREFIX_TREE(1, null);

private final short id;
private final byte[] idInBytes;
private final String encoderCls;

public static final int ID_SIZE = Bytes.SIZEOF_SHORT;

/** Maps index block encoding ids to enum instances. */
private static IndexBlockEncoding[] idArray = new IndexBlockEncoding[Byte.MAX_VALUE + 1];

static {
for (IndexBlockEncoding algo : values()) {
if (idArray[algo.id] != null) {
throw new RuntimeException(
String.format("Two data block encoder algorithms '%s' and '%s' have " + "the same id %d",
idArray[algo.id].toString(), algo.toString(), (int) algo.id));
}
idArray[algo.id] = algo;
}
}

private IndexBlockEncoding(int id, String encoderClsName) {
if (id < 0 || id > Byte.MAX_VALUE) {
throw new AssertionError("Data block encoding algorithm id is out of range: " + id);
}
this.id = (short) id;
this.idInBytes = Bytes.toBytes(this.id);
if (idInBytes.length != ID_SIZE) {
// While this may seem redundant, if we accidentally serialize
// the id as e.g. an int instead of a short, all encoders will break.
throw new RuntimeException("Unexpected length of encoder ID byte " + "representation: "
+ Bytes.toStringBinary(idInBytes));
}
this.encoderCls = encoderClsName;
}

/** Returns name converted to bytes. */
public byte[] getNameInBytes() {
return Bytes.toBytes(toString());
}

/** Returns the id of an index block encoder. */
public short getId() {
return id;
}

/**
* Writes id in bytes.
* @param stream where the id should be written.
*/
public void writeIdInBytes(OutputStream stream) throws IOException {
stream.write(idInBytes);
}

/**
* Writes id bytes to the given array starting from offset.
* @param dest output array
* @param offset starting offset of the output array
*/
public void writeIdInBytes(byte[] dest, int offset) throws IOException {
System.arraycopy(idInBytes, 0, dest, offset, ID_SIZE);
}

/**
* Find and return the name of index block encoder for the given id.
* @param encoderId id of index block encoder
* @return name, same as used in options in column family
*/
public static String getNameFromId(short encoderId) {
return getEncodingById(encoderId).toString();
}

public static IndexBlockEncoding getEncodingById(short indexBlockEncodingId) {
IndexBlockEncoding algorithm = null;
if (indexBlockEncodingId >= 0 && indexBlockEncodingId <= Byte.MAX_VALUE) {
algorithm = idArray[indexBlockEncodingId];
}
if (algorithm == null) {
throw new IllegalArgumentException(String
.format("There is no index block encoder for given id '%d'", (int) indexBlockEncodingId));
}
return algorithm;
}

}
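
A short sketch of the id round-trip this enum supports; the demo class is illustrative, and only the IndexBlockEncoding calls come from this patch:

import org.apache.hadoop.hbase.io.encoding.IndexBlockEncoding;

public class IndexBlockEncodingIdDemo {
  public static void main(String[] args) throws Exception {
    // Write the two-byte encoding id the way a block writer would...
    byte[] dest = new byte[IndexBlockEncoding.ID_SIZE];
    IndexBlockEncoding.NONE.writeIdInBytes(dest, 0);

    // ...then resolve the enum back from the id on the read path.
    short id = IndexBlockEncoding.NONE.getId();
    System.out.println(IndexBlockEncoding.getNameFromId(id)); // prints NONE
    System.out.println(IndexBlockEncoding.getEncodingById(id)); // prints NONE
  }
}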
@@ -24,6 +24,7 @@
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.crypto.Encryption;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.io.encoding.IndexBlockEncoding;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.ChecksumType;
import org.apache.hadoop.hbase.util.ClassSize;
@@ -58,6 +59,7 @@ public class HFileContext implements HeapSize, Cloneable {
/** Number of uncompressed bytes we allow per block. */
private int blockSize = HConstants.DEFAULT_BLOCKSIZE;
private DataBlockEncoding encoding = DataBlockEncoding.NONE;
private IndexBlockEncoding indexBlockEncoding = IndexBlockEncoding.NONE;
/** Encryption algorithm and key used */
private Encryption.Context cryptoContext = Encryption.Context.NONE;
private long fileCreateTime;
@@ -89,13 +91,14 @@ public HFileContext(HFileContext context) {
this.columnFamily = context.columnFamily;
this.tableName = context.tableName;
this.cellComparator = context.cellComparator;
this.indexBlockEncoding = context.indexBlockEncoding;
}

HFileContext(boolean useHBaseChecksum, boolean includesMvcc, boolean includesTags,
Compression.Algorithm compressAlgo, boolean compressTags, ChecksumType checksumType,
int bytesPerChecksum, int blockSize, DataBlockEncoding encoding,
Encryption.Context cryptoContext, long fileCreateTime, String hfileName, byte[] columnFamily,
byte[] tableName, CellComparator cellComparator) {
byte[] tableName, CellComparator cellComparator, IndexBlockEncoding indexBlockEncoding) {
this.usesHBaseChecksum = useHBaseChecksum;
this.includesMvcc = includesMvcc;
this.includesTags = includesTags;
@@ -107,6 +110,9 @@ public HFileContext(HFileContext context) {
if (encoding != null) {
this.encoding = encoding;
}
if (indexBlockEncoding != null) {
this.indexBlockEncoding = indexBlockEncoding;
}
this.cryptoContext = cryptoContext;
this.fileCreateTime = fileCreateTime;
this.hfileName = hfileName;
@@ -186,6 +192,10 @@ public DataBlockEncoding getDataBlockEncoding() {
return encoding;
}

public IndexBlockEncoding getIndexBlockEncoding() {
return indexBlockEncoding;
}

public Encryption.Context getEncryptionContext() {
return cryptoContext;
}
@@ -253,6 +263,8 @@ public String toString() {
sb.append(blockSize);
sb.append(", encoding=");
sb.append(encoding);
sb.append(", indexBlockEncoding=");
sb.append(indexBlockEncoding);
sb.append(", includesMvcc=");
sb.append(includesMvcc);
sb.append(", includesTags=");
@@ -22,6 +22,7 @@
import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
import org.apache.hadoop.hbase.io.crypto.Encryption;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.io.encoding.IndexBlockEncoding;
import org.apache.hadoop.hbase.util.ChecksumType;
import org.apache.yetus.audience.InterfaceAudience;

@@ -50,6 +51,8 @@ public class HFileContextBuilder {
/** Number of uncompressed bytes we allow per block. */
private int blockSize = HConstants.DEFAULT_BLOCKSIZE;
private DataBlockEncoding encoding = DataBlockEncoding.NONE;
/** The index block encoding type. */
private IndexBlockEncoding indexBlockEncoding = IndexBlockEncoding.NONE;
/** Crypto context */
private Encryption.Context cryptoContext = Encryption.Context.NONE;
private long fileCreateTime = 0;
@@ -128,6 +131,11 @@ public HFileContextBuilder withDataBlockEncoding(DataBlockEncoding encoding) {
return this;
}

public HFileContextBuilder withIndexBlockEncoding(IndexBlockEncoding indexBlockEncoding) {
this.indexBlockEncoding = indexBlockEncoding;
return this;
}

public HFileContextBuilder withEncryptionContext(Encryption.Context cryptoContext) {
this.cryptoContext = cryptoContext;
return this;
@@ -161,6 +169,6 @@ public HFileContextBuilder withCellComparator(CellComparator cellComparator) {
public HFileContext build() {
return new HFileContext(usesHBaseChecksum, includesMvcc, includesTags, compression,
compressTags, checkSumType, bytesPerChecksum, blockSize, encoding, cryptoContext,
fileCreateTime, hfileName, columnFamily, tableName, cellComparator);
fileCreateTime, hfileName, columnFamily, tableName, cellComparator, indexBlockEncoding);
}
}
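
Finally, a minimal sketch of the new option threaded through HFileContextBuilder on the write path; the demo class is illustrative:

import org.apache.hadoop.hbase.io.encoding.IndexBlockEncoding;
import org.apache.hadoop.hbase.io.hfile.HFileContext;
import org.apache.hadoop.hbase.io.hfile.HFileContextBuilder;

public class HFileContextIndexEncodingDemo {
  public static void main(String[] args) {
    // The builder now carries the index block encoding into HFileContext,
    // alongside the existing data block encoding.
    HFileContext ctx = new HFileContextBuilder()
      .withIndexBlockEncoding(IndexBlockEncoding.NONE)
      .build();
    System.out.println(ctx.getIndexBlockEncoding()); // prints NONE
    System.out.println(ctx); // toString() now includes indexBlockEncoding=NONE
  }
}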