Skip to content

Commit

Permalink
HBASE-27314 Make index block be customized and configured (#4763)
Browse files Browse the repository at this point in the history
Signed-off-by: Duo Zhang <[email protected]>
  • Loading branch information
binlijin authored Sep 8, 2022
1 parent 175f5af commit bfd8501
Show file tree
Hide file tree
Showing 18 changed files with 1,033 additions and 81 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
import org.apache.hadoop.hbase.exceptions.HBaseException;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.io.encoding.IndexBlockEncoding;
import org.apache.hadoop.hbase.regionserver.BloomType;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.PrettyPrinter.Unit;
Expand Down Expand Up @@ -338,6 +339,11 @@ public DataBlockEncoding getDataBlockEncoding() {
return delegatee.getDataBlockEncoding();
}

/** Returns the index block encoding reported by {@code delegatee}; this method only forwards. */
@Override
public IndexBlockEncoding getIndexBlockEncoding() {
return delegatee.getIndexBlockEncoding();
}

/**
* Set data block encoding algorithm used in block cache.
* @param value What kind of data block encoding will be used.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import org.apache.hadoop.hbase.MemoryCompactionPolicy;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.io.encoding.IndexBlockEncoding;
import org.apache.hadoop.hbase.regionserver.BloomType;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.yetus.audience.InterfaceAudience;
Expand Down Expand Up @@ -107,6 +108,9 @@ public interface ColumnFamilyDescriptor {
/** Returns the data block encoding algorithm used in block cache and optionally on disk */
DataBlockEncoding getDataBlockEncoding();

/** Returns the index block encoding algorithm used in block cache and optionally on disk */
IndexBlockEncoding getIndexBlockEncoding();

/** Returns the raw crypto key attribute for the family, or null if not set */
byte[] getEncryptionKey();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
import org.apache.hadoop.hbase.exceptions.HBaseException;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.io.encoding.IndexBlockEncoding;
import org.apache.hadoop.hbase.regionserver.BloomType;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.PrettyPrinter;
Expand Down Expand Up @@ -84,6 +85,10 @@ public class ColumnFamilyDescriptorBuilder {
public static final String DATA_BLOCK_ENCODING = "DATA_BLOCK_ENCODING";
private static final Bytes DATA_BLOCK_ENCODING_BYTES =
new Bytes(Bytes.toBytes(DATA_BLOCK_ENCODING));
/** Attribute key under which a column family's index block encoding name is stored. */
@InterfaceAudience.Private
public static final String INDEX_BLOCK_ENCODING = "INDEX_BLOCK_ENCODING";
/** {@link #INDEX_BLOCK_ENCODING} pre-converted to {@link Bytes} for use as a value-map key. */
private static final Bytes INDEX_BLOCK_ENCODING_BYTES =
new Bytes(Bytes.toBytes(INDEX_BLOCK_ENCODING));
/**
* Key for the BLOCKCACHE attribute. A more exact name would be CACHE_DATA_ON_READ because this
* flag sets whether or not we cache DATA blocks. We always cache INDEX and BLOOM blocks; caching
Expand Down Expand Up @@ -199,6 +204,11 @@ public class ColumnFamilyDescriptorBuilder {
*/
public static final DataBlockEncoding DEFAULT_DATA_BLOCK_ENCODING = DataBlockEncoding.NONE;

/**
* Default index block encoding algorithm.
*/
public static final IndexBlockEncoding DEFAULT_INDEX_BLOCK_ENCODING = IndexBlockEncoding.NONE;

/**
* Default number of versions of a record to keep.
*/
Expand Down Expand Up @@ -301,6 +311,7 @@ public static Map<String, String> getDefaultValues() {
DEFAULT_VALUES.put(BLOCKCACHE, String.valueOf(DEFAULT_BLOCKCACHE));
DEFAULT_VALUES.put(KEEP_DELETED_CELLS, String.valueOf(DEFAULT_KEEP_DELETED));
DEFAULT_VALUES.put(DATA_BLOCK_ENCODING, String.valueOf(DEFAULT_DATA_BLOCK_ENCODING));
DEFAULT_VALUES.put(INDEX_BLOCK_ENCODING, String.valueOf(DEFAULT_INDEX_BLOCK_ENCODING));
// Do NOT add this key/value by default. NEW_VERSION_BEHAVIOR is NOT defined in hbase1 so
// it is not possible to make an hbase1 HCD the same as an hbase2 HCD and so the replication
// compare of schemas will fail. It is OK not adding the below to the initial map because of
Expand Down Expand Up @@ -501,6 +512,11 @@ public ColumnFamilyDescriptorBuilder setDataBlockEncoding(DataBlockEncoding valu
return this;
}

/**
 * Sets the index block encoding on the descriptor being built by forwarding to
 * {@code desc.setIndexBlockEncoding(value)}.
 * @param value the index block encoding to use
 * @return this (for chained invocation)
 */
public ColumnFamilyDescriptorBuilder setIndexBlockEncoding(IndexBlockEncoding value) {
desc.setIndexBlockEncoding(value);
return this;
}

public ColumnFamilyDescriptorBuilder setEncryptionKey(final byte[] value) {
desc.setEncryptionKey(value);
return this;
Expand Down Expand Up @@ -832,6 +848,22 @@ public ModifyableColumnFamilyDescriptor setDataBlockEncoding(DataBlockEncoding t
type == null ? DataBlockEncoding.NONE.name() : type.name());
}

/**
 * Returns the index block encoding configured for this column family.
 * <p>
 * The stored name is upper-cased with {@link java.util.Locale#ROOT} before it is resolved through
 * {@link IndexBlockEncoding#valueOf(String)}, so lower- or mixed-case values resolve the same way
 * regardless of the JVM default locale. (Under a Turkish default locale the no-arg
 * {@code toUpperCase()} maps 'i' to a dotted capital I, which would make "prefix_tree"
 * unresolvable.) {@link IndexBlockEncoding#NONE} is handed to {@code getStringOrDefault} as the
 * fallback value.
 * @return the resolved index block encoding
 * @throws IllegalArgumentException if the stored name matches no {@link IndexBlockEncoding}
 *                                  constant
 */
@Override
public IndexBlockEncoding getIndexBlockEncoding() {
  return getStringOrDefault(INDEX_BLOCK_ENCODING_BYTES,
    // Locale.ROOT keeps the case mapping locale-independent; fully qualified to avoid
    // depending on an import that may not be present in this file.
    n -> IndexBlockEncoding.valueOf(n.toUpperCase(java.util.Locale.ROOT)),
    IndexBlockEncoding.NONE);
}

/**
 * Records the index block encoding for this column family under the index block encoding
 * attribute key. A {@code null} argument is stored as {@link IndexBlockEncoding#NONE}.
 * @param type the index block encoding to use; may be {@code null}
 * @return this (for chained invocation)
 */
public ModifyableColumnFamilyDescriptor setIndexBlockEncoding(IndexBlockEncoding type) {
  // Resolve the null case first so that a single name() call produces the stored value.
  final IndexBlockEncoding effective = (type != null) ? type : IndexBlockEncoding.NONE;
  return setValue(INDEX_BLOCK_ENCODING_BYTES, effective.name());
}

/**
* Set whether the tags should be compressed along with DataBlockEncoding. When no
* DataBlockEncoding is been used, this is having no effect. n * @return this (for chained
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.io.encoding.IndexBlockEncoding;
import org.apache.hadoop.hbase.regionserver.BloomType;
import org.apache.hadoop.hbase.testclassification.MiscTests;
import org.apache.hadoop.hbase.testclassification.SmallTests;
Expand Down Expand Up @@ -224,7 +225,7 @@ public void testSetBlocksize() throws HBaseException {
@Test
public void testDefaultBuilder() {
final Map<String, String> defaultValueMap = ColumnFamilyDescriptorBuilder.getDefaultValues();
assertEquals(defaultValueMap.size(), 11);
assertEquals(defaultValueMap.size(), 12);
assertEquals(defaultValueMap.get(ColumnFamilyDescriptorBuilder.BLOOMFILTER),
BloomType.ROW.toString());
assertEquals(defaultValueMap.get(ColumnFamilyDescriptorBuilder.REPLICATION_SCOPE), "0");
Expand All @@ -244,6 +245,8 @@ public void testDefaultBuilder() {
KeepDeletedCells.FALSE.toString());
assertEquals(defaultValueMap.get(ColumnFamilyDescriptorBuilder.DATA_BLOCK_ENCODING),
DataBlockEncoding.NONE.toString());
assertEquals(defaultValueMap.get(ColumnFamilyDescriptorBuilder.INDEX_BLOCK_ENCODING),
IndexBlockEncoding.NONE.toString());
}

@Test
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.io.encoding;

import java.io.IOException;
import java.io.OutputStream;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.yetus.audience.InterfaceAudience;

/**
 * Provide access to all index block encoding algorithms. All of the algorithms are required to have
 * unique id which should <b>NEVER</b> be changed. If you want to add a new algorithm/version,
 * assign it a new id. Announce the new id in the HBase mailing list to prevent collisions.
 */
@InterfaceAudience.Public
public enum IndexBlockEncoding {

  /** Disable index block encoding. */
  NONE(0, null),
  // id 1 is reserved for the PREFIX_TREE algorithm; no encoder class name is registered yet (null)
  PREFIX_TREE(1, null);

  /** Numeric id of this encoding; validated in the constructor to lie in [0, Byte.MAX_VALUE]. */
  private final short id;
  /** {@link #id} serialized via {@code Bytes.toBytes(short)}; always {@link #ID_SIZE} bytes. */
  private final byte[] idInBytes;
  /** Encoder class name supplied to the constructor; stored but not read within this enum. */
  private final String encoderCls;

  /** Number of bytes used to serialize an encoding id. */
  public static final int ID_SIZE = Bytes.SIZEOF_SHORT;

  /** Maps index block encoding ids to enum instances. */
  private static final IndexBlockEncoding[] idArray = new IndexBlockEncoding[Byte.MAX_VALUE + 1];

  static {
    // Populate the id lookup table and fail fast if two constants were given the same id.
    for (IndexBlockEncoding algo : values()) {
      if (idArray[algo.id] != null) {
        throw new RuntimeException(
          String.format("Two index block encoder algorithms '%s' and '%s' have the same id %d",
            idArray[algo.id].toString(), algo.toString(), (int) algo.id));
      }
      idArray[algo.id] = algo;
    }
  }

  /**
   * Creates an encoding constant.
   * @param id             numeric id; must be within [0, {@link Byte#MAX_VALUE}]
   * @param encoderClsName encoder class name, may be {@code null}
   * @throws AssertionError if {@code id} is out of range
   */
  private IndexBlockEncoding(int id, String encoderClsName) {
    if (id < 0 || id > Byte.MAX_VALUE) {
      throw new AssertionError("Index block encoding algorithm id is out of range: " + id);
    }
    this.id = (short) id;
    this.idInBytes = Bytes.toBytes(this.id);
    if (idInBytes.length != ID_SIZE) {
      // While this may seem redundant, if we accidentally serialize
      // the id as e.g. an int instead of a short, all encoders will break.
      throw new RuntimeException("Unexpected length of encoder ID byte representation: "
        + Bytes.toStringBinary(idInBytes));
    }
    this.encoderCls = encoderClsName;
  }

  /** Returns name converted to bytes. */
  public byte[] getNameInBytes() {
    return Bytes.toBytes(toString());
  }

  /** Returns the id of this index block encoding. */
  public short getId() {
    return id;
  }

  /**
   * Writes id in bytes.
   * @param stream where the id should be written.
   * @throws IOException if the stream write fails
   */
  public void writeIdInBytes(OutputStream stream) throws IOException {
    stream.write(idInBytes);
  }

  /**
   * Writes id bytes to the given array starting from offset.
   * @param dest   output array
   * @param offset starting offset of the output array
   * @throws IOException never raised by the array copy itself; the clause is kept so that the
   *                     signature stays compatible with existing callers
   */
  public void writeIdInBytes(byte[] dest, int offset) throws IOException {
    System.arraycopy(idInBytes, 0, dest, offset, ID_SIZE);
  }

  /**
   * Find and return the name of the index block encoding for the given id.
   * @param encoderId id of index block encoding
   * @return name, same as used in options in column family
   * @throws IllegalArgumentException if no encoding is registered for {@code encoderId}
   */
  public static String getNameFromId(short encoderId) {
    return getEncodingById(encoderId).toString();
  }

  /**
   * Looks up the encoding registered for the given id.
   * @param indexBlockEncodingId id of index block encoding
   * @return the matching enum constant, never {@code null}
   * @throws IllegalArgumentException if the id is outside [0, {@link Byte#MAX_VALUE}] or no
   *                                  encoding is registered for it
   */
  public static IndexBlockEncoding getEncodingById(short indexBlockEncodingId) {
    IndexBlockEncoding algorithm = null;
    if (indexBlockEncodingId >= 0 && indexBlockEncodingId <= Byte.MAX_VALUE) {
      algorithm = idArray[indexBlockEncodingId];
    }
    if (algorithm == null) {
      throw new IllegalArgumentException(String
        .format("There is no index block encoder for given id '%d'", (int) indexBlockEncodingId));
    }
    return algorithm;
  }

}
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.crypto.Encryption;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.io.encoding.IndexBlockEncoding;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.ChecksumType;
import org.apache.hadoop.hbase.util.ClassSize;
Expand Down Expand Up @@ -58,6 +59,7 @@ public class HFileContext implements HeapSize, Cloneable {
/** Number of uncompressed bytes we allow per block. */
private int blockSize = HConstants.DEFAULT_BLOCKSIZE;
private DataBlockEncoding encoding = DataBlockEncoding.NONE;
private IndexBlockEncoding indexBlockEncoding = IndexBlockEncoding.NONE;
/** Encryption algorithm and key used */
private Encryption.Context cryptoContext = Encryption.Context.NONE;
private long fileCreateTime;
Expand Down Expand Up @@ -89,13 +91,14 @@ public HFileContext(HFileContext context) {
this.columnFamily = context.columnFamily;
this.tableName = context.tableName;
this.cellComparator = context.cellComparator;
this.indexBlockEncoding = context.indexBlockEncoding;
}

HFileContext(boolean useHBaseChecksum, boolean includesMvcc, boolean includesTags,
Compression.Algorithm compressAlgo, boolean compressTags, ChecksumType checksumType,
int bytesPerChecksum, int blockSize, DataBlockEncoding encoding,
Encryption.Context cryptoContext, long fileCreateTime, String hfileName, byte[] columnFamily,
byte[] tableName, CellComparator cellComparator) {
byte[] tableName, CellComparator cellComparator, IndexBlockEncoding indexBlockEncoding) {
this.usesHBaseChecksum = useHBaseChecksum;
this.includesMvcc = includesMvcc;
this.includesTags = includesTags;
Expand All @@ -107,6 +110,9 @@ public HFileContext(HFileContext context) {
if (encoding != null) {
this.encoding = encoding;
}
if (indexBlockEncoding != null) {
this.indexBlockEncoding = indexBlockEncoding;
}
this.cryptoContext = cryptoContext;
this.fileCreateTime = fileCreateTime;
this.hfileName = hfileName;
Expand Down Expand Up @@ -186,6 +192,10 @@ public DataBlockEncoding getDataBlockEncoding() {
return encoding;
}

/** Returns the index block encoding held by this context. */
public IndexBlockEncoding getIndexBlockEncoding() {
return indexBlockEncoding;
}

public Encryption.Context getEncryptionContext() {
return cryptoContext;
}
Expand Down Expand Up @@ -253,6 +263,8 @@ public String toString() {
sb.append(blockSize);
sb.append(", encoding=");
sb.append(encoding);
sb.append(", indexBlockEncoding=");
sb.append(indexBlockEncoding);
sb.append(", includesMvcc=");
sb.append(includesMvcc);
sb.append(", includesTags=");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
import org.apache.hadoop.hbase.io.crypto.Encryption;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.io.encoding.IndexBlockEncoding;
import org.apache.hadoop.hbase.util.ChecksumType;
import org.apache.yetus.audience.InterfaceAudience;

Expand Down Expand Up @@ -50,6 +51,8 @@ public class HFileContextBuilder {
/** Number of uncompressed bytes we allow per block. */
private int blockSize = HConstants.DEFAULT_BLOCKSIZE;
private DataBlockEncoding encoding = DataBlockEncoding.NONE;
/** the index block encoding type **/
private IndexBlockEncoding indexBlockEncoding = IndexBlockEncoding.NONE;
/** Crypto context */
private Encryption.Context cryptoContext = Encryption.Context.NONE;
private long fileCreateTime = 0;
Expand Down Expand Up @@ -128,6 +131,11 @@ public HFileContextBuilder withDataBlockEncoding(DataBlockEncoding encoding) {
return this;
}

/**
 * Sets the index block encoding that {@link #build()} passes to the {@link HFileContext}
 * constructor.
 * @param indexBlockEncoding the index block encoding to use
 * @return this builder (for chained invocation)
 */
public HFileContextBuilder withIndexBlockEncoding(IndexBlockEncoding indexBlockEncoding) {
this.indexBlockEncoding = indexBlockEncoding;
return this;
}

public HFileContextBuilder withEncryptionContext(Encryption.Context cryptoContext) {
this.cryptoContext = cryptoContext;
return this;
Expand Down Expand Up @@ -161,6 +169,6 @@ public HFileContextBuilder withCellComparator(CellComparator cellComparator) {
public HFileContext build() {
return new HFileContext(usesHBaseChecksum, includesMvcc, includesTags, compression,
compressTags, checkSumType, bytesPerChecksum, blockSize, encoding, cryptoContext,
fileCreateTime, hfileName, columnFamily, tableName, cellComparator);
fileCreateTime, hfileName, columnFamily, tableName, cellComparator, indexBlockEncoding);
}
}
Loading

0 comments on commit bfd8501

Please sign in to comment.