From 54f2106f33d02ed6880947cae19c06a4ea113ec2 Mon Sep 17 00:00:00 2001
From: Abhradeep Kundu
Date: Mon, 8 Aug 2022 16:13:06 +0530
Subject: [PATCH] HBASE-27265 : Tool to read StoreFileTrackerFile (#4673)

Signed-off-by: Wellington Chevreuil
Signed-off-by: Duo Zhang
---
 bin/hbase                                     |   3 +
 bin/hbase.cmd                                 |   5 +
 .../storefiletracker/StoreFileListFile.java   |  10 +-
 .../StoreFileListFilePrettyPrinter.java       | 265 ++++++++++++++++++
 .../TestStoreFileListFilePrinter.java         | 176 ++++++++++++
 5 files changed, 456 insertions(+), 3 deletions(-)
 create mode 100644 hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/storefiletracker/StoreFileListFilePrettyPrinter.java
 create mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/storefiletracker/TestStoreFileListFilePrinter.java

diff --git a/bin/hbase b/bin/hbase
index 1754d2982eca..df73d4713d0b 100755
--- a/bin/hbase
+++ b/bin/hbase
@@ -83,6 +83,7 @@ show_usage() {
   if [ "${in_omnibus_tarball}" = "true" ]; then
     echo " wal Write-ahead-log analyzer"
     echo " hfile Store file analyzer"
+    echo " sft Store file tracker viewer"
     echo " zkcli Run the ZooKeeper shell"
     echo " master Run an HBase HMaster node"
     echo " regionserver Run an HBase HRegionServer node"
@@ -597,6 +598,8 @@ elif [ "$COMMAND" = "wal" ] ; then
   CLASS='org.apache.hadoop.hbase.wal.WALPrettyPrinter'
 elif [ "$COMMAND" = "hfile" ] ; then
   CLASS='org.apache.hadoop.hbase.io.hfile.HFilePrettyPrinter'
+elif [ "$COMMAND" = "sft" ] ; then
+  CLASS='org.apache.hadoop.hbase.regionserver.storefiletracker.StoreFileListFilePrettyPrinter'
 elif [ "$COMMAND" = "zkcli" ] ; then
   CLASS="org.apache.hadoop.hbase.zookeeper.ZKMainServer"
   for f in $HBASE_HOME/lib/zkcli/*.jar; do
diff --git a/bin/hbase.cmd b/bin/hbase.cmd
index 3b569099090f..f8111a3bc0a9 100644
--- a/bin/hbase.cmd
+++ b/bin/hbase.cmd
@@ -435,6 +435,10 @@ goto :eof
   set CLASS=org.apache.hadoop.hbase.io.hfile.HFile
   goto :eof
 
+:sft
+  set CLASS=org.apache.hadoop.hbase.regionserver.storefiletracker.StoreFileListFilePrettyPrinter
+  goto :eof
+
 :zkcli
   set CLASS=org.apache.hadoop.hbase.zookeeper.ZKMainServer
   set CLASSPATH=!CLASSPATH!;%HBASE_HOME%\lib\zkcli\*
@@ -468,6 +472,7 @@ goto :eof
   echo hbck Run the hbase 'fsck' tool
   echo wal Write-ahead-log analyzer
   echo hfile Store file analyzer
+  echo sft Store file tracker viewer
   echo zkcli Run the ZooKeeper shell
   echo master Run an HBase HMaster node
   echo regionserver Run an HBase HRegionServer node
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/storefiletracker/StoreFileListFile.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/storefiletracker/StoreFileListFile.java
index 9328e5efb960..e3d2a182348c 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/storefiletracker/StoreFileListFile.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/storefiletracker/StoreFileListFile.java
@@ -73,7 +73,7 @@ class StoreFileListFile {
 
   private static final char TRACK_FILE_SEPARATOR = '.';
 
-  private static final Pattern TRACK_FILE_PATTERN = Pattern.compile("^f(1|2)\\.\\d+$");
+  static final Pattern TRACK_FILE_PATTERN = Pattern.compile("^f(1|2)\\.\\d+$");
 
   // 16 MB, which is big enough for a tracker file
   private static final int MAX_FILE_SIZE = 16 * 1024 * 1024;
@@ -94,8 +94,7 @@ class StoreFileListFile {
     trackFileDir = new Path(ctx.getFamilyStoreDirectoryPath(), TRACK_FILE_DIR);
   }
 
-  private StoreFileList load(Path path) throws IOException {
-    FileSystem fs = ctx.getRegionFileSystem().getFileSystem();
+  static StoreFileList load(FileSystem fs, Path path) throws IOException {
     byte[] data;
     int expectedChecksum;
     try (FSDataInputStream in = fs.open(path)) {
@@ -118,6 +117,11 @@ private StoreFileList load(Path path) throws IOException {
     return StoreFileList.parseFrom(data);
   }
 
+  StoreFileList load(Path path) throws IOException {
+    FileSystem fs = ctx.getRegionFileSystem().getFileSystem();
+    return load(fs, path);
+  }
+
   private int select(StoreFileList[] lists) {
     if (lists[0] == null) {
       return 1;
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/storefiletracker/StoreFileListFilePrettyPrinter.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/storefiletracker/StoreFileListFilePrettyPrinter.java
new file mode 100644
index 000000000000..9338f2f63320
--- /dev/null
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/storefiletracker/StoreFileListFilePrettyPrinter.java
@@ -0,0 +1,265 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.regionserver.storefiletracker;
+
+import java.io.IOException;
+import java.io.PrintStream;
+import org.apache.commons.lang3.StringUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.LocatedFileStatus;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.RemoteIterator;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.HBaseInterfaceAudience;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.util.CommonFSUtils;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+import org.apache.yetus.audience.InterfaceAudience;
+import org.apache.yetus.audience.InterfaceStability;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.hbase.thirdparty.org.apache.commons.cli.CommandLine;
+import org.apache.hbase.thirdparty.org.apache.commons.cli.CommandLineParser;
+import org.apache.hbase.thirdparty.org.apache.commons.cli.HelpFormatter;
+import org.apache.hbase.thirdparty.org.apache.commons.cli.Option;
+import org.apache.hbase.thirdparty.org.apache.commons.cli.OptionGroup;
+import org.apache.hbase.thirdparty.org.apache.commons.cli.Options;
+import org.apache.hbase.thirdparty.org.apache.commons.cli.ParseException;
+import org.apache.hbase.thirdparty.org.apache.commons.cli.PosixParser;
+
+import org.apache.hadoop.hbase.shaded.protobuf.generated.StoreFileTrackerProtos.StoreFileList;
+
+/**
+ * Command line tool ({@code hbase sft}) that prints the store file names recorded in store file
+ * tracker files.
+ * <p>
+ * Either one tracker file is named with {@code --file}, or {@code --table}, {@code --region} and
+ * {@code --columnfamily} are given and every tracker file found for that column family is
+ * printed. {@link #run(String[])} returns 0 on success, 1 when the arguments cannot be used and 2
+ * when a tracker file cannot be read.
+ */
+@InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.TOOLS)
+@InterfaceStability.Evolving
+public class StoreFileListFilePrettyPrinter extends Configured implements Tool {
+  private static final Logger LOG = LoggerFactory.getLogger(StoreFileListFilePrettyPrinter.class);
+
+  private static final String FILE_OPTION = "f";
+  private static final String COLUMN_FAMILY_OPTION = "cf";
+  private static final String REGION_OPTION = "r";
+  private static final String TABLE_NAME_OPTION = "t";
+
+  /** Usage line; long option names are spelled as registered in {@code init()}. */
+  private static final String USAGE =
+    "sft [--file=<path> | --table=<table> --region=<region> --columnfamily=<cf>]";
+
+  private final Options options = new Options();
+
+  private String namespace;
+  private String regionName;
+  private String columnFamily;
+  private String tableName;
+  private Path path;
+  private final PrintStream err = System.err;
+  private final PrintStream out = System.out;
+
+  public StoreFileListFilePrettyPrinter() {
+    super();
+    init();
+  }
+
+  public StoreFileListFilePrettyPrinter(Configuration conf) {
+    super(conf);
+    init();
+  }
+
+  /** Registers the command line options; region and file share one option group. */
+  private void init() {
+    OptionGroup files = new OptionGroup();
+    options.addOption(new Option(TABLE_NAME_OPTION, "table", true,
+      "Table to scan. Pass table name; e.g. test_table"));
+    options.addOption(new Option(COLUMN_FAMILY_OPTION, "columnfamily", true,
+      "column family to scan. Pass column family name; e.g. f"));
+    files.addOption(new Option(REGION_OPTION, "region", true,
+      "Region to scan. Pass region name; e.g. '3d58e9067bf23e378e68c071f3dd39eb'"));
+    files.addOption(new Option(FILE_OPTION, "file", true,
+      "File to scan. Pass full-path; e.g. /root/hbase-3.0.0-alpha-4-SNAPSHOT/hbase-data/"
+        + "data/default/tbl-sft/093fa06bf84b3b631007f951a14b8457/f/.filelist/f2.1655139542249"));
+    options.addOptionGroup(files);
+  }
+
+  /**
+   * Parses the command line into the fields read by {@link #run(String[])}.
+   * @param args command line arguments
+   * @return {@code true} when there is something to print, {@code false} when usage was printed
+   *         because arguments are missing
+   * @throws ParseException if the arguments cannot be parsed
+   * @throws IOException kept in the signature for existing callers
+   */
+  public boolean parseOptions(String[] args) throws ParseException, IOException {
+    // Forget the previous invocation so that one instance can be run more than once.
+    path = null;
+    if (args.length == 0) {
+      new HelpFormatter().printHelp(USAGE, options, true);
+      return false;
+    }
+
+    CommandLineParser parser = new PosixParser();
+    CommandLine cmd = parser.parse(options, args);
+
+    if (cmd.hasOption(FILE_OPTION)) {
+      path = new Path(cmd.getOptionValue(FILE_OPTION));
+      return true;
+    }
+
+    regionName = cmd.getOptionValue(REGION_OPTION);
+    if (StringUtils.isEmpty(regionName)) {
+      return usageError("Region name is not specified.");
+    }
+    columnFamily = cmd.getOptionValue(COLUMN_FAMILY_OPTION);
+    if (StringUtils.isEmpty(columnFamily)) {
+      return usageError("Column family is not specified.");
+    }
+    String tableNameWithNs = cmd.getOptionValue(TABLE_NAME_OPTION);
+    if (StringUtils.isEmpty(tableNameWithNs)) {
+      return usageError("Table name is not specified.");
+    }
+    TableName tn = TableName.valueOf(tableNameWithNs);
+    namespace = tn.getNamespaceAsString();
+    // run() nests the table directory under the namespace directory, so only the qualifier is
+    // needed here; getNameAsString() would repeat the namespace for non-default namespaces.
+    tableName = tn.getQualifierAsString();
+    return true;
+  }
+
+  /** Prints {@code message} and the usage text; always returns {@code false}. */
+  private boolean usageError(String message) {
+    err.println(message);
+    new HelpFormatter().printHelp(USAGE, options, true);
+    return false;
+  }
+
+  /**
+   * Prints the tracker file(s) selected by {@code args}.
+   * @param args command line arguments, see {@link #parseOptions(String[])}
+   * @return 0 on success, 1 if the arguments cannot be used, 2 if a tracker file cannot be read
+   */
+  @Override
+  public int run(String[] args) {
+    if (getConf() == null) {
+      throw new RuntimeException("A Configuration instance must be provided.");
+    }
+    try {
+      CommonFSUtils.setFsDefault(getConf(), CommonFSUtils.getRootDir(getConf()));
+      if (!parseOptions(args)) {
+        return 1;
+      }
+    } catch (IOException | ParseException ex) {
+      LOG.error("Error parsing command-line options", ex);
+      return 1;
+    }
+    return path != null ? printFile() : printRegionFiles();
+  }
+
+  /** Prints the single tracker file selected with the file option. */
+  private int printFile() {
+    try {
+      // Not closed here: getFileSystem() may return an instance shared with other users.
+      FileSystem fs = path.getFileSystem(getConf());
+      if (fs.isDirectory(path)) {
+        err.println("ERROR, wrong path given: " + path);
+        return 2;
+      }
+      return print(fs, path);
+    } catch (IOException e) {
+      LOG.error("Error reading " + path, e);
+      return 2;
+    }
+  }
+
+  /** Prints every tracker file of the column family selected by table, region and family. */
+  private int printRegionFiles() {
+    boolean pass = true;
+    try {
+      Path root = CommonFSUtils.getRootDir(getConf());
+      Path baseDir = new Path(root, HConstants.BASE_NAMESPACE_DIR);
+      Path nameSpacePath = new Path(baseDir, namespace);
+      Path tablePath = new Path(nameSpacePath, tableName);
+      Path regionPath = new Path(tablePath, regionName);
+      Path cfPath = new Path(regionPath, columnFamily);
+      Path sftPath = new Path(cfPath, StoreFileListFile.TRACK_FILE_DIR);
+
+      // newInstance() creates a FileSystem owned by this method, so close it when done.
+      try (FileSystem fs = FileSystem.newInstance(regionPath.toUri(), getConf())) {
+        RemoteIterator<LocatedFileStatus> iterator = fs.listFiles(sftPath, false);
+        while (iterator.hasNext()) {
+          LocatedFileStatus lfs = iterator.next();
+          if (
+            lfs.isFile()
+              && StoreFileListFile.TRACK_FILE_PATTERN.matcher(lfs.getPath().getName()).matches()
+          ) {
+            out.println("Printing contents for file " + lfs.getPath().toString());
+            if (print(fs, lfs.getPath()) != 0) {
+              pass = false;
+            }
+          }
+        }
+      }
+    } catch (IOException e) {
+      LOG.error("Error processing store file tracker files of region " + regionName, e);
+      return 2;
+    }
+    return pass ? 0 : 2;
+  }
+
+  /**
+   * Prints the name of every store file recorded in one tracker file.
+   * @param fs   file system that holds {@code path}
+   * @param path tracker file to read
+   * @return 0 when the file was printed, 2 when it is missing or cannot be checked
+   * @throws IOException if loading the tracker file fails
+   */
+  private int print(FileSystem fs, Path path) throws IOException {
+    try {
+      if (!fs.exists(path)) {
+        err.println("ERROR, file doesn't exist: " + path);
+        return 2;
+      }
+    } catch (IOException e) {
+      err.println("ERROR, reading file: " + path + ": " + e);
+      return 2;
+    }
+    StoreFileList storeFile = StoreFileListFile.load(fs, path);
+    int end = storeFile.getStoreFileCount();
+    for (int i = 0; i < end; i++) {
+      out.println(storeFile.getStoreFile(i).getName());
+    }
+    return 0;
+  }
+
+  /** Command line entry point; exits with the status returned by {@link #run(String[])}. */
+  public static void main(String[] args) throws Exception {
+    Configuration conf = HBaseConfiguration.create();
+    int ret = ToolRunner.run(conf, new StoreFileListFilePrettyPrinter(), args);
+    System.exit(ret);
+  }
+}
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/storefiletracker/TestStoreFileListFilePrinter.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/storefiletracker/TestStoreFileListFilePrinter.java
new file mode 100644
index 000000000000..0887413cb623
--- /dev/null
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/storefiletracker/TestStoreFileListFilePrinter.java
@@ -0,0 +1,176 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.regionserver.storefiletracker;
+
+import static org.junit.Assert.assertEquals;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.PrintStream;
+import java.util.ArrayList;
+import java.util.List;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.LocatedFileStatus;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.RemoteIterator;
+import org.apache.hadoop.hbase.HBaseClassTestRule;
+import org.apache.hadoop.hbase.HBaseTestingUtil;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.TableNameTestRule;
+import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.client.TableDescriptor;
+import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
+import org.apache.hadoop.hbase.regionserver.HRegion;
+import org.apache.hadoop.hbase.testclassification.MediumTests;
+import org.apache.hadoop.hbase.testclassification.RegionServerTests;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.junit.After;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.ClassRule;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+import org.apache.hbase.thirdparty.com.google.common.collect.Iterables;
+
+/**
+ * Runs {@link StoreFileListFilePrettyPrinter} against a table that uses the FILE store file
+ * tracker and checks what the tool writes to standard output.
+ */
+@Category({ RegionServerTests.class, MediumTests.class })
+public class TestStoreFileListFilePrinter {
+
+  @ClassRule
+  public static final HBaseClassTestRule CLASS_RULE =
+    HBaseClassTestRule.forClass(TestStoreFileListFilePrinter.class);
+
+  private static final HBaseTestingUtil UTIL = new HBaseTestingUtil();
+  private static final byte[] FAMILY = Bytes.toBytes("F");
+
+  @Rule
+  public final TableNameTestRule tableName = new TableNameTestRule();
+
+  // The tests redirect System.out; remember the real stream so it can be put back afterwards.
+  private final PrintStream originalOut = System.out;
+
+  @BeforeClass
+  public static void setUp() throws Exception {
+    UTIL.startMiniCluster(1);
+  }
+
+  @AfterClass
+  public static void tearDown() throws Exception {
+    UTIL.shutdownMiniCluster();
+  }
+
+  @After
+  public void restoreStdout() {
+    System.setOut(originalOut);
+  }
+
+  @Test
+  public void testPrintWithDirectPath() throws IOException {
+    createTable();
+    TableName tn = tableName.getTableName();
+    String fileName = getStoreFileName(tn, FAMILY);
+
+    Configuration conf = UTIL.getConfiguration();
+    ByteArrayOutputStream stream = new ByteArrayOutputStream();
+    // Redirect before constructing the printer: it picks up System.out in a field initializer.
+    System.setOut(new PrintStream(stream));
+    StoreFileListFilePrettyPrinter sftPrinter = new StoreFileListFilePrettyPrinter(conf);
+
+    for (Path trackFile : getTrackFiles(tn)) {
+      // Start from an empty buffer so that every tracker file is checked on its own.
+      stream.reset();
+      String[] argsF = { "-f", trackFile.toString() };
+      assertEquals(0, sftPrinter.run(argsF));
+      assertEquals(fileName + "\n", new String(stream.toByteArray()));
+    }
+  }
+
+  @Test
+  public void testPrintWithRegionOption() throws IOException {
+    createTable();
+    TableName tn = tableName.getTableName();
+    String fileName = getStoreFileName(tn, FAMILY);
+    String rn = getOnlyRegion(tn).getRegionInfo().getEncodedName();
+
+    Configuration conf = UTIL.getConfiguration();
+    ByteArrayOutputStream stream = new ByteArrayOutputStream();
+    System.setOut(new PrintStream(stream));
+    StoreFileListFilePrettyPrinter sftPrinter = new StoreFileListFilePrettyPrinter(conf);
+    String[] args = { "-r", rn, "-t", tn.toString(), "-cf", Bytes.toString(FAMILY) };
+    assertEquals(0, sftPrinter.run(args));
+
+    StringBuilder expect = new StringBuilder();
+    for (Path trackFile : getTrackFiles(tn)) {
+      expect.append("Printing contents for file ").append(trackFile).append("\n")
+        .append(fileName).append("\n");
+    }
+    assertEquals(expect.toString(), new String(stream.toByteArray()));
+  }
+
+  private HRegion getOnlyRegion(TableName table) {
+    return Iterables.getOnlyElement(UTIL.getMiniHBaseCluster().getRegions(table));
+  }
+
+  /** Lists the tracker files of FAMILY, matched with the pattern the printer filters on. */
+  private List<Path> getTrackFiles(TableName table) throws IOException {
+    HRegion region = getOnlyRegion(table);
+    FileSystem fs = region.getRegionFileSystem().getFileSystem();
+    Path cfPath = new Path(region.getRegionFileSystem().getRegionDir(), Bytes.toString(FAMILY));
+    RemoteIterator<LocatedFileStatus> iterator =
+      fs.listFiles(new Path(cfPath, StoreFileListFile.TRACK_FILE_DIR), false);
+    List<Path> trackFiles = new ArrayList<>();
+    while (iterator.hasNext()) {
+      Path candidate = iterator.next().getPath();
+      if (StoreFileListFile.TRACK_FILE_PATTERN.matcher(candidate.getName()).matches()) {
+        trackFiles.add(candidate);
+      }
+    }
+    return trackFiles;
+  }
+
+  private String getStoreFileName(TableName table, byte[] family) {
+    return Iterables
+      .getOnlyElement(Iterables.getOnlyElement(UTIL.getMiniHBaseCluster().getRegions(table))
+        .getStore(family).getStorefiles())
+      .getPath().getName();
+  }
+
+  private void createTable() throws IOException {
+    TableName tn = tableName.getTableName();
+    byte[] row = Bytes.toBytes("row");
+    byte[] qualifier = Bytes.toBytes("qualifier");
+    byte[] value = Bytes.toBytes("value");
+    TableDescriptor td = TableDescriptorBuilder.newBuilder(tn)
+      .setColumnFamily(ColumnFamilyDescriptorBuilder.of(FAMILY))
+      .setValue(StoreFileTrackerFactory.TRACKER_IMPL, StoreFileTrackerFactory.Trackers.FILE.name())
+      .build();
+    UTIL.getAdmin().createTable(td);
+    try (Table table = UTIL.getConnection().getTable(tn)) {
+      table.put(new Put(row).addColumn(FAMILY, qualifier, value));
+    }
+    UTIL.flush(tn);
+  }
+}