Skip to content

Commit

Permalink
HBASE-28209: Create a jmx metrics to expose the oldWALs directory size
Browse files Browse the repository at this point in the history
Signed-off-by: Wellington Chevreuil <[email protected]>
  • Loading branch information
vinayakphegde authored and wchevreuil committed Dec 8, 2023
1 parent 6d7b9c8 commit 7133958
Show file tree
Hide file tree
Showing 12 changed files with 195 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,13 @@ public enum OperationStatusCode {
/** Default value for the balancer period */
public static final int DEFAULT_HBASE_BALANCER_PERIOD = 300000;

/**
 * Config key for the period, in milliseconds, at which the Master recomputes the size of the
 * oldWALs directory.
 */
public static final String HBASE_OLDWAL_DIR_SIZE_UPDATER_PERIOD =
"hbase.master.oldwals.dir.updater.period";

/** Default oldWALs directory size updater period: 300000 milliseconds (5 minutes). */
public static final int DEFAULT_HBASE_OLDWAL_DIR_SIZE_UPDATER_PERIOD = 300000;

/**
* Config key for enable/disable automatically separate child regions to different region servers
* in the procedure of split regions. One child will be kept to the server where parent region is
Expand Down
6 changes: 6 additions & 0 deletions hbase-common/src/main/resources/hbase-default.xml
Original file line number Diff line number Diff line change
Expand Up @@ -617,6 +617,12 @@ possible configurations would overwhelm and obscure the important.
<description>Period at which the region balancer runs in the Master, in
milliseconds.</description>
</property>
<property>
<name>hbase.master.oldwals.dir.updater.period</name>
<value>300000</value>
<description>Period at which the oldWALs directory size calculator/updater will run in the
Master, in milliseconds.</description>
</property>
<property>
<name>hbase.regions.slop</name>
<value>0.2</value>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ public interface MetricsMasterSource extends BaseSource {
String MERGE_PLAN_COUNT_NAME = "mergePlanCount";

String CLUSTER_REQUESTS_NAME = "clusterRequests";
String OLD_WAL_DIR_SIZE_NAME = "oldWALsDirSize";
String MASTER_ACTIVE_TIME_DESC = "Master Active Time";
String MASTER_START_TIME_DESC = "Master Start Time";
String MASTER_FINISHED_INITIALIZATION_TIME_DESC =
Expand All @@ -85,6 +86,7 @@ public interface MetricsMasterSource extends BaseSource {
String MERGE_PLAN_COUNT_DESC = "Number of Region Merge Plans executed";

String SERVER_CRASH_METRIC_PREFIX = "serverCrash";
String OLD_WAL_DIR_SIZE_DESC = "size of old WALs directory in bytes";

/**
* Increment the number of requests the cluster has seen.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -146,4 +146,9 @@ public interface MetricsMasterWrapper {
* Get the time in Millis when the master finished initializing/becoming the active master
*/
long getMasterInitializationTime();

/**
 * Get the size of the old WALs directory, in bytes.
 * @return the size of the old WALs directory in bytes
 */
long getOldWALsDirSize();
}
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,9 @@ public void getMetrics(MetricsCollector metricsCollector, boolean all) {
.tag(Interns.info(SERVER_NAME_NAME, SERVER_NAME_DESC), masterWrapper.getServerName())
.tag(Interns.info(CLUSTER_ID_NAME, CLUSTER_ID_DESC), masterWrapper.getClusterId())
.tag(Interns.info(IS_ACTIVE_MASTER_NAME, IS_ACTIVE_MASTER_DESC),
String.valueOf(masterWrapper.getIsActiveMaster()));
String.valueOf(masterWrapper.getIsActiveMaster()))
.addGauge(Interns.info(OLD_WAL_DIR_SIZE_NAME, OLD_WAL_DIR_SIZE_DESC),
masterWrapper.getOldWALsDirSize());
}

metricsRegistry.snapshot(metricsRecordBuilder, all);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -421,6 +421,7 @@ public class HMaster extends HRegionServer implements MasterServices {
private SpaceQuotaSnapshotNotifier spaceQuotaSnapshotNotifier;
private QuotaObserverChore quotaObserverChore;
private SnapshotQuotaObserverChore snapshotQuotaChore;
private OldWALsDirSizeChore oldWALsDirSizeChore;

private ProcedureExecutor<MasterProcedureEnv> procedureExecutor;
private ProcedureStore procedureStore;
Expand Down Expand Up @@ -1300,6 +1301,10 @@ private void finishActiveMasterInitialization() throws IOException, InterruptedE

this.rollingUpgradeChore = new RollingUpgradeChore(this);
getChoreService().scheduleChore(rollingUpgradeChore);

this.oldWALsDirSizeChore = new OldWALsDirSizeChore(this);
getChoreService().scheduleChore(this.oldWALsDirSizeChore);

status.markComplete("Progress after master initialized complete");
}

Expand Down Expand Up @@ -1825,6 +1830,7 @@ private void stopChores() {
shutdownChore(hbckChore);
shutdownChore(regionsRecoveryChore);
shutdownChore(rollingUpgradeChore);
shutdownChore(oldWALsDirSizeChore);
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,9 @@ public boolean accept(Path p) {
// create the split log lock
private final Lock splitLogLock = new ReentrantLock();

// Old WALs directory size in bytes. It is refreshed by the scheduled OldWALsDirSizeChore and read
// through the master metrics wrapper, which may happen on different threads; volatile guarantees
// that readers see the latest value and that the 64-bit write is never observed half-done.
private volatile long oldWALsDirSize;

/**
* Superceded by {@link SplitWALManager}; i.e. procedure-based WAL splitting rather than 'classic'
* zk-coordinated WAL splitting.
Expand All @@ -113,6 +116,7 @@ public MasterWalManager(Configuration conf, FileSystem fs, MasterServices servic
this.services = services;
this.splitLogManager = new SplitLogManager(services, conf);
this.oldLogDir = new Path(rootDir, HConstants.HREGION_OLDLOGDIR_NAME);
this.oldWALsDirSize = 0;
}

public void stop() {
Expand All @@ -133,6 +137,14 @@ Path getOldLogDir() {
return this.oldLogDir;
}

/**
 * Recomputes the size of the old WALs directory and stores it so that
 * {@link #getOldWALsDirSize()} can return it.
 * @throws IOException if the content summary of the old WALs directory cannot be obtained; the
 *                     previously stored size is left unchanged in that case
 */
public void updateOldWALsDirSize() throws IOException {
  final long sizeInBytes = fs.getContentSummary(oldLogDir).getLength();
  oldWALsDirSize = sizeInBytes;
}

/**
 * Returns the old WALs directory size, in bytes, as stored by the most recent successful
 * {@link #updateOldWALsDirSize()} call.
 * @return the stored old WALs directory size in bytes
 */
public long getOldWALsDirSize() {
  return oldWALsDirSize;
}

public FileSystem getFileSystem() {
return this.fs;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -209,4 +209,12 @@ public Map<String, Entry<Long, Long>> getNamespaceSpaceUtilization() {
Entry<Long, Long> convertSnapshot(SpaceQuotaSnapshot snapshot) {
return new SimpleImmutableEntry<Long, Long>(snapshot.getUsage(), snapshot.getLimit());
}

/**
 * Reports the old WALs directory size known to the master's WAL manager, or zero while there is
 * no master reference or the master has not finished initializing.
 */
@Override
public long getOldWALsDirSize() {
  final boolean masterReady = master != null && master.isInitialized();
  return masterReady ? master.getMasterWalManager().getOldWALsDirSize() : 0;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.master;

import java.io.IOException;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.ScheduledChore;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Chore that refreshes the 'oldWALsDirSize' value kept by {@link MasterWalManager} by calling
 * {@link MasterWalManager#updateOldWALsDirSize()} on every run.
 * <p>
 * The run period is read from {@link HConstants#HBASE_OLDWAL_DIR_SIZE_UPDATER_PERIOD} and falls
 * back to {@link HConstants#DEFAULT_HBASE_OLDWAL_DIR_SIZE_UPDATER_PERIOD} when it is not set.
 */
@InterfaceAudience.Private
public class OldWALsDirSizeChore extends ScheduledChore {
  private static final Logger LOG = LoggerFactory.getLogger(OldWALsDirSizeChore.class);

  private final MasterServices master;

  /**
   * Creates the chore, named after the master's server name.
   * @param master master services that supply the server name, the configuration holding the
   *               period, and the {@link MasterWalManager} to update
   */
  public OldWALsDirSizeChore(MasterServices master) {
    super(master.getServerName() + "-OldWALsDirSizeChore", master,
      master.getConfiguration().getInt(HConstants.HBASE_OLDWAL_DIR_SIZE_UPDATER_PERIOD,
        HConstants.DEFAULT_HBASE_OLDWAL_DIR_SIZE_UPDATER_PERIOD));
    this.master = master;
  }

  /**
   * Asks the master's WAL manager to recompute the old WALs directory size. An
   * {@link IOException} is logged and not rethrown, so a failed run does not propagate out of the
   * chore.
   */
  @Override
  protected void chore() {
    try {
      this.master.getMasterWalManager().updateOldWALsDirSize();
    } catch (IOException e) {
      // Parameterized message; the trailing throwable is logged with its stack trace.
      LOG.error("Got exception while trying to update the old WALs Directory size counter: {}",
        e.getMessage(), e);
    }
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,8 @@ public void testDefaultMasterMetrics() throws Exception {

metricsHelper.assertCounter(MetricsMasterSource.SERVER_CRASH_METRIC_PREFIX + "SubmittedCount",
0, masterSource);
// Use the shared metric-name constant rather than a literal so the assertion follows the gauge
// name declared in MetricsMasterSource.
metricsHelper.assertGauge(MetricsMasterSource.OLD_WAL_DIR_SIZE_NAME,
master.getMasterWalManager().getOldWALsDirSize(), masterSource);
}

@Test
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ public void testInfo() throws IOException {
assertEquals(master.getMasterCoprocessors().length, info.getCoprocessors().length);
assertEquals(master.getServerManager().getOnlineServersList().size(),
info.getNumRegionServers());
assertEquals(master.getMasterWalManager().getOldWALsDirSize(), info.getOldWALsDirSize());
int regionServerCount =
NUM_RS + (LoadBalancer.isTablesOnMaster(TEST_UTIL.getConfiguration()) ? 1 : 0);
assertEquals(regionServerCount, info.getNumRegionServers());
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.master;

import static org.junit.Assert.assertEquals;

import java.io.IOException;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.master.assignment.MockMasterServices;
import org.apache.hadoop.hbase.testclassification.MasterTests;
import org.apache.hadoop.hbase.testclassification.SmallTests;
import org.junit.After;
import org.junit.Before;
import org.junit.ClassRule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Tests for {@link OldWALsDirSizeChore}. The HBase Master is mocked with
 * {@link MockMasterServices}; chores do not run automatically there, so the test invokes the chore
 * explicitly each time it needs the size refreshed.
 */
@Category({ MasterTests.class, SmallTests.class })
public class TestOldWALsDirSizeChore {
  @ClassRule
  public static final HBaseClassTestRule CLASS_RULE =
    HBaseClassTestRule.forClass(TestOldWALsDirSizeChore.class);

  private static final Logger LOG = LoggerFactory.getLogger(TestOldWALsDirSizeChore.class);

  private static final HBaseTestingUtility HTU = new HBaseTestingUtility();

  private MockMasterServices master;

  @Before
  public void setUp() throws Exception {
    master = new MockMasterServices(HTU.getConfiguration());
    master.start(10, null);
  }

  @After
  public void tearDown() throws Exception {
    master.stop("tearDown");
  }

  /**
   * Writes a file of known length into the old WALs directory, runs the chore once, and checks
   * that the size reported by the WAL manager equals that length.
   */
  @Test
  public void testOldWALsDirSizeChore() throws IOException {
    MasterWalManager walManager = master.getMasterWalManager();

    // The chore has not run yet, so the recorded size is expected to still be zero.
    long sizeBeforeChore = walManager.getOldWALsDirSize();
    assertEquals("Initial OldWALs directory size should be zero before running the chore", 0,
      sizeBeforeChore);

    int payloadLength = 50 * 1024 * 1024; // 50MB
    byte[] payload = new byte[payloadLength];

    // Place a single file of the chosen length in the old WALs directory.
    Path payloadFile = new Path(walManager.getOldLogDir(), "dummy.txt");
    try (FSDataOutputStream out = walManager.getFileSystem().create(payloadFile)) {
      out.write(payload);
    }

    // Trigger one run of the chore by hand to refresh the recorded size.
    OldWALsDirSizeChore chore = new OldWALsDirSizeChore(master);
    chore.chore();

    // The recorded size must now match the length of the file written above.
    long sizeAfterChore = walManager.getOldWALsDirSize();
    assertEquals("OldWALs directory size after chore should be as expected", payloadLength,
      sizeAfterChore);
  }
}

0 comments on commit 7133958

Please sign in to comment.