From 48fc5d41539f019479174461702d07f971698db1 Mon Sep 17 00:00:00 2001 From: Aravindan Vijayan Date: Thu, 25 Jul 2019 14:58:37 -0700 Subject: [PATCH 1/7] HDDS-1786 : Datanodes takeSnapshot should delete previously created snapshots. --- .../server/ratis/ContainerStateMachine.java | 12 +++ .../ratis/TestContainerStateMachine.java | 95 +++++++++++++++++++ ...java => TestContainerStateMachineInt.java} | 4 +- .../om/ratis/OzoneManagerStateMachine.java | 4 +- 4 files changed, 110 insertions(+), 5 deletions(-) create mode 100644 hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/TestContainerStateMachine.java rename hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/{TestContainerStateMachine.java => TestContainerStateMachineInt.java} (98%) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/ContainerStateMachine.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/ContainerStateMachine.java index d82d11423074e..7f0d3d363c5bf 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/ContainerStateMachine.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/ContainerStateMachine.java @@ -21,6 +21,8 @@ import com.google.common.base.Preconditions; import com.google.common.cache.Cache; import com.google.common.cache.CacheBuilder; + +import org.apache.commons.io.FileUtils; import org.apache.commons.io.IOUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdds.HddsUtils; @@ -72,6 +74,7 @@ import java.io.File; import java.io.IOException; +import java.nio.file.Path; import java.util.Collection; import java.util.List; import java.util.Map; @@ -256,6 +259,7 @@ public void persistContainerSet(OutputStream out) throws IOException { public long takeSnapshot() throws IOException { TermIndex ti = getLastAppliedTermIndex(); long startTime = Time.monotonicNow(); + SingleFileSnapshotInfo lastSnapshot = storage.findLatestSnapshot(); if (ti != null && ti.getIndex() != RaftLog.INVALID_LOG_INDEX) { final File snapshotFile = storage.getSnapshotFile(ti.getTerm(), ti.getIndex()); @@ -265,6 +269,14 @@ public long takeSnapshot() throws IOException { fos.flush(); // make sure the snapshot file is synced fos.getFD().sync(); + + //delete old snapshot only if the above creation step was successful. + if (lastSnapshot != null && lastSnapshot.getFile() != null) { + Path lastSnapshotFile = lastSnapshot.getFile().getPath(); + LOG.info("Deleting last snapshot at {} ", + lastSnapshotFile.toString()); + FileUtils.deleteQuietly(lastSnapshotFile.toFile()); + } } catch (IOException ioe) { LOG.info("{}: Failed to write snapshot at:{} file {}", gid, ti, snapshotFile); diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/TestContainerStateMachine.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/TestContainerStateMachine.java new file mode 100644 index 0000000000000..784233f20d892 --- /dev/null +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/TestContainerStateMachine.java @@ -0,0 +1,95 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ozone.container.common.transport.server.ratis; + +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +import java.io.File; +import java.lang.reflect.Field; + +import org.apache.ratis.server.protocol.TermIndex; +import org.apache.ratis.server.storage.FileInfo; +import org.apache.ratis.statemachine.impl.SimpleStateMachineStorage; +import org.apache.ratis.statemachine.impl.SingleFileSnapshotInfo; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; + +/** + * Unit test methods of ContainerStateMachine. + */ +public class TestContainerStateMachine { + + @Rule + public TemporaryFolder temporaryFolder = new TemporaryFolder(); + + @Test + public void testDeleteOldSnapshot() throws Exception { + + File lastSnapshotFile = temporaryFolder.newFile(); + File currentSnapshotFile = temporaryFolder.newFile(); + File nextSnapshotFile = temporaryFolder.newFile(); + + FileInfo fileInfoMock = mock(FileInfo.class); + when(fileInfoMock.getPath()).thenReturn(lastSnapshotFile.toPath()) + .thenReturn(currentSnapshotFile.toPath()); + + SingleFileSnapshotInfo singleFileSnapshotInfoMock = mock( + SingleFileSnapshotInfo.class); + when(singleFileSnapshotInfoMock.getFile()).thenReturn(fileInfoMock); + + TermIndex termIndexMock = mock(TermIndex.class); + when(termIndexMock.getIndex()).thenReturn(1L); + when(termIndexMock.getTerm()).thenReturn(1L); + + SimpleStateMachineStorage simpleStateMachineStorageMock = + mock(SimpleStateMachineStorage.class); + when(simpleStateMachineStorageMock.findLatestSnapshot()) + .thenReturn(singleFileSnapshotInfoMock); + when(simpleStateMachineStorageMock.getSnapshotFile(1L, 1L)) + .thenReturn(currentSnapshotFile).thenReturn(nextSnapshotFile); + + ContainerStateMachine containerStateMachine = + mock(ContainerStateMachine.class); + when(containerStateMachine.getLastAppliedTermIndex()).thenReturn( + termIndexMock); + when(containerStateMachine.takeSnapshot()).thenCallRealMethod(); + + // Have to use reflections here since storage is baked into + // ContainerStateMachine class. + Field f1 = containerStateMachine.getClass().getSuperclass() + .getDeclaredField("storage"); + f1.setAccessible(true); + f1.set(containerStateMachine, simpleStateMachineStorageMock); + + // Verify last snapshot deletion while calling takeSnapshot() API. + assertTrue(lastSnapshotFile.exists()); + containerStateMachine.takeSnapshot(); + assertFalse(lastSnapshotFile.exists()); + + // Verify current snapshot deletion while calling takeSnapshot() API once + // more. + assertTrue(currentSnapshotFile.exists()); + containerStateMachine.takeSnapshot(); + assertFalse(currentSnapshotFile.exists()); + } +} \ No newline at end of file diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestContainerStateMachine.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestContainerStateMachineInt.java similarity index 98% rename from hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestContainerStateMachine.java rename to hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestContainerStateMachineInt.java index 13e3eff825e13..f1022df2bad63 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestContainerStateMachine.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestContainerStateMachineInt.java @@ -53,7 +53,7 @@ * Tests the containerStateMachine failure handling. */ -public class TestContainerStateMachine { +public class TestContainerStateMachineInt { private static MiniOzoneCluster cluster; private static OzoneConfiguration conf = new OzoneConfiguration(); @@ -71,7 +71,7 @@ public class TestContainerStateMachine { @BeforeClass public static void init() throws Exception { path = GenericTestUtils - .getTempPath(TestContainerStateMachine.class.getSimpleName()); + .getTempPath(TestContainerStateMachineInt.class.getSimpleName()); File baseDir = new File(path); baseDir.mkdirs(); diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerStateMachine.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerStateMachine.java index c51323e3a7d6e..3f0483198a939 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerStateMachine.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerStateMachine.java @@ -28,8 +28,6 @@ import java.util.concurrent.ThreadFactory; import java.util.concurrent.TimeUnit; -import org.apache.hadoop.ozone.container.common.transport.server.ratis - .ContainerStateMachine; import org.apache.hadoop.ozone.om.OzoneManager; import org.apache.hadoop.ozone.om.exceptions.OMException; import org.apache.hadoop.ozone.om.helpers.OMRatisHelper; @@ -64,7 +62,7 @@ public class OzoneManagerStateMachine extends BaseStateMachine { static final Logger LOG = - LoggerFactory.getLogger(ContainerStateMachine.class); + LoggerFactory.getLogger(OzoneManagerStateMachine.class); private final SimpleStateMachineStorage storage = new SimpleStateMachineStorage(); private final OzoneManagerRatisServer omRatisServer; From ba07c33f1e65576a82efa3060b4896fd0be5a747 Mon Sep 17 00:00:00 2001 From: Aravindan Vijayan Date: Thu, 25 Jul 2019 15:23:56 -0700 Subject: [PATCH 2/7] HDDS-1786 : Datanodes takeSnapshot should delete previously created snapshots. (Added negative unit test) --- .../ratis/TestContainerStateMachine.java | 37 ++++++++++++++----- 1 file changed, 27 insertions(+), 10 deletions(-) diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/TestContainerStateMachine.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/TestContainerStateMachine.java index 784233f20d892..6f64ed6174eb4 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/TestContainerStateMachine.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/TestContainerStateMachine.java @@ -24,12 +24,15 @@ import static org.mockito.Mockito.when; import java.io.File; +import java.io.IOException; import java.lang.reflect.Field; +import java.nio.file.Paths; import org.apache.ratis.server.protocol.TermIndex; import org.apache.ratis.server.storage.FileInfo; import org.apache.ratis.statemachine.impl.SimpleStateMachineStorage; import org.apache.ratis.statemachine.impl.SingleFileSnapshotInfo; +import org.junit.Assert; import org.junit.Rule; import org.junit.Test; import org.junit.rules.TemporaryFolder; @@ -45,13 +48,13 @@ public class TestContainerStateMachine { @Test public void testDeleteOldSnapshot() throws Exception { - File lastSnapshotFile = temporaryFolder.newFile(); - File currentSnapshotFile = temporaryFolder.newFile(); - File nextSnapshotFile = temporaryFolder.newFile(); + File firstSnapshotFile = temporaryFolder.newFile(); + File secondSnapshotFile = temporaryFolder.newFile(); + File thirdSnapshotFile = temporaryFolder.newFile(); FileInfo fileInfoMock = mock(FileInfo.class); - when(fileInfoMock.getPath()).thenReturn(lastSnapshotFile.toPath()) - .thenReturn(currentSnapshotFile.toPath()); + when(fileInfoMock.getPath()).thenReturn(firstSnapshotFile.toPath()) + .thenReturn(secondSnapshotFile.toPath()); SingleFileSnapshotInfo singleFileSnapshotInfoMock = mock( SingleFileSnapshotInfo.class); @@ -66,7 +69,11 @@ public void testDeleteOldSnapshot() throws Exception { when(simpleStateMachineStorageMock.findLatestSnapshot()) .thenReturn(singleFileSnapshotInfoMock); when(simpleStateMachineStorageMock.getSnapshotFile(1L, 1L)) - .thenReturn(currentSnapshotFile).thenReturn(nextSnapshotFile); + .thenReturn(secondSnapshotFile) + .thenReturn(thirdSnapshotFile) + //Return non-existent file while taking 3rd snapshot. + .thenReturn(Paths.get("NonExistentDir", "NonExistentFile") + .toFile()); ContainerStateMachine containerStateMachine = mock(ContainerStateMachine.class); @@ -82,14 +89,24 @@ public void testDeleteOldSnapshot() throws Exception { f1.set(containerStateMachine, simpleStateMachineStorageMock); // Verify last snapshot deletion while calling takeSnapshot() API. - assertTrue(lastSnapshotFile.exists()); + assertTrue(firstSnapshotFile.exists()); containerStateMachine.takeSnapshot(); - assertFalse(lastSnapshotFile.exists()); + assertFalse(firstSnapshotFile.exists()); // Verify current snapshot deletion while calling takeSnapshot() API once // more. - assertTrue(currentSnapshotFile.exists()); + assertTrue(secondSnapshotFile.exists()); containerStateMachine.takeSnapshot(); - assertFalse(currentSnapshotFile.exists()); + assertFalse(secondSnapshotFile.exists()); + + // Now, takeSnapshot throws IOException. + try { + containerStateMachine.takeSnapshot(); + Assert.fail(); + } catch (IOException ioEx) { + //Verify the old snapshot file still exists. + assertTrue(thirdSnapshotFile.exists()); + } + } } \ No newline at end of file From b3de76812ea54c5545c4b4a64b395819fd47ecc5 Mon Sep 17 00:00:00 2001 From: Aravindan Vijayan Date: Mon, 9 Sep 2019 15:15:49 -0700 Subject: [PATCH 3/7] Updated to use configurable Snapshot retention policy from RATIS-543. --- .../server/ratis/ContainerStateMachine.java | 11 -- .../ratis/RatisServerConfiguration.java | 30 +++++ .../server/ratis/XceiverServerRatis.java | 15 ++- .../ratis/TestContainerStateMachine.java | 112 ------------------ ...nt.java => TestContainerStateMachine.java} | 4 +- 5 files changed, 43 insertions(+), 129 deletions(-) create mode 100644 hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/RatisServerConfiguration.java delete mode 100644 hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/TestContainerStateMachine.java rename hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/{TestContainerStateMachineInt.java => TestContainerStateMachine.java} (98%) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/ContainerStateMachine.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/ContainerStateMachine.java index 50738a65cdea9..e16588c3b74bd 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/ContainerStateMachine.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/ContainerStateMachine.java @@ -23,7 +23,6 @@ import com.google.common.cache.Cache; import com.google.common.cache.CacheBuilder; -import org.apache.commons.io.FileUtils; import org.apache.commons.io.IOUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdds.HddsUtils; @@ -78,7 +77,6 @@ import java.io.File; import java.io.IOException; -import java.nio.file.Path; import java.util.Collection; import java.util.List; import java.util.Map; @@ -286,7 +284,6 @@ public long takeSnapshot() throws IOException { LOG.error(msg); throw sme; } - SingleFileSnapshotInfo lastSnapshot = storage.findLatestSnapshot(); if (ti != null && ti.getIndex() != RaftLog.INVALID_LOG_INDEX) { final File snapshotFile = storage.getSnapshotFile(ti.getTerm(), ti.getIndex()); @@ -296,14 +293,6 @@ public long takeSnapshot() throws IOException { fos.flush(); // make sure the snapshot file is synced fos.getFD().sync(); - - //delete old snapshot only if the above creation step was successful. - if (lastSnapshot != null && lastSnapshot.getFile() != null) { - Path lastSnapshotFile = lastSnapshot.getFile().getPath(); - LOG.info("Deleting last snapshot at {} ", - lastSnapshotFile.toString()); - FileUtils.deleteQuietly(lastSnapshotFile.toFile()); - } } catch (IOException ioe) { LOG.error("{}: Failed to write snapshot at:{} file {}", gid, ti, snapshotFile); diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/RatisServerConfiguration.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/RatisServerConfiguration.java new file mode 100644 index 0000000000000..065d70cd41818 --- /dev/null +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/RatisServerConfiguration.java @@ -0,0 +1,30 @@ +package org.apache.hadoop.ozone.container.common.transport.server.ratis; + +import org.apache.hadoop.hdds.conf.Config; +import org.apache.hadoop.hdds.conf.ConfigGroup; +import org.apache.hadoop.hdds.conf.ConfigTag; +import org.apache.hadoop.hdds.conf.ConfigType; + +/** + * Holds configuration items for Ratis/Raft server. + */ +@ConfigGroup(prefix = "hdds.ratis.server") +public class RatisServerConfiguration { + + private int numSnapshotsRetained; + + @Config(key = "num.snapshots.retained", + type = ConfigType.INT, + defaultValue = "5", + tags = {ConfigTag.STORAGE}, + description = "Config parameter to specify number of old snapshots " + + "retained at the Ratis leader.") + public void setNumSnapshotsRetained(int numSnapshotsRetained) { + this.numSnapshotsRetained = numSnapshotsRetained; + } + + public int getNumSnapshotsRetained() { + return numSnapshotsRetained; + } + +} diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/XceiverServerRatis.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/XceiverServerRatis.java index 7e7fbb99ceacd..e521fb4aabe45 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/XceiverServerRatis.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/XceiverServerRatis.java @@ -19,8 +19,8 @@ package org.apache.hadoop.ozone.container.common.transport.server.ratis; import com.google.common.annotations.VisibleForTesting; -import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.StorageUnit; +import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.protocol.DatanodeDetails; import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerCommandRequestProto; import org.apache.hadoop.hdds.protocol.proto.HddsProtos; @@ -101,7 +101,7 @@ private static long nextCallId() { private final long cacheEntryExpiryInteval; private boolean isStarted = false; private DatanodeDetails datanodeDetails; - private final Configuration conf; + private final OzoneConfiguration conf; // TODO: Remove the gids set when Ratis supports an api to query active // pipelines private final Set raftGids = new HashSet<>(); @@ -110,7 +110,7 @@ private static long nextCallId() { private XceiverServerRatis(DatanodeDetails dd, int port, ContainerDispatcher dispatcher, ContainerController containerController, StateContext context, GrpcTlsConfig tlsConfig, CertificateClient caClient, - Configuration conf) + OzoneConfiguration conf) throws IOException { super(conf, caClient); this.conf = conf; @@ -255,6 +255,13 @@ private RaftProperties newRaftProperties() { OzoneConfigKeys.DFS_CONTAINER_RATIS_LOG_PURGE_GAP_DEFAULT); RaftServerConfigKeys.Log.setPurgeGap(properties, purgeGap); + //Set the number of Snapshots Retained. + RatisServerConfiguration ratisServerConfiguration = + conf.getObject(RatisServerConfiguration.class); + int numSnapshotsRetained = + ratisServerConfiguration.getNumSnapshotsRetained(); + RaftServerConfigKeys.Snapshot.setSnapshotRetentionPolicy(properties, + numSnapshotsRetained); return properties; } @@ -377,7 +384,7 @@ private RpcType setRpcType(RaftProperties properties) { } public static XceiverServerRatis newXceiverServerRatis( - DatanodeDetails datanodeDetails, Configuration ozoneConf, + DatanodeDetails datanodeDetails, OzoneConfiguration ozoneConf, ContainerDispatcher dispatcher, ContainerController containerController, CertificateClient caClient, StateContext context) throws IOException { int localPort = ozoneConf.getInt( diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/TestContainerStateMachine.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/TestContainerStateMachine.java deleted file mode 100644 index 6f64ed6174eb4..0000000000000 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/TestContainerStateMachine.java +++ /dev/null @@ -1,112 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.ozone.container.common.transport.server.ratis; - -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertTrue; -import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.when; - -import java.io.File; -import java.io.IOException; -import java.lang.reflect.Field; -import java.nio.file.Paths; - -import org.apache.ratis.server.protocol.TermIndex; -import org.apache.ratis.server.storage.FileInfo; -import org.apache.ratis.statemachine.impl.SimpleStateMachineStorage; -import org.apache.ratis.statemachine.impl.SingleFileSnapshotInfo; -import org.junit.Assert; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; - -/** - * Unit test methods of ContainerStateMachine. - */ -public class TestContainerStateMachine { - - @Rule - public TemporaryFolder temporaryFolder = new TemporaryFolder(); - - @Test - public void testDeleteOldSnapshot() throws Exception { - - File firstSnapshotFile = temporaryFolder.newFile(); - File secondSnapshotFile = temporaryFolder.newFile(); - File thirdSnapshotFile = temporaryFolder.newFile(); - - FileInfo fileInfoMock = mock(FileInfo.class); - when(fileInfoMock.getPath()).thenReturn(firstSnapshotFile.toPath()) - .thenReturn(secondSnapshotFile.toPath()); - - SingleFileSnapshotInfo singleFileSnapshotInfoMock = mock( - SingleFileSnapshotInfo.class); - when(singleFileSnapshotInfoMock.getFile()).thenReturn(fileInfoMock); - - TermIndex termIndexMock = mock(TermIndex.class); - when(termIndexMock.getIndex()).thenReturn(1L); - when(termIndexMock.getTerm()).thenReturn(1L); - - SimpleStateMachineStorage simpleStateMachineStorageMock = - mock(SimpleStateMachineStorage.class); - when(simpleStateMachineStorageMock.findLatestSnapshot()) - .thenReturn(singleFileSnapshotInfoMock); - when(simpleStateMachineStorageMock.getSnapshotFile(1L, 1L)) - .thenReturn(secondSnapshotFile) - .thenReturn(thirdSnapshotFile) - //Return non-existent file while taking 3rd snapshot. - .thenReturn(Paths.get("NonExistentDir", "NonExistentFile") - .toFile()); - - ContainerStateMachine containerStateMachine = - mock(ContainerStateMachine.class); - when(containerStateMachine.getLastAppliedTermIndex()).thenReturn( - termIndexMock); - when(containerStateMachine.takeSnapshot()).thenCallRealMethod(); - - // Have to use reflections here since storage is baked into - // ContainerStateMachine class. - Field f1 = containerStateMachine.getClass().getSuperclass() - .getDeclaredField("storage"); - f1.setAccessible(true); - f1.set(containerStateMachine, simpleStateMachineStorageMock); - - // Verify last snapshot deletion while calling takeSnapshot() API. - assertTrue(firstSnapshotFile.exists()); - containerStateMachine.takeSnapshot(); - assertFalse(firstSnapshotFile.exists()); - - // Verify current snapshot deletion while calling takeSnapshot() API once - // more. - assertTrue(secondSnapshotFile.exists()); - containerStateMachine.takeSnapshot(); - assertFalse(secondSnapshotFile.exists()); - - // Now, takeSnapshot throws IOException. - try { - containerStateMachine.takeSnapshot(); - Assert.fail(); - } catch (IOException ioEx) { - //Verify the old snapshot file still exists. - assertTrue(thirdSnapshotFile.exists()); - } - - } -} \ No newline at end of file diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestContainerStateMachineInt.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestContainerStateMachine.java similarity index 98% rename from hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestContainerStateMachineInt.java rename to hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestContainerStateMachine.java index 96bf02d4a2322..2c3cfab045e60 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestContainerStateMachineInt.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestContainerStateMachine.java @@ -53,7 +53,7 @@ * Tests the containerStateMachine failure handling. */ -public class TestContainerStateMachineInt { +public class TestContainerStateMachine { private static MiniOzoneCluster cluster; private static OzoneConfiguration conf = new OzoneConfiguration(); @@ -71,7 +71,7 @@ public class TestContainerStateMachineInt { @BeforeClass public static void init() throws Exception { path = GenericTestUtils - .getTempPath(TestContainerStateMachineInt.class.getSimpleName()); + .getTempPath(TestContainerStateMachine.class.getSimpleName()); File baseDir = new File(path); baseDir.mkdirs(); From 35cfe1987b0eebe0a2a8fde539953497638a1c0c Mon Sep 17 00:00:00 2001 From: Aravindan Vijayan Date: Mon, 9 Sep 2019 15:18:11 -0700 Subject: [PATCH 4/7] Remove added newline. --- .../common/transport/server/ratis/ContainerStateMachine.java | 1 - 1 file changed, 1 deletion(-) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/ContainerStateMachine.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/ContainerStateMachine.java index e16588c3b74bd..cee9741c4fbe6 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/ContainerStateMachine.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/ContainerStateMachine.java @@ -22,7 +22,6 @@ import com.google.common.base.Preconditions; import com.google.common.cache.Cache; import com.google.common.cache.CacheBuilder; - import org.apache.commons.io.IOUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdds.HddsUtils; From 80a9fb8c379c17bcc3e887c06dbb78f31208ac47 Mon Sep 17 00:00:00 2001 From: Aravindan Vijayan Date: Tue, 10 Sep 2019 10:18:13 -0700 Subject: [PATCH 5/7] Added unit test. --- .../client/rpc/TestContainerStateMachine.java | 62 ++++++++++++++++++- 1 file changed, 61 insertions(+), 1 deletion(-) diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestContainerStateMachine.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestContainerStateMachine.java index 2c3cfab045e60..4f675266c2ead 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestContainerStateMachine.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestContainerStateMachine.java @@ -23,15 +23,20 @@ import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; import org.apache.hadoop.ozone.MiniOzoneCluster; +import org.apache.hadoop.ozone.OzoneConfigKeys; import org.apache.hadoop.ozone.client.CertificateClientTestImpl; import org.apache.hadoop.ozone.client.ObjectStore; import org.apache.hadoop.ozone.client.OzoneClient; import org.apache.hadoop.ozone.client.OzoneClientFactory; import org.apache.hadoop.ozone.client.io.KeyOutputStream; import org.apache.hadoop.ozone.client.io.OzoneOutputStream; +import org.apache.hadoop.ozone.container.ContainerTestHelper; +import org.apache.hadoop.ozone.container.common.transport.server.ratis.ContainerStateMachine; +import org.apache.hadoop.ozone.container.common.transport.server.ratis.RatisServerConfiguration; import org.apache.hadoop.ozone.om.OzoneManager; import org.apache.hadoop.ozone.om.helpers.OmKeyLocationInfo; import org.apache.hadoop.test.GenericTestUtils; +import org.apache.ratis.statemachine.impl.SimpleStateMachineStorage; import org.junit.AfterClass; import org.junit.Assert; import org.junit.BeforeClass; @@ -39,6 +44,7 @@ import java.io.File; import java.io.IOException; +import java.nio.file.Path; import java.util.HashMap; import java.util.List; import java.util.concurrent.TimeUnit; @@ -85,7 +91,8 @@ public static void init() throws Exception { conf.setTimeDuration(OZONE_SCM_STALENODE_INTERVAL, 3, TimeUnit.SECONDS); conf.setQuietMode(false); OzoneManager.setTestSecureOmFlag(true); - // conf.set(HADOOP_SECURITY_AUTHENTICATION, KERBEROS.toString()); + conf.setLong(OzoneConfigKeys.DFS_RATIS_SNAPSHOT_THRESHOLD_KEY, 1); + // conf.set(HADOOP_SECURITY_AUTHENTICATION, KERBEROS.toString()); cluster = MiniOzoneCluster.newBuilder(conf).setNumDatanodes(1) .setHbInterval(200) @@ -148,4 +155,57 @@ public void testContainerStateMachineFailures() throws Exception { .getContainerState() == ContainerProtos.ContainerDataProto.State.UNHEALTHY); } + + @Test + public void testRatisSnapshotRetention() throws Exception { + + ContainerStateMachine stateMachine = + (ContainerStateMachine) ContainerTestHelper.getStateMachine(cluster); + SimpleStateMachineStorage storage = + (SimpleStateMachineStorage) stateMachine.getStateMachineStorage(); + Assert.assertNull(storage.findLatestSnapshot()); + + // Write 10 keys. Num snapshots should be equal to config value. + for (int i = 1; i <= 10; i++) { + OzoneOutputStream key = + objectStore.getVolume(volumeName).getBucket(bucketName) + .createKey(("ratis" + i), 1024, ReplicationType.RATIS, + ReplicationFactor.ONE, new HashMap<>()); + // First write and flush creates a container in the datanode + key.write(("ratis" + i).getBytes()); + key.flush(); + key.write(("ratis" + i).getBytes()); + } + + RatisServerConfiguration ratisServerConfiguration = + conf.getObject(RatisServerConfiguration.class); + + stateMachine = + (ContainerStateMachine) ContainerTestHelper.getStateMachine(cluster); + storage = (SimpleStateMachineStorage) stateMachine.getStateMachineStorage(); + Path parentPath = storage.findLatestSnapshot().getFile().getPath(); + int numSnapshots = parentPath.getParent().toFile().listFiles().length; + Assert.assertEquals(ratisServerConfiguration.getNumSnapshotsRetained(), + numSnapshots); + + // Write 10 more keys. Num Snapshots should remain the same. + for (int i = 11; i <= 20; i++) { + OzoneOutputStream key = + objectStore.getVolume(volumeName).getBucket(bucketName) + .createKey(("ratis" + i), 1024, ReplicationType.RATIS, + ReplicationFactor.ONE, new HashMap<>()); + // First write and flush creates a container in the datanode + key.write(("ratis" + i).getBytes()); + key.flush(); + key.write(("ratis" + i).getBytes()); + } + stateMachine = + (ContainerStateMachine) ContainerTestHelper.getStateMachine(cluster); + storage = (SimpleStateMachineStorage) stateMachine.getStateMachineStorage(); + parentPath = storage.findLatestSnapshot().getFile().getPath(); + numSnapshots = parentPath.getParent().toFile().listFiles().length; + Assert.assertEquals(ratisServerConfiguration.getNumSnapshotsRetained(), + numSnapshots); + } + } \ No newline at end of file From 0ae12ba4c157bd93e3d9d2c97a6277e8f92ecc4a Mon Sep 17 00:00:00 2001 From: Aravindan Vijayan Date: Tue, 10 Sep 2019 22:41:23 -0700 Subject: [PATCH 6/7] Fix rat check failure. --- .../server/ratis/RatisServerConfiguration.java | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/RatisServerConfiguration.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/RatisServerConfiguration.java index 065d70cd41818..7f112eacd81cf 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/RatisServerConfiguration.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/RatisServerConfiguration.java @@ -1,3 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + package org.apache.hadoop.ozone.container.common.transport.server.ratis; import org.apache.hadoop.hdds.conf.Config; From 4464d43a04c3a3ba53b45397d8eb004fb5cef1ab Mon Sep 17 00:00:00 2001 From: Aravindan Vijayan Date: Wed, 11 Sep 2019 10:43:11 -0700 Subject: [PATCH 7/7] Fix unit test edge case. --- .../ozone/client/rpc/TestContainerStateMachine.java | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestContainerStateMachine.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestContainerStateMachine.java index 4f675266c2ead..19a1707973153 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestContainerStateMachine.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestContainerStateMachine.java @@ -185,8 +185,8 @@ public void testRatisSnapshotRetention() throws Exception { storage = (SimpleStateMachineStorage) stateMachine.getStateMachineStorage(); Path parentPath = storage.findLatestSnapshot().getFile().getPath(); int numSnapshots = parentPath.getParent().toFile().listFiles().length; - Assert.assertEquals(ratisServerConfiguration.getNumSnapshotsRetained(), - numSnapshots); + Assert.assertTrue(Math.abs(ratisServerConfiguration + .getNumSnapshotsRetained() - numSnapshots) <= 1); // Write 10 more keys. Num Snapshots should remain the same. for (int i = 11; i <= 20; i++) { @@ -204,8 +204,8 @@ public void testRatisSnapshotRetention() throws Exception { storage = (SimpleStateMachineStorage) stateMachine.getStateMachineStorage(); parentPath = storage.findLatestSnapshot().getFile().getPath(); numSnapshots = parentPath.getParent().toFile().listFiles().length; - Assert.assertEquals(ratisServerConfiguration.getNumSnapshotsRetained(), - numSnapshots); + Assert.assertTrue(Math.abs(ratisServerConfiguration + .getNumSnapshotsRetained() - numSnapshots) <= 1); } } \ No newline at end of file