From 1991e9ee5984da0586bf95ce79af6d3a76e2329b Mon Sep 17 00:00:00 2001
From: Mia Altieri <32723809+MiaAltieri@users.noreply.github.com>
Date: Mon, 23 Sep 2024 09:21:40 +0200
Subject: [PATCH] [DPE-5369] rs int tests for upgrades (#332)

---
 tests/integration/conftest.py                 |  41 +++++++
 .../ha_tests/scripts/deploy_chaos_mesh.sh     |   8 +-
 .../ha_tests/scripts/destroy_chaos_mesh.sh    |   6 +-
 tests/integration/ha_tests/test_ha.py         |  31 -----
 tests/integration/upgrades/helpers.py         |  45 +++++++
 tests/integration/upgrades/test_upgrades.py   | 110 +++++++++++++++---
 tests/unit/test_upgrade.py                    |   1 +
 7 files changed, 185 insertions(+), 57 deletions(-)
 create mode 100644 tests/integration/conftest.py
 create mode 100644 tests/integration/upgrades/helpers.py

diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py
new file mode 100644
index 000000000..8f831472e
--- /dev/null
+++ b/tests/integration/conftest.py
@@ -0,0 +1,41 @@
+#!/usr/bin/env python3
+# Copyright 2024 Canonical Ltd.
+# See LICENSE file for licensing details.
+
+import pytest
+import pytest_asyncio
+from pytest_operator.plugin import OpsTest
+
+from .ha_tests.helpers import (
+    deploy_chaos_mesh,
+    destroy_chaos_mesh,
+    get_application_name,
+)
+
+
+@pytest_asyncio.fixture
+async def continuous_writes(ops_test: OpsTest) -> None:
+    """Starts continuous writes to the MongoDB cluster and clears the writes at the end."""
+    application_name = await get_application_name(ops_test, "application")
+
+    application_unit = ops_test.model.applications[application_name].units[0]
+
+    clear_writes_action = await application_unit.run_action("clear-continuous-writes")
+    await clear_writes_action.wait()
+
+    start_writes_action = await application_unit.run_action("start-continuous-writes")
+    await start_writes_action.wait()
+
+    yield  # the test runs here; everything below is teardown
+
+    clear_writes_action = await application_unit.run_action("clear-continuous-writes")
+    await clear_writes_action.wait()
+
+
+@pytest.fixture(scope="module")
+def chaos_mesh(ops_test: OpsTest) -> None:
+    deploy_chaos_mesh(ops_test.model.info.name)  # set up once per module (fixture scope)
+
+    yield  # the module's tests run here
+
+    destroy_chaos_mesh(ops_test.model.info.name)  # teardown, using the same model name
diff --git a/tests/integration/ha_tests/scripts/deploy_chaos_mesh.sh b/tests/integration/ha_tests/scripts/deploy_chaos_mesh.sh
index 819f5efd3..0a11fb8ca 100755
--- a/tests/integration/ha_tests/scripts/deploy_chaos_mesh.sh
+++ b/tests/integration/ha_tests/scripts/deploy_chaos_mesh.sh
@@ -11,13 +11,13 @@ if [ -z "${chaos_mesh_ns}" ]; then
 fi
 
 deploy_chaos_mesh() {
-    if [ "$(helm repo list | grep -c 'chaos-mesh')" != "1" ]; then
-        echo "adding chaos-mesh helm repo"
-        helm repo add chaos-mesh https://charts.chaos-mesh.org
+    if [ "$(microk8s.helm repo list | grep -c 'chaos-mesh')" != "1" ]; then  # add the repo unless exactly one chaos-mesh line is listed
+        echo "adding chaos-mesh microk8s.helm repo"
+        microk8s.helm repo add chaos-mesh https://charts.chaos-mesh.org
     fi
 
     echo "installing chaos-mesh"
-    helm install chaos-mesh chaos-mesh/chaos-mesh --namespace="${chaos_mesh_ns}" --set chaosDaemon.runtime=containerd --set chaosDaemon.socketPath=/var/snap/microk8s/common/run/containerd.sock --set dashboard.create=false --version "${chaos_mesh_version}" --set clusterScoped=false --set controllerManager.targetNamespace="${chaos_mesh_ns}"
+    microk8s.helm install chaos-mesh chaos-mesh/chaos-mesh --namespace="${chaos_mesh_ns}" --set chaosDaemon.runtime=containerd --set chaosDaemon.socketPath=/var/snap/microk8s/common/run/containerd.sock --set dashboard.create=false --version "${chaos_mesh_version}" --set clusterScoped=false --set controllerManager.targetNamespace="${chaos_mesh_ns}"
     sleep 10
 }
 
diff --git a/tests/integration/ha_tests/scripts/destroy_chaos_mesh.sh b/tests/integration/ha_tests/scripts/destroy_chaos_mesh.sh
index c77f2fd49..7daba1387 100755
--- a/tests/integration/ha_tests/scripts/destroy_chaos_mesh.sh
+++ b/tests/integration/ha_tests/scripts/destroy_chaos_mesh.sh
@@ -42,9 +42,9 @@ destroy_chaos_mesh() {
         timeout 30 kubectl delete crd "$(kubectl get crd | grep 'chaos-mesh.org' | awk '{print $1}')" || :
     fi
 
-    if [ -n "${chaos_mesh_ns}" ] && [ "$(helm repo list --namespace "${chaos_mesh_ns}" | grep -c 'chaos-mesh')" = "1" ]; then
-        echo "uninstalling chaos-mesh helm repo"
-        helm uninstall chaos-mesh --namespace "${chaos_mesh_ns}" || :
+    if [ -n "${chaos_mesh_ns}" ] && [ "$(microk8s.helm repo list --namespace "${chaos_mesh_ns}" | grep -c 'chaos-mesh')" = "1" ]; then
+        echo "uninstalling chaos-mesh microk8s.helm repo"
+        microk8s.helm uninstall chaos-mesh --namespace "${chaos_mesh_ns}" || :
     fi
 }
 
diff --git a/tests/integration/ha_tests/test_ha.py b/tests/integration/ha_tests/test_ha.py
index 0172e4bc2..322fb3e72 100644
--- a/tests/integration/ha_tests/test_ha.py
+++ b/tests/integration/ha_tests/test_ha.py
@@ -7,7 +7,6 @@
 from datetime import datetime, timezone
 
 import pytest
-import pytest_asyncio
 from pytest_operator.plugin import OpsTest
 
 from ..helpers import APP_NAME, check_or_scale_app
@@ -22,8 +21,6 @@
     count_primaries,
     deploy_and_scale_application,
     deploy_and_scale_mongodb,
-    deploy_chaos_mesh,
-    destroy_chaos_mesh,
     fetch_replica_set_members,
     find_record_in_collection,
     find_unit,
@@ -55,34 +52,6 @@
 MEDIAN_REELECTION_TIME = 12
 
 
-@pytest_asyncio.fixture
-async def continuous_writes(ops_test: OpsTest) -> None:
-    """Starts continuous writes to the MongoDB cluster and clear the writes at the end."""
-    application_name = await get_application_name(ops_test, "application")
-
-    application_unit = ops_test.model.applications[application_name].units[0]
-
-    clear_writes_action = await application_unit.run_action("clear-continuous-writes")
-    await clear_writes_action.wait()
-
-    start_writes_action = await application_unit.run_action("start-continuous-writes")
-    await start_writes_action.wait()
-
-    yield
-
-    clear_writes_action = await application_unit.run_action("clear-continuous-writes")
-    await clear_writes_action.wait()
-
-
-@pytest.fixture(scope="module")
-def chaos_mesh(ops_test: OpsTest) -> None:
-    deploy_chaos_mesh(ops_test.model.info.name)
-
-    yield
-
-    destroy_chaos_mesh(ops_test.model.info.name)
-
-
 @pytest.mark.group(1)
 @pytest.mark.abort_on_fail
 async def test_build_and_deploy(ops_test: OpsTest, cmd_mongodb_charm) -> None:
diff --git a/tests/integration/upgrades/helpers.py b/tests/integration/upgrades/helpers.py
new file mode 100644
index 000000000..9a646f09e
--- /dev/null
+++ b/tests/integration/upgrades/helpers.py
@@ -0,0 +1,45 @@
+#!/usr/bin/env python3
+# Copyright 2024 Canonical Ltd.
+# See LICENSE file for licensing details.
+
+import logging
+from pathlib import Path
+
+from pytest_operator.plugin import OpsTest
+
+from ..backup_tests import helpers as backup_helpers
+
+logger = logging.getLogger(__name__)
+
+
+async def assert_successful_run_upgrade_sequence(
+    ops_test: OpsTest, app_name: str, new_charm: Path
+) -> None:
+    """Refreshes the app to new_charm and runs resume-upgrade when its status asks for it."""
+    leader_unit = await backup_helpers.get_leader_unit(ops_test, app_name)
+    # action = await leader_unit.run_action("pre-upgrade-check")
+    # await action.wait()
+    # assert action.status == "completed", "pre-upgrade-check failed, expected to succeed."
+
+    await ops_test.model.applications[app_name].refresh(path=new_charm)
+    await ops_test.model.wait_for_idle(
+        apps=[app_name], status="active", timeout=1000, idle_period=30
+    )
+
+    # resume-upgrade only needs to be run when:
+    # 1. there is more than one unit in the application
+    # 2. AND the underlying workload was updated
+    if len(ops_test.model.applications[app_name].units) < 2:
+        return
+
+    if "resume-upgrade" not in ops_test.model.applications[app_name].status_message:
+        return
+
+    logger.info(f"Calling resume-upgrade for {app_name}")
+    action = await leader_unit.run_action("resume-upgrade")
+    await action.wait()
+    assert action.status == "completed", "resume-upgrade failed, expected to succeed."
+
+    await ops_test.model.wait_for_idle(
+        apps=[app_name], status="active", timeout=1000, idle_period=30
+    )
diff --git a/tests/integration/upgrades/test_upgrades.py b/tests/integration/upgrades/test_upgrades.py
index 1caab3b2d..42ec405ac 100644
--- a/tests/integration/upgrades/test_upgrades.py
+++ b/tests/integration/upgrades/test_upgrades.py
@@ -2,40 +2,112 @@
 # Copyright 2024 Canonical Ltd.
 # See LICENSE file for licensing details.
 
+import logging
+
 import pytest
 from pytest_operator.plugin import OpsTest
 
-from ..ha_tests.helpers import find_unit
-from ..helpers import (
-    APP_NAME,
-    check_or_scale_app,
-    get_app_name,
-    get_password,
-    set_password,
+from ..backup_tests import helpers as backup_helpers
+from ..ha_tests.helpers import (
+    count_writes,
+    deploy_and_scale_application,
+    find_unit,
+    isolate_instance_from_cluster,
+    relate_mongodb_and_application,
+    remove_instance_isolation,
+    wait_until_unit_in_status,
 )
+from ..helpers import check_or_scale_app, get_app_name, get_password, set_password
+from .helpers import assert_successful_run_upgrade_sequence
 
+logger = logging.getLogger(__name__)
 
-@pytest.mark.skip("Missing upgrade code for now")
+WRITE_APP = "application"
+MONGODB_CHARM_NAME = "mongodb-k8s"
+
+
+@pytest.mark.skip("skip until upgrades work has been released to charmhub")
 @pytest.mark.group(1)
 @pytest.mark.abort_on_fail
 async def test_build_and_deploy(ops_test: OpsTest):
-    app_name = await get_app_name(ops_test)
 
-    if app_name:
-        await check_or_scale_app(ops_test, app_name, required_units=3)
+    await deploy_and_scale_application(ops_test)
+
+    db_app_name = await get_app_name(ops_test)
+
+    if db_app_name:  # a name was returned: reuse that app instead of deploying
+        await check_or_scale_app(ops_test, db_app_name, required_units=2)
         return
+    else:  # otherwise deploy the charm from the 6/edge channel
+        await ops_test.model.deploy(MONGODB_CHARM_NAME, channel="6/edge", num_units=2)
 
-    app_name = APP_NAME
+    db_app_name = await get_app_name(ops_test)  # look the name up again after deploying
+    await ops_test.model.wait_for_idle(
+        apps=[db_app_name], status="active", timeout=1000, idle_period=120
+    )
+
+    await relate_mongodb_and_application(ops_test, db_app_name, WRITE_APP)
+
+
+@pytest.mark.skip("skip until upgrades work has been released to charmhub")
+@pytest.mark.group(1)
+@pytest.mark.abort_on_fail
+async def test_successful_upgrade(ops_test: OpsTest, continuous_writes) -> None:
+    new_charm = await ops_test.build_charm(".")  # build the charm from the current directory
+    db_app_name = await get_app_name(ops_test)
+    await assert_successful_run_upgrade_sequence(ops_test, db_app_name, new_charm=new_charm)
 
-    await ops_test.model.deploy(
-        app_name,
-        application_name=app_name,
-        num_units=3,
-        series="jammy",
-        channel="6/edge",
+    # verify that no writes were skipped
+    application_unit = ops_test.model.applications[WRITE_APP].units[0]
+    stop_writes_action = await application_unit.run_action("stop-continuous-writes")
+    await stop_writes_action.wait()
+    total_expected_writes = int(stop_writes_action.results["writes"])
+    assert total_expected_writes > 0, "error while getting expected writes."
+
+    actual_writes = await count_writes(ops_test, app_name=db_app_name)
+    assert total_expected_writes == actual_writes, "missed writes during upgrade procedure."
+
+
+@pytest.mark.skip("skip until upgrades work has been released to charmhub")
+@pytest.mark.group(1)
+@pytest.mark.abort_on_fail
+async def test_preflight_check(ops_test: OpsTest) -> None:
+    db_app_name = await get_app_name(ops_test)
+    leader_unit = await backup_helpers.get_leader_unit(ops_test, db_app_name)
+
+    logger.info("Calling pre-upgrade-check")
+    action = await leader_unit.run_action("pre-upgrade-check")
+    await action.wait()  # no fault is injected in this test, so the check must complete
+    assert action.status == "completed", "pre-upgrade-check failed, expected to succeed."
+
+
+@pytest.mark.skip("skip until upgrades work has been released to charmhub")
+@pytest.mark.group(1)
+@pytest.mark.abort_on_fail
+async def test_preflight_check_failure(ops_test: OpsTest, chaos_mesh) -> None:
+    db_app_name = await get_app_name(ops_test)
+    leader_unit = await backup_helpers.get_leader_unit(ops_test, db_app_name)
+
+    non_leader_unit = None  # first unit that is not the leader becomes the isolation target
+    for unit in ops_test.model.applications[db_app_name].units:
+        if unit != leader_unit:
+            non_leader_unit = unit
+            break
+
+    isolate_instance_from_cluster(ops_test, non_leader_unit.name)
+    await wait_until_unit_in_status(
+        ops_test, non_leader_unit, leader_unit, "(not reachable/healthy)"
     )
+
+    logger.info("Calling pre-upgrade-check")
+    action = await leader_unit.run_action("pre-upgrade-check")
+    await action.wait()  # a member is unreachable here, so the check must not pass
+    assert action.status == "failed", "pre-upgrade-check succeeded, expected to fail."
+
+    # restore network after test
+    remove_instance_isolation(ops_test)
     await ops_test.model.wait_for_idle(
-        apps=[app_name], status="active", timeout=1000, idle_period=120
+        apps=[db_app_name], status="active", timeout=1000, idle_period=30
     )
 
 
diff --git a/tests/unit/test_upgrade.py b/tests/unit/test_upgrade.py
index 9e0d953f6..ef29cfd0a 100644
--- a/tests/unit/test_upgrade.py
+++ b/tests/unit/test_upgrade.py
@@ -14,6 +14,7 @@
 
 
 class TestUpgrades(unittest.TestCase):
+    @patch("charm.get_charm_revision")
     @patch_network_get(private_address="1.1.1.1")
     def setUp(self, *unused):
         self.harness = Harness(MongoDBCharm)