Skip to content

Commit

Permalink
Wait and retry for rsm goal state (Azure#2801)
Browse files Browse the repository at this point in the history
* wait for rsm goal state

* address comments
  • Loading branch information
nagworld9 committed Jun 8, 2023
1 parent 5b118d9 commit b8f1592
Show file tree
Hide file tree
Showing 7 changed files with 113 additions and 132 deletions.
21 changes: 11 additions & 10 deletions tests_e2e/orchestrator/scripts/install-agent
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ fi

#
# Output the initial version of the agent

#
python=$(get-agent-python)
waagent=$(get-agent-bin-path)
Expand Down Expand Up @@ -107,25 +108,25 @@ echo "========== Installing Agent =========="
echo "Installing $package as version $version..."
unzip.py "$package" "/var/lib/waagent/WALinuxAgent-$version"

# Ensure that AutoUpdate is enabled. some distros, e.g. Flatcar, don't have a waagent.conf
# but AutoUpdate defaults to True so there is no need to do anything in that case.
if [[ -e /etc/waagent.conf ]]; then
sed -i 's/AutoUpdate.Enabled=n/AutoUpdate.Enabled=y/g' /etc/waagent.conf
fi
python=$(get-agent-python)
# Ensure that AutoUpdate is enabled. Some distros, e.g. Flatcar, have waagent.conf in a different path.
waagent_conf_path=$($python -c 'from azurelinuxagent.common.osutil import get_osutil; osutil=get_osutil(); print(osutil.agent_conf_file_path)')
echo "Agent's conf path: $waagent_conf_path"
sed -i 's/AutoUpdate.Enabled=n/AutoUpdate.Enabled=y/g' "$waagent_conf_path"
# By default the GAUpdates flag is set to True, so that the agent goes through the update logic to look for new agents.
# But in e2e tests this flag needs to be off in test version 9.9.9.9 to stop the agent updates, so that our scenarios run on 9.9.9.9.
sed -i '$a GAUpdates.Enabled=n' /etc/waagent.conf
sed -i '$a GAUpdates.Enabled=n' "$waagent_conf_path"

#
# Restart the service
#
echo "Restarting service..."
service-stop $service_name
agent-service stop

# Rename the previous log to ensure the new log starts with the agent we just installed
mv /var/log/waagent.log /var/log/waagent."$(date --iso-8601=seconds)".log

service-start $service_name
agent-service start

#
# Verify that the new agent is running and output its status.
Expand All @@ -137,7 +138,7 @@ check-version() {
# We need to wait for the extension handler to start, give it a couple of minutes
for i in {1..12}
do
if $python "$waagent" --version | grep -E "Goal state agent:\s+$version" > /dev/null; then
if waagent-version | grep -E "Goal state agent:\s+$version" > /dev/null; then
return 0
fi
sleep 10
Expand All @@ -160,6 +161,6 @@ printf "\n"
echo "========== Final Status =========="
$python "$waagent" --version
printf "\n"
service-status $service_name
agent-service status

exit $exit_code
Original file line number Diff line number Diff line change
Expand Up @@ -16,23 +16,10 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#

# The script is needed for agent-update tests to find the agent's python for updating the agent version in source file version.py
# returns the version of the agent
#
set -euo pipefail

PYTHON=""
# Looking for Agent's Python
shebang=`cat $(which waagent) | head -1 | grep '^#!'`
if [[ -z ${shebang+x} ]]; then
echo "ERROR: Can't determine Agent's Python."
exit 1
fi
shebang=`echo $shebang | sed 's/^#!//'`
# example /usr/bin/env python3
# some distros will have like /usr/bin/python3.6
read -ra strarr <<< "$shebang"
for val in "${strarr[@]}"; do
PYTHON=$val
done
echo $PYTHON
python=$(get-agent-python)
waagent=$(get-agent-bin-path)
$python "$waagent" --version
38 changes: 26 additions & 12 deletions tests_e2e/tests/agent_update/rsm_update.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
# For each scenario, we initiate the rsm request with the target version and then verify that the agent updated to that target version.
#
import json
from typing import List, Dict, Any

import requests
from azure.identity import DefaultAzureCredential
Expand All @@ -46,12 +47,27 @@ def __init__(self, context: AgentTestContext):
username=self._context.username,
private_key_file=self._context.private_key_file)

def get_ignore_error_rules(self) -> List[Dict[str, Any]]:
    """
    Return the error rules that should be ignored for this test.

    Each rule is a dict; the only rule returned here carries a 'message'
    pattern (presumably matched against agent log lines by the test
    framework -- confirm against the caller).
    """
    #
    # The downgrade scenario is validated by this test, so the following
    # warning is expected:
    #
    # WARNING ExtHandler ExtHandler Agent WALinuxAgent-9.9.9.9 is permanently blacklisted
    #
    blacklisted_agent_rule = {
        'message': r"Agent WALinuxAgent-9.9.9.9 is permanently blacklisted"
    }
    return [blacklisted_agent_rule]

def run(self) -> None:
# Allow agent to send supported feature flag
self._verify_agent_reported_supported_feature_flag()

log.info("*******Verifying the Agent Downgrade scenario*******")
self._mock_rsm_update("1.3.0.0")
self._check_rsm_gs("1.3.0.0")
self._prepare_agent()

# Verify downgrade scenario
Expand All @@ -60,29 +76,27 @@ def run(self) -> None:
# Verify upgrade scenario
log.info("*******Verifying the Agent Upgrade scenario*******")
self._mock_rsm_update("1.3.1.0")
self._check_rsm_gs("1.3.1.0")
self._verify_guest_agent_update("1.3.1.0")

# verify no version update. There is bug in CRP and will enable once it's fixed
# log.info("*******Verifying the no version update scenario*******")
# self._prepare_rsm_update("1.3.1.0")
# self._verify_guest_agent_update("1.3.1.0")

def _check_rsm_gs(self, requested_version: str) -> None:
    """
    Run rsm_goal_state.py on the test VM (via sudo) for the given version and log its output.

    This is invoked after the rsm update request has been mocked, to check
    that the RSM goal state is available to the agent.

    :param requested_version: version passed to the script's --version option
    """
    command = f"rsm_goal_state.py --version {requested_version}"
    script_output = self._ssh_client.run_command(command, use_sudo=True)
    log.info('Verifying requested version GS available to the agent \n%s', script_output)

def _prepare_agent(self) -> None:
"""
This method is to ensure agent is ready for accepting rsm updates. As part of that we update following flags
1) Changing daemon version since daemon has a hard check on agent version in order to update agent. It doesn't allow versions which are less than daemon version.
2) Updating GAFamily type "Test" and GAUpdates flag to process agent updates on test versions.
"""
local_path = self._context.test_source_directory/"tests"/"scripts"/"agent-python"
remote_path = self._context.remote_working_directory/"agent-python"
self._ssh_client.copy(local_path, remote_path)
local_path = self._context.test_source_directory/"tests"/"scripts"/"agent-service"
remote_path = self._context.remote_working_directory/"agent-service"
self._ssh_client.copy(local_path, remote_path)
local_path = self._context.test_source_directory/"tests"/"scripts"/"agent-update-config"
remote_path = self._context.remote_working_directory/"agent-update-config"
self._ssh_client.copy(local_path, remote_path)
self._ssh_client.run_command(f"sudo {remote_path}")
output = self._ssh_client.run_command("agent-update-config", use_sudo=True)
log.info('Updating agent update required config \n%s', output)

@staticmethod
def _verify_agent_update_flag_enabled(vm: VmMachine) -> bool:
Expand Down Expand Up @@ -141,7 +155,7 @@ def _verify_guest_agent_update(self, requested_version: str) -> None:
Verify current agent version running on rsm requested version
"""
def _check_agent_version(requested_version: str) -> bool:
stdout: str = self._ssh_client.run_command("sudo waagent --version")
stdout: str = self._ssh_client.run_command("waagent-version", use_sudo=True)
expected_version = f"Goal state agent: {requested_version}"
if expected_version in stdout:
return True
Expand All @@ -151,7 +165,7 @@ def _check_agent_version(requested_version: str) -> bool:

log.info("Verifying agent updated to requested version")
retry_if_not_found(lambda: _check_agent_version(requested_version))
stdout: str = self._ssh_client.run_command("sudo waagent --version")
stdout: str = self._ssh_client.run_command("waagent-version", use_sudo=True)
log.info(f"Verified agent updated to requested version. Current agent version running:\n {stdout}")

def _verify_agent_reported_supported_feature_flag(self):
Expand Down
7 changes: 0 additions & 7 deletions tests_e2e/tests/lib/ssh_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,10 +83,3 @@ def _copy(self, source: Path, target: Path, remote_source: bool, remote_target:
command.extend([str(source), str(target)])

shell.run_command(command)

def copy(self, local_path: Path, remote_path: Path):
"""
Copy file from local to remote machine
"""
destination = f"{self._username}@{self._ip_address}:{remote_path}"
shell.run_command(["scp", "-o", "StrictHostKeyChecking=no", "-i", self._private_key_file, local_path, destination])
80 changes: 0 additions & 80 deletions tests_e2e/tests/scripts/agent-service

This file was deleted.

12 changes: 6 additions & 6 deletions tests_e2e/tests/scripts/agent-update-config
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,14 @@
#
set -euo pipefail

AGENT_PYTHON_SCRIPT="agent-python"
AGENT_SERVICE_SCRIPT="agent-service"
PYTHON=$(source $AGENT_PYTHON_SCRIPT)
PYTHON=$(get-agent-python)
echo "Agent's Python: $PYTHON"
# some distros return .pyc byte file instead source file .py. So, I retrieve parent directory first.
version_file_dir=$($PYTHON -c 'import azurelinuxagent.common.version as v; import os; print(os.path.dirname(v.__file__))')
version_file_full_path="$version_file_dir/version.py"
sed -E -i "s/AGENT_VERSION\s+=\s+'[0-9.]+'/AGENT_VERSION = '1.0.0.0'/" $version_file_full_path
sed -i 's/GAUpdates.Enabled=n/GAUpdates.Enabled=y/g' /etc/waagent.conf
sed -i '$a AutoUpdate.GAFamily=Test' /etc/waagent.conf
source $AGENT_SERVICE_SCRIPT restart
waagent_conf_path=$($PYTHON -c 'from azurelinuxagent.common.osutil import get_osutil; osutil=get_osutil(); print(osutil.agent_conf_file_path)')
sed -i 's/GAUpdates.Enabled=n/GAUpdates.Enabled=y/g' "$waagent_conf_path"
sed -i '$a AutoUpdate.GAFamily=Test' "$waagent_conf_path"
echo "Restarting service..."
agent-service restart
66 changes: 66 additions & 0 deletions tests_e2e/tests/scripts/rsm_goal_state.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
#!/usr/bin/env pypy3

# Microsoft Azure Linux Agent
#
# Copyright 2018 Microsoft Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Verify the latest goal state included rsm requested version and if not, retry
#
import argparse
import sys
import time

from azurelinuxagent.common.protocol.util import get_protocol_util
from azurelinuxagent.common.protocol.goal_state import GoalState, GoalStateProperties


def get_requested_version(gs: GoalState) -> str:
    """
    Return the requested version of the "Test" agent family in the given goal state.

    Only families named "Test" that have at least one URI are considered; the
    first such family is used.

    :param gs: goal state whose extensions_goal_state.agent_families is inspected
    :return: the requested version as a string, or "" when the family does not
             specify a requested version (or the version is None)
    :raises Exception: when no "Test" family with URIs is present
    """
    test_families = [
        family for family in gs.extensions_goal_state.agent_families
        if family.name == "Test" and len(family.uris) > 0
    ]
    if len(test_families) == 0:
        raise Exception(
            u"No manifest links found for agent family Test, skipping agent update verification")

    selected = test_families[0]
    # Guard clause: nothing was requested for this family
    if not selected.is_requested_version_specified or selected.requested_version is None:
        return ""
    return str(selected.requested_version)


# Poll the goal state until it includes the RSM requested version passed via --version.
# Exit code 0 means the requested version was found; exit code 1 means an error occurred
# or the requested version was still missing after all attempts.
try:
    parser = argparse.ArgumentParser()
    parser.add_argument('-v', '--version', required=True)
    args = parser.parse_args()

    protocol = get_protocol_util().get_protocol(init_goal_state=False)
    protocol.client.reset_goal_state(
        goal_state_properties=GoalStateProperties.ExtensionsGoalState | GoalStateProperties.Certificates)

    max_attempts = 5
    attempts = max_attempts
    while attempts > 0:
        protocol.client.update_goal_state()
        goal_state = protocol.client.get_goal_state()
        requested_version = get_requested_version(goal_state)
        if requested_version == args.version:
            print("Latest GS includes rsm requested version : {0}.".format(requested_version))
            break
        attempts -= 1
        # Only wait when another attempt will follow; sleeping after the last one just delays the failure
        if attempts > 0:
            print("RSM requested version GS not available yet to the agent, checking again in 30 secs.")
            time.sleep(30)
    else:
        # The loop ended without 'break': the requested version never showed up.
        # Report this as a failure instead of falling through to a successful exit.
        raise Exception(
            "RSM requested version {0} is not available in the goal state after {1} attempts.".format(
                args.version, max_attempts))

except Exception as e:
    print(f"{e}", file=sys.stderr)
    sys.exit(1)

sys.exit(0)

0 comments on commit b8f1592

Please sign in to comment.