Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Wait and retry for rsm goal state #2801

Merged
merged 2 commits into from
Apr 11, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions tests_e2e/orchestrator/lib/agent_test_suite.py
Original file line number Diff line number Diff line change
Expand Up @@ -308,8 +308,8 @@ def _setup_node(self) -> None:
log.info("Creating %s with the files need on the test node", tarball_path)
log.info("Adding orchestrator/scripts")
run_command(['tar', 'cvf', str(tarball_path), '--transform=s,.*/,bin/,', '-C', str(self.context.test_source_directory/"orchestrator"/"scripts"), '.'])
# log.info("Adding tests/scripts")
# run_command(['tar', 'rvf', str(tarball_path), '--transform=s,.*/,bin/,', '-C', str(self.context.test_source_directory/"tests"/"scripts"), '.'])
log.info("Adding tests/scripts")
run_command(['tar', 'rvf', str(tarball_path), '--transform=s,.*/,bin/,', '-C', str(self.context.test_source_directory/"tests"/"scripts"), '.'])
log.info("Adding tests/lib")
run_command(['tar', 'rvf', str(tarball_path), '--transform=s,^,lib/,', '-C', str(self.context.test_source_directory.parent), '--exclude=__pycache__', 'tests_e2e/tests/lib'])
log.info("Contents of %s:\n\n%s", tarball_path, run_command(['tar', 'tvf', str(tarball_path)]))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@ set -euo pipefail
#
# The service name is walinuxagent in Ubuntu/debian and waagent elsewhere
#
# TODO: Update install-agent to use this script

usage() (
echo "Usage: agent-service command"
Expand Down Expand Up @@ -51,11 +50,16 @@ else
service-start() { service $1 start; }
fi

if service-status walinuxagent > /dev/null 2>&1;then
python=$(get-agent-python)
distro=$($python -c 'from azurelinuxagent.common.version import get_distro; print(get_distro()[0])')
distro=$(echo $distro | tr '[:upper:]' '[:lower:]')

if [[ $distro == *"ubuntu"* || $distro == *"debian"* ]]; then
service_name="walinuxagent"
else
service_name="waagent"
fi

echo "Service name: $service_name"

if [[ "$cmd" == "restart" ]]; then
Expand Down
51 changes: 13 additions & 38 deletions tests_e2e/orchestrator/scripts/install-agent
Original file line number Diff line number Diff line change
Expand Up @@ -49,36 +49,11 @@ if [ "$#" -ne 0 ] || [ -z ${package+x} ] || [ -z ${version+x} ]; then
usage
fi

#
# Find the command to manage services
#
if command -v systemctl &> /dev/null; then
service-status() { systemctl --no-pager -l status $1; }
service-stop() { systemctl stop $1; }
service-start() { systemctl start $1; }
else
service-status() { service $1 status; }
service-stop() { service $1 stop; }
service-start() { service $1 start; }
fi

#
# Find the service name (walinuxagent in Ubuntu and waagent elsewhere)
#
if service-status walinuxagent > /dev/null 2>&1;then
service_name="walinuxagent"
else
service_name="waagent"
fi
echo "Service name: $service_name"

#
# Output the initial version of the agent

#
python=$(get-agent-python)
waagent=$(get-agent-bin-path)
echo "Agent's path: $waagent"
$python "$waagent" --version
waagent-version
printf "\n"

#
Expand All @@ -87,25 +62,25 @@ printf "\n"
echo "Installing $package as version $version..."
unzip.py "$package" "/var/lib/waagent/WALinuxAgent-$version"

# Ensure that AutoUpdate is enabled. some distros, e.g. Flatcar, don't have a waagent.conf
# but AutoUpdate defaults to True so there is no need to do anything in that case.
if [[ -e /etc/waagent.conf ]]; then
sed -i 's/AutoUpdate.Enabled=n/AutoUpdate.Enabled=y/g' /etc/waagent.conf
fi
python=$(get-agent-python)
# Ensure that AutoUpdate is enabled. Some distros, e.g. Flatcar, keep waagent.conf in a different path, so ask the agent's osutil for it.
waagent_conf_path=$($python -c 'from azurelinuxagent.common.osutil import get_osutil; osutil=get_osutil(); print(osutil.agent_conf_file_path)')
echo "Agent's conf path: $waagent_conf_path"
sed -i 's/AutoUpdate.Enabled=n/AutoUpdate.Enabled=y/g' "$waagent_conf_path"
# By default the GAUpdates flag is set to True, so that the agent goes through the update logic to look for new agents.
# In the e2e tests this flag needs to be off in the test version 9.9.9.9 to stop the agent updates, so that our scenarios run on 9.9.9.9.
sed -i '$a GAUpdates.Enabled=n' /etc/waagent.conf
sed -i '$a GAUpdates.Enabled=n' "$waagent_conf_path"

#
# Restart the service
#
echo "Restarting service..."
service-stop $service_name
agent-service stop

# Rename the previous log to ensure the new log starts with the agent we just installed
mv /var/log/waagent.log /var/log/waagent."$(date --iso-8601=seconds)".log

service-start $service_name
agent-service start

#
# Verify that the new agent is running and output its status.
Expand All @@ -116,7 +91,7 @@ echo "Verifying agent installation..."
check-version() {
for i in {0..5}
do
if $python "$waagent" --version | grep -E "Goal state agent:\s+$version" > /dev/null; then
if waagent-version | grep -E "Goal state agent:\s+$version" > /dev/null; then
return 0
fi
sleep 10
Expand All @@ -133,8 +108,8 @@ else
exit_code=1
fi

$python "$waagent" --version
waagent-version
printf "\n"
service-status $service_name
agent-service status

exit $exit_code
Original file line number Diff line number Diff line change
Expand Up @@ -16,23 +16,11 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#

# This script is needed by the agent-update tests to find the agent's Python, which is used to update the agent version in the source file version.py
# returns the version of the agent
#
set -euo pipefail

PYTHON=""
# Looking for Agent's Python
shebang=`cat $(which waagent) | head -1 | grep '^#!'`
if [[ -z ${shebang+x} ]]; then
echo "ERROR: Can't determine Agent's Python."
exit 1
fi
shebang=`echo $shebang | sed 's/^#!//'`
# example /usr/bin/env python3
# some distros will have like /usr/bin/python3.6
read -ra strarr <<< "$shebang"
for val in "${strarr[@]}"; do
PYTHON=$val
done
echo $PYTHON
python=$(get-agent-python)
waagent=$(get-agent-bin-path)
echo "Agent's path: $waagent"
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I suggest removing this echo so that the output is just the same as "waagent --version"

$python "$waagent" --version
38 changes: 26 additions & 12 deletions tests_e2e/tests/agent_update/rsm_update.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
# For each scenario, we initiate the rsm request with the target version and then verify that the agent updated to that target version.
#
import json
from typing import List, Dict, Any

import requests
from azure.identity import DefaultAzureCredential
Expand All @@ -46,12 +47,27 @@ def __init__(self, context: AgentTestContext):
username=self._context.username,
private_key_file=self._context.private_key_file)

def get_ignore_error_rules(self) -> List[Dict[str, Any]]:
    """
    Return the ignore rules for this test: messages that are expected while it runs.

    Each rule is a dictionary; the only rule defined here has a single 'message' entry.
    """
    #
    # This is expected as we validate the downgrade scenario
    #
    #     WARNING ExtHandler ExtHandler Agent WALinuxAgent-9.9.9.9 is permanently blacklisted
    #
    blacklisted_test_agent = {'message': r"Agent WALinuxAgent-9.9.9.9 is permanently blacklisted"}
    return [blacklisted_test_agent]

def run(self) -> None:
# Allow agent to send supported feature flag
self._verify_agent_reported_supported_feature_flag()

log.info("*******Verifying the Agent Downgrade scenario*******")
self._mock_rsm_update("1.3.0.0")
self._check_rsm_gs("1.3.0.0")
self._prepare_agent()

# Verify downgrade scenario
Expand All @@ -60,29 +76,27 @@ def run(self) -> None:
# Verify upgrade scenario
log.info("*******Verifying the Agent Upgrade scenario*******")
self._mock_rsm_update("1.3.1.0")
self._check_rsm_gs("1.3.1.0")
self._verify_guest_agent_update("1.3.1.0")

# Verify no version update. There is a bug in CRP; this scenario will be enabled once it's fixed.
# log.info("*******Verifying the no version update scenario*******")
# self._prepare_rsm_update("1.3.1.0")
# self._verify_guest_agent_update("1.3.1.0")

def _check_rsm_gs(self, requested_version: str) -> None:
    """
    Run rsm_goal_state.py through the test's SSH client (with sudo), passing the requested
    version, and log the script's output.

    This is called after mocking the rsm update request, to check that the RSM goal state
    for that version is available to the agent.
    """
    command = f"rsm_goal_state.py --version {requested_version}"
    script_output = self._ssh_client.run_command(command, use_sudo=True)
    log.info('Verifying requested version GS available to the agent \n%s', script_output)

def _prepare_agent(self) -> None:
    """
    Ensure the agent is ready to accept rsm updates by running the agent-update-config script
    through the test's SSH client (with sudo).

    That script updates the following settings:
    1) The daemon version, since the daemon has a hard check on the agent version in order to update
       the agent: it doesn't allow versions lower than the daemon version.
    2) The GAFamily type (set to "Test") and the GAUpdates flag, so that agent updates are processed
       on test versions.
    """
    # The script is invoked by name and exactly once: it appends a line to the agent's configuration
    # and restarts the agent service, so running it a second time would duplicate the appended setting
    # and restart the service again. No per-script copy is needed because tests/scripts is packed
    # into the tarball that is set up on the test node.
    output = self._ssh_client.run_command("agent-update-config", use_sudo=True)
    log.info('Updating agent update required config \n%s', output)

@staticmethod
def _verify_agent_update_flag_enabled(vm: VmMachine) -> bool:
Expand Down Expand Up @@ -141,7 +155,7 @@ def _verify_guest_agent_update(self, requested_version: str) -> None:
Verify current agent version running on rsm requested version
"""
def _check_agent_version(requested_version: str) -> bool:
stdout: str = self._ssh_client.run_command("sudo waagent --version")
stdout: str = self._ssh_client.run_command("waagent-version", use_sudo=True)
expected_version = f"Goal state agent: {requested_version}"
if expected_version in stdout:
return True
Expand All @@ -151,7 +165,7 @@ def _check_agent_version(requested_version: str) -> bool:

log.info("Verifying agent updated to requested version")
retry_if_not_found(lambda: _check_agent_version(requested_version))
stdout: str = self._ssh_client.run_command("sudo waagent --version")
stdout: str = self._ssh_client.run_command("waagent-version", use_sudo=True)
log.info(f"Verified agent updated to requested version. Current agent version running:\n {stdout}")

def _verify_agent_reported_supported_feature_flag(self):
Expand Down
7 changes: 0 additions & 7 deletions tests_e2e/tests/lib/ssh_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,10 +83,3 @@ def _copy(self, source: Path, target: Path, remote_source: bool, remote_target:
command.extend([str(source), str(target)])

shell.run_command(command)

def copy(self, local_path: Path, remote_path: Path):
    """
    Copy a file from the local machine to the remote machine using scp.

    The destination is built as <username>@<ip_address>:<remote_path>; the client's private key
    file is passed with -i and StrictHostKeyChecking is turned off.
    """
    scp_target = f"{self._username}@{self._ip_address}:{remote_path}"
    scp_command = ["scp", "-o", "StrictHostKeyChecking=no", "-i", self._private_key_file, local_path, scp_target]
    shell.run_command(scp_command)
12 changes: 6 additions & 6 deletions tests_e2e/tests/scripts/agent-update-config
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,14 @@
#
set -euo pipefail

AGENT_PYTHON_SCRIPT="agent-python"
AGENT_SERVICE_SCRIPT="agent-service"
PYTHON=$(source $AGENT_PYTHON_SCRIPT)
PYTHON=$(get-agent-python)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

let's add a comment describing what this script does

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Already added a comment.

echo "Agent's Python: $PYTHON"
# Some distros return the .pyc bytecode file instead of the .py source file, so retrieve the parent directory first.
version_file_dir=$($PYTHON -c 'import azurelinuxagent.common.version as v; import os; print(os.path.dirname(v.__file__))')
version_file_full_path="$version_file_dir/version.py"
sed -E -i "s/AGENT_VERSION\s+=\s+'[0-9.]+'/AGENT_VERSION = '1.0.0.0'/" $version_file_full_path
sed -i 's/GAUpdates.Enabled=n/GAUpdates.Enabled=y/g' /etc/waagent.conf
sed -i '$a AutoUpdate.GAFamily=Test' /etc/waagent.conf
source $AGENT_SERVICE_SCRIPT restart
waagent_conf_path=$($PYTHON -c 'from azurelinuxagent.common.osutil import get_osutil; osutil=get_osutil(); print(osutil.agent_conf_file_path)')
sed -i 's/GAUpdates.Enabled=n/GAUpdates.Enabled=y/g' "$waagent_conf_path"
sed -i '$a AutoUpdate.GAFamily=Test' "$waagent_conf_path"
echo "Restarting service..."
agent-service restart
64 changes: 64 additions & 0 deletions tests_e2e/tests/scripts/rsm_goal_state.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
#!/usr/bin/env pypy3

# Microsoft Azure Linux Agent
#
# Copyright 2018 Microsoft Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import argparse
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

let's add a comment describing what this script does

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

updated

import sys
import time

from azurelinuxagent.common.protocol.util import get_protocol_util
from azurelinuxagent.common.protocol.goal_state import GoalState, GoalStateProperties


def get_requested_version(gs: GoalState) -> str:
    """
    Return the version requested for the "Test" agent family in the given goal state.

    The first agent family named "Test" that has at least one URI is used. Its requested version
    is returned as a string; an empty string is returned when that family does not specify a
    requested version or the requested version is None.

    Raises an Exception when the goal state has no "Test" family with URIs.
    """
    test_families = []
    for family in gs.extensions_goal_state.agent_families:
        if family.name == "Test" and len(family.uris) > 0:
            test_families.append(family)
    if not test_families:
        raise Exception(
            u"No manifest links found for agent family Test, skipping agent update verification")

    selected = test_families[0]
    if not selected.is_requested_version_specified or selected.requested_version is None:
        return ""
    return str(selected.requested_version)


# Script body: fetch the goal state up to 5 times (30 seconds apart) until the requested version of
# the "Test" agent family matches the version given with --version.
# Exit code 0 means the version was found; exit code 1 means it was not found or an error occurred
# (the error is printed to stderr).
try:
    parser = argparse.ArgumentParser()
    parser.add_argument('-v', '--version', required=True)
    args = parser.parse_args()

    protocol = get_protocol_util().get_protocol(init_goal_state=False)
    protocol.client.reset_goal_state(
        goal_state_properties=GoalStateProperties.ExtensionsGoalState | GoalStateProperties.Certificates)

    max_attempts = 5
    for attempt in range(1, max_attempts + 1):
        protocol.client.update_goal_state()
        goal_state = protocol.client.get_goal_state()
        requested_version = get_requested_version(goal_state)
        if requested_version == args.version:
            print("Latest GS includes rsm requested version : {0}.".format(requested_version))
            break
        # Only wait (and announce a re-check) when another attempt will actually follow.
        if attempt < max_attempts:
            print("RSM requested version GS not available yet to the agent, checking again in 30 secs.")
            time.sleep(30)
    else:
        # The loop ran out of attempts without a match. Fail explicitly; otherwise the script would
        # fall through to sys.exit(0) and report success although the goal state never arrived.
        raise Exception(
            "RSM requested version {0} was not found in the goal state after {1} attempts "
            "(last requested version seen: '{2}')".format(args.version, max_attempts, requested_version))

except Exception as e:
    print(f"{e}", file=sys.stderr)
    sys.exit(1)

sys.exit(0)