diff --git a/azurelinuxagent/common/conf.py b/azurelinuxagent/common/conf.py index 46765ea98..6554ab308 100644 --- a/azurelinuxagent/common/conf.py +++ b/azurelinuxagent/common/conf.py @@ -629,10 +629,9 @@ def get_normal_upgrade_frequency(conf=__conf__): def get_enable_ga_versioning(conf=__conf__): """ If True, the agent uses GA Versioning for auto-updating the agent vs automatically auto-updating to the highest version. - NOTE: This option is experimental and may be removed in later versions of the Agent. """ - return conf.get_switch("Debug.EnableGAVersioning", False) + return conf.get_switch("Debug.EnableGAVersioning", True) def get_firewall_rules_log_period(conf=__conf__): diff --git a/azurelinuxagent/ga/agent_update.py b/azurelinuxagent/ga/agent_update.py new file mode 100644 index 000000000..ba9861324 --- /dev/null +++ b/azurelinuxagent/ga/agent_update.py @@ -0,0 +1,260 @@ +import datetime +import glob +import os +import shutil + +from azurelinuxagent.common import conf, logger +from azurelinuxagent.common.event import add_event, WALAEventOperation +from azurelinuxagent.common.exception import AgentUpgradeExitException +from azurelinuxagent.common.future import ustr +from azurelinuxagent.common.logger import LogLevel +from azurelinuxagent.common.protocol.extensions_goal_state import GoalStateSource +from azurelinuxagent.common.protocol.restapi import VMAgentUpdateStatuses, VMAgentUpdateStatus +from azurelinuxagent.common.utils import fileutil, textutil +from azurelinuxagent.common.utils.flexible_version import FlexibleVersion +from azurelinuxagent.common.version import CURRENT_VERSION, AGENT_NAME, AGENT_DIR_PATTERN +from azurelinuxagent.ga.guestagent import GuestAgent, GAUpdateReportState + + +def get_agent_update_handler(protocol): + return AgentUpdateHandler(protocol) + + +class AgentUpdateHandler(object): + + def __init__(self, protocol): + self._protocol = protocol + self._ga_family = conf.get_autoupdate_gafamily() + self._autoupdate_enabled = conf.get_autoupdate_enabled() + self._gs_id = self._protocol.get_goal_state().extensions_goal_state.id + self._last_attempted_update_time = datetime.datetime.min + self._last_attempted_update_version = FlexibleVersion("0.0.0.0") + + def __should_update_agent(self, requested_version): + """ + check to see if update is allowed once per (as specified in the conf.get_autoupdate_frequency()) + return false when we don't allow updates. + """ + now = datetime.datetime.now() + + if self._last_attempted_update_time != datetime.datetime.min and self._last_attempted_update_version == requested_version: + next_attempt_time = self._last_attempted_update_time + datetime.timedelta(seconds=conf.get_autoupdate_frequency()) + else: + next_attempt_time = now + + if next_attempt_time > now: + return False + # The time limit elapsed for us to allow updates. + return True + + def __get_agent_family_from_last_gs(self, goal_state): + """ + Get the agent_family from last GS for the given family + Returns: first entry of Manifest + Exception if no manifests found in the last GS + """ + family = self._ga_family + agent_families = goal_state.extensions_goal_state.agent_families + agent_family_manifests = [m for m in agent_families if m.name == family and len(m.uris) > 0] + if len(agent_family_manifests) == 0: + raise Exception( + u"No manifest links found for agent family: {0} for incarnation: {1}, skipping agent update".format( + self._ga_family, self._gs_id)) + return agent_family_manifests[0] + + @staticmethod + def __get_requested_version(agent_family): + """ + Get the requested version from agent family + Returns: Requested version if supported and available + None if requested version missing or GA versioning not enabled + """ + if conf.get_enable_ga_versioning() and agent_family.is_requested_version_specified: + if agent_family.requested_version is not None: + return FlexibleVersion(agent_family.requested_version) + return None + + @staticmethod + def __get_largest_version(agent_manifest): + largest_version = FlexibleVersion("0.0.0.0") + for pkg in agent_manifest.pkg_list.versions: + pkg_version = FlexibleVersion(pkg.version) + if pkg_version > largest_version: + largest_version = pkg_version + return largest_version + + def __download_and_get_agent(self, goal_state, agent_family, agent_manifest, requested_version): + """ + This function downloads the new agent(requested version) and returns the downloaded version. + """ + if agent_manifest is None: # Fetch agent manifest if it's not already done + agent_manifest = goal_state.fetch_agent_manifest(agent_family.name, agent_family.uris) + package_to_download = self.__get_agent_package_to_download(agent_manifest, requested_version) + is_fast_track_goal_state = goal_state.extensions_goal_state.source == GoalStateSource.FastTrack + agent = GuestAgent.from_agent_package(package_to_download, self._protocol, is_fast_track_goal_state) + return agent + + def __get_agent_package_to_download(self, agent_manifest, version): + """ + Returns the package of the given Version found in the manifest. If not found, returns exception + """ + for pkg in agent_manifest.pkg_list.versions: + if FlexibleVersion(pkg.version) == version: + # Found a matching package, only download that one + return pkg + + raise Exception("No matching package found in the agent manifest for requested version: {0} in goal state incarnation: {1}, " + "skipping agent update".format(str(version), self._gs_id)) + + @staticmethod + def __purge_extra_agents_from_disk(known_agents): + """ + Remove from disk all directories and .zip files of unknown agents + (without removing the current, running agent). + """ + path = os.path.join(conf.get_lib_dir(), "{0}-*".format(AGENT_NAME)) + + known_versions = [agent.version for agent in known_agents] + known_versions.append(CURRENT_VERSION) + + for agent_path in glob.iglob(path): + try: + name = fileutil.trim_ext(agent_path, "zip") + m = AGENT_DIR_PATTERN.match(name) + if m is not None and FlexibleVersion(m.group(1)) not in known_versions: + if os.path.isfile(agent_path): + logger.info(u"Purging outdated Agent file {0}", agent_path) + os.remove(agent_path) + else: + logger.info(u"Purging outdated Agent directory {0}", agent_path) + shutil.rmtree(agent_path) + except Exception as e: + logger.warn(u"Purging {0} raised exception: {1}", agent_path, ustr(e)) + + @staticmethod + def __proceed_with_update(requested_version): + """ + If requested version is specified, upgrade/downgrade to the specified version. + Raises: AgentUpgradeExitException + """ + if requested_version < CURRENT_VERSION: + # In case of a downgrade, we mark the current agent as bad version to avoid starting it back up ever again + # (the expectation here being that if we get request to a downgrade, + # there's a good reason for not wanting the current version). + prefix = "downgrade" + try: + # We should always have an agent directory for the CURRENT_VERSION + agents_on_disk = AgentUpdateHandler.__get_available_agents_on_disk() + current_agent = next(agent for agent in agents_on_disk if agent.version == CURRENT_VERSION) + msg = "Marking the agent {0} as bad version since a downgrade was requested in the GoalState, " \ + "suggesting that we really don't want to execute any extensions using this version".format(CURRENT_VERSION) + logger.info(msg) + current_agent.mark_failure(is_fatal=True, reason=msg) + except StopIteration: + logger.warn( + "Could not find a matching agent with current version {0} to blacklist, skipping it".format( + CURRENT_VERSION)) + else: + # In case of an upgrade, we don't need to exclude anything as the daemon will automatically + # start the next available highest version which would be the target version + prefix = "upgrade" + raise AgentUpgradeExitException("Agent update found, Exiting current process to {0} to the new Agent version {1}".format(prefix, requested_version)) + + @staticmethod + def __get_available_agents_on_disk(): + available_agents = [agent for agent in AgentUpdateHandler.__get_all_agents_on_disk() if agent.is_available] + return sorted(available_agents, key=lambda agent: agent.version, reverse=True) + + @staticmethod + def __get_all_agents_on_disk(): + path = os.path.join(conf.get_lib_dir(), "{0}-*".format(AGENT_NAME)) + return [GuestAgent.from_installed_agent(path=agent_dir) for agent_dir in glob.iglob(path) if os.path.isdir(agent_dir)] + + @staticmethod + def __log_event(level, msg_, success_=True): + if level == LogLevel.WARNING: + logger.warn(msg_) + elif level == LogLevel.ERROR: + logger.error(msg_) + elif level == LogLevel.INFO: + logger.info(msg_) + add_event(op=WALAEventOperation.AgentUpgrade, is_success=success_, message=msg_, log_event=False) + + def run(self, goal_state): + try: + # Ignore new agents if update is disabled + if not self._autoupdate_enabled: + return + + self._gs_id = goal_state.extensions_goal_state.id + agent_family = self.__get_agent_family_from_last_gs(goal_state) + requested_version = self.__get_requested_version(agent_family) + agent_manifest = None # This is to make sure fetch agent manifest once per update + + if requested_version is None: + if conf.get_enable_ga_versioning(): # log the warning only when ga versioning is enabled + warn_msg = "Missing requested version in agent family: {0} for incarnation: {1}, fallback to largest version update".format(self._ga_family, self._gs_id) + self.__log_event(LogLevel.WARNING, warn_msg) + GAUpdateReportState.report_error_msg = warn_msg + agent_manifest = goal_state.fetch_agent_manifest(agent_family.name, agent_family.uris) + requested_version = self.__get_largest_version(agent_manifest) + else: + # Save the requested version to report back + GAUpdateReportState.report_expected_version = requested_version + # Remove the missing requested version warning once requested version becomes available + if "Missing requested version" in GAUpdateReportState.report_error_msg: + GAUpdateReportState.report_error_msg = "" + + if requested_version == CURRENT_VERSION: + return + + # Check if an update is allowed + if not self.__should_update_agent(requested_version): + return + + msg_ = "Goal state {0} is requesting a new agent version {1}, will update the agent before processing the goal state.".format( + self._gs_id, str(requested_version)) + self.__log_event(LogLevel.INFO, msg_) + + try: + agent = self.__download_and_get_agent(goal_state, agent_family, agent_manifest, requested_version) + + if not agent.is_available: + msg = "Downloaded agent version is in bad state : {0} , skipping agent update".format( + str(agent.version)) + self.__log_event(LogLevel.WARNING, msg) + return + + # We delete the directory and the zip package from the filesystem except current version and target version + self.__purge_extra_agents_from_disk(known_agents=[agent]) + self.__proceed_with_update(requested_version) + + finally: + self._last_attempted_update_time = datetime.datetime.now() + self._last_attempted_update_version = requested_version + + except Exception as err: + if isinstance(err, AgentUpgradeExitException): + raise err + if "Missing requested version" not in GAUpdateReportState.report_error_msg: + GAUpdateReportState.report_error_msg = "Unable to update Agent: {0}".format(textutil.format_exception(err)) + self.__log_event(LogLevel.WARNING, GAUpdateReportState.report_error_msg, success_=False) + + def get_vmagent_update_status(self): + """ + This function gets the VMAgent update status as per the last attempted update. + Returns: None if fail to report or update never attempted with requested version + """ + try: + if conf.get_enable_ga_versioning(): + if not GAUpdateReportState.report_error_msg: + status = VMAgentUpdateStatuses.Success + code = 0 + else: + status = VMAgentUpdateStatuses.Error + code = 1 + return VMAgentUpdateStatus(expected_version=str(GAUpdateReportState.report_expected_version), status=status, code=code, message=GAUpdateReportState.report_error_msg) + except Exception as err: + self.__log_event(LogLevel.WARNING, "Unable to report agent update status: {0}".format( + textutil.format_exception(err)), success_=False) + return None diff --git a/azurelinuxagent/ga/guestagent.py b/azurelinuxagent/ga/guestagent.py new file mode 100644 index 000000000..56f314244 --- /dev/null +++ b/azurelinuxagent/ga/guestagent.py @@ -0,0 +1,316 @@ +import json +import os +import shutil +import time + +from azurelinuxagent.common.event import add_event, WALAEventOperation +from azurelinuxagent.common.future import ustr +from azurelinuxagent.common.utils import textutil + +from azurelinuxagent.common import logger, conf +from azurelinuxagent.common.exception import UpdateError +from azurelinuxagent.common.utils.flexible_version import FlexibleVersion +from azurelinuxagent.common.version import AGENT_DIR_PATTERN, AGENT_NAME, CURRENT_VERSION +from azurelinuxagent.ga.exthandlers import HandlerManifest + +AGENT_ERROR_FILE = "error.json" # File name for agent error record +AGENT_MANIFEST_FILE = "HandlerManifest.json" +MAX_FAILURE = 3 # Max failure allowed for agent before declare bad agent + + +class GAUpdateReportState(object): + """ + This class is primarily used to maintain the in-memory persistent state for the agent updates. + This state will be persisted throughout the current service run and might be modified by external classes. + """ + report_error_msg = "" + report_expected_version = FlexibleVersion("0.0.0.0") + + +class GuestAgent(object): + def __init__(self, path, pkg, protocol, is_fast_track_goal_state): + """ + If 'path' is given, the object is initialized to the version installed under that path. + + If 'pkg' is given, the version specified in the package information is downloaded and the object is + initialized to that version. + + 'is_fast_track_goal_state' and 'protocol' are used only when a package is downloaded. + + NOTE: Prefer using the from_installed_agent and from_agent_package methods instead of calling __init__ directly + """ + self._is_fast_track_goal_state = is_fast_track_goal_state + self.pkg = pkg + self._protocol = protocol + version = None + if path is not None: + m = AGENT_DIR_PATTERN.match(path) + if m is None: + raise UpdateError(u"Illegal agent directory: {0}".format(path)) + version = m.group(1) + elif self.pkg is not None: + version = pkg.version + + if version is None: + raise UpdateError(u"Illegal agent version: {0}".format(version)) + self.version = FlexibleVersion(version) + + location = u"disk" if path is not None else u"package" + logger.verbose(u"Loading Agent {0} from {1}", self.name, location) + + self.error = GuestAgentError(self.get_agent_error_file()) + self.error.load() + + try: + self._ensure_downloaded() + self._ensure_loaded() + except Exception as e: + # If we're unable to download/unpack the agent, delete the Agent directory + try: + if os.path.isdir(self.get_agent_dir()): + shutil.rmtree(self.get_agent_dir(), ignore_errors=True) + except Exception as err: + logger.warn("Unable to delete Agent files: {0}".format(err)) + msg = u"Agent {0} install failed with exception:".format( + self.name) + detailed_msg = '{0} {1}'.format(msg, textutil.format_exception(e)) + if "Missing requested version" not in GAUpdateReportState.report_error_msg: + GAUpdateReportState.report_error_msg = detailed_msg # capture the download errors to report back + add_event( + AGENT_NAME, + version=self.version, + op=WALAEventOperation.Install, + is_success=False, + message=detailed_msg) + + @staticmethod + def from_installed_agent(path): + """ + Creates an instance of GuestAgent using the agent installed in the given 'path'. + """ + return GuestAgent(path, None, None, False) + + @staticmethod + def from_agent_package(package, protocol, is_fast_track_goal_state): + """ + Creates an instance of GuestAgent using the information provided in the 'package'; if that version of the agent is not installed it, it installs it. + """ + return GuestAgent(None, package, protocol, is_fast_track_goal_state) + + @property + def name(self): + return "{0}-{1}".format(AGENT_NAME, self.version) + + def get_agent_cmd(self): + return self.manifest.get_enable_command() + + def get_agent_dir(self): + return os.path.join(conf.get_lib_dir(), self.name) + + def get_agent_error_file(self): + return os.path.join(conf.get_lib_dir(), self.name, AGENT_ERROR_FILE) + + def get_agent_manifest_path(self): + return os.path.join(self.get_agent_dir(), AGENT_MANIFEST_FILE) + + def get_agent_pkg_path(self): + return ".".join((os.path.join(conf.get_lib_dir(), self.name), "zip")) + + def clear_error(self): + self.error.clear() + self.error.save() + + @property + def is_available(self): + return self.is_downloaded and not self.is_blacklisted + + @property + def is_blacklisted(self): + return self.error is not None and self.error.is_blacklisted + + @property + def is_downloaded(self): + return self.is_blacklisted or \ + os.path.isfile(self.get_agent_manifest_path()) + + def mark_failure(self, is_fatal=False, reason=''): + try: + if not os.path.isdir(self.get_agent_dir()): + os.makedirs(self.get_agent_dir()) + self.error.mark_failure(is_fatal=is_fatal, reason=reason) + self.error.save() + if self.error.is_blacklisted: + msg = u"Agent {0} is permanently blacklisted".format(self.name) + logger.warn(msg) + add_event(op=WALAEventOperation.AgentBlacklisted, is_success=False, message=msg, log_event=False, + version=self.version) + except Exception as e: + logger.warn(u"Agent {0} failed recording error state: {1}", self.name, ustr(e)) + + def _ensure_downloaded(self): + logger.verbose(u"Ensuring Agent {0} is downloaded", self.name) + + if self.is_downloaded: + logger.verbose(u"Agent {0} was previously downloaded - skipping download", self.name) + return + + if self.pkg is None: + raise UpdateError(u"Agent {0} is missing package and download URIs".format( + self.name)) + + self._download() + + msg = u"Agent {0} downloaded successfully".format(self.name) + logger.verbose(msg) + add_event( + AGENT_NAME, + version=self.version, + op=WALAEventOperation.Install, + is_success=True, + message=msg) + + def _ensure_loaded(self): + self._load_manifest() + self._load_error() + + def _download(self): + try: + self._protocol.client.download_zip_package("agent package", self.pkg.uris, self.get_agent_pkg_path(), self.get_agent_dir(), use_verify_header=self._is_fast_track_goal_state) + except Exception as exception: + msg = "Unable to download Agent {0}: {1}".format(self.name, ustr(exception)) + add_event( + AGENT_NAME, + op=WALAEventOperation.Download, + version=CURRENT_VERSION, + is_success=False, + message=msg) + raise UpdateError(msg) + + def _load_error(self): + try: + self.error = GuestAgentError(self.get_agent_error_file()) + self.error.load() + logger.verbose(u"Agent {0} error state: {1}", self.name, ustr(self.error)) + except Exception as e: + logger.warn(u"Agent {0} failed loading error state: {1}", self.name, ustr(e)) + + def _load_manifest(self): + path = self.get_agent_manifest_path() + if not os.path.isfile(path): + msg = u"Agent {0} is missing the {1} file".format(self.name, AGENT_MANIFEST_FILE) + raise UpdateError(msg) + + with open(path, "r") as manifest_file: + try: + manifests = json.load(manifest_file) + except Exception as e: + msg = u"Agent {0} has a malformed {1} ({2})".format(self.name, AGENT_MANIFEST_FILE, ustr(e)) + raise UpdateError(msg) + if type(manifests) is list: + if len(manifests) <= 0: + msg = u"Agent {0} has an empty {1}".format(self.name, AGENT_MANIFEST_FILE) + raise UpdateError(msg) + manifest = manifests[0] + else: + manifest = manifests + + try: + self.manifest = HandlerManifest(manifest) # pylint: disable=W0201 + if len(self.manifest.get_enable_command()) <= 0: + raise Exception(u"Manifest is missing the enable command") + except Exception as e: + msg = u"Agent {0} has an illegal {1}: {2}".format( + self.name, + AGENT_MANIFEST_FILE, + ustr(e)) + raise UpdateError(msg) + + logger.verbose( + u"Agent {0} loaded manifest from {1}", + self.name, + self.get_agent_manifest_path()) + logger.verbose(u"Successfully loaded Agent {0} {1}: {2}", + self.name, + AGENT_MANIFEST_FILE, + ustr(self.manifest.data)) + return + + +class GuestAgentError(object): + def __init__(self, path): + self.last_failure = 0.0 + self.was_fatal = False + if path is None: + raise UpdateError(u"GuestAgentError requires a path") + self.path = path + self.failure_count = 0 + self.reason = '' + + self.clear() + return + + def mark_failure(self, is_fatal=False, reason=''): + self.last_failure = time.time() + self.failure_count += 1 + self.was_fatal = is_fatal + self.reason = reason + return + + def clear(self): + self.last_failure = 0.0 + self.failure_count = 0 + self.was_fatal = False + self.reason = '' + return + + @property + def is_blacklisted(self): + return self.was_fatal or self.failure_count >= MAX_FAILURE + + def load(self): + if self.path is not None and os.path.isfile(self.path): + try: + with open(self.path, 'r') as f: + self.from_json(json.load(f)) + except Exception as error: + # The error.json file is only supposed to be written only by the agent. + # If for whatever reason the file is malformed, just delete it to reset state of the errors. + logger.warn( + "Ran into error when trying to load error file {0}, deleting it to clean state. Error: {1}".format( + self.path, textutil.format_exception(error))) + try: + os.remove(self.path) + except Exception: + # We try best case efforts to delete the file, ignore error if we're unable to do so + pass + return + + def save(self): + if os.path.isdir(os.path.dirname(self.path)): + with open(self.path, 'w') as f: + json.dump(self.to_json(), f) + return + + def from_json(self, data): + self.last_failure = max(self.last_failure, data.get(u"last_failure", 0.0)) + self.failure_count = max(self.failure_count, data.get(u"failure_count", 0)) + self.was_fatal = self.was_fatal or data.get(u"was_fatal", False) + reason = data.get(u"reason", '') + self.reason = reason if reason != '' else self.reason + return + + def to_json(self): + data = { + u"last_failure": self.last_failure, + u"failure_count": self.failure_count, + u"was_fatal": self.was_fatal, + u"reason": ustr(self.reason) + } + return data + + def __str__(self): + return "Last Failure: {0}, Total Failures: {1}, Fatal: {2}, Reason: {3}".format( + self.last_failure, + self.failure_count, + self.was_fatal, + self.reason) diff --git a/azurelinuxagent/ga/update.py b/azurelinuxagent/ga/update.py index cd758b972..ba02f9dbb 100644 --- a/azurelinuxagent/ga/update.py +++ b/azurelinuxagent/ga/update.py @@ -17,7 +17,6 @@ # Requires Python 2.6+ and Openssl 1.0+ # import glob -import json import os import re import shutil @@ -37,14 +36,12 @@ from azurelinuxagent.common.cgroupconfigurator import CGroupConfigurator from azurelinuxagent.common.event import add_event, initialize_event_logger_vminfo_common_parameters, \ WALAEventOperation, EVENTS_DIRECTORY -from azurelinuxagent.common.exception import UpdateError, ExitException, AgentUpgradeExitException, AgentMemoryExceededException +from azurelinuxagent.common.exception import ExitException, AgentUpgradeExitException, AgentMemoryExceededException from azurelinuxagent.common.future import ustr from azurelinuxagent.common.osutil import get_osutil, systemd from azurelinuxagent.common.persist_firewall_rules import PersistFirewallRulesHandler -from azurelinuxagent.common.protocol.goal_state import GoalStateSource from azurelinuxagent.common.protocol.hostplugin import HostPluginProtocol, VmSettingsNotSupported -from azurelinuxagent.common.protocol.restapi import VMAgentUpdateStatus, VMAgentUpdateStatuses, ExtHandlerPackageList, \ - VERSION_0 +from azurelinuxagent.common.protocol.restapi import VERSION_0 from azurelinuxagent.common.protocol.util import get_protocol_util from azurelinuxagent.common.utils import shellutil from azurelinuxagent.common.utils.archive import StateArchiver, AGENT_STATUS_FILE @@ -54,16 +51,16 @@ from azurelinuxagent.common.version import AGENT_LONG_NAME, AGENT_NAME, AGENT_DIR_PATTERN, CURRENT_AGENT, AGENT_VERSION, \ CURRENT_VERSION, DISTRO_NAME, DISTRO_VERSION, get_lis_version, \ has_logrotate, PY_VERSION_MAJOR, PY_VERSION_MINOR, PY_VERSION_MICRO, get_daemon_version +from azurelinuxagent.ga.agent_update import get_agent_update_handler from azurelinuxagent.ga.collect_logs import get_collect_logs_handler, is_log_collection_allowed from azurelinuxagent.ga.collect_telemetry_events import get_collect_telemetry_events_handler from azurelinuxagent.ga.env import get_env_handler -from azurelinuxagent.ga.exthandlers import HandlerManifest, ExtHandlersHandler, list_agent_lib_directory, \ +from azurelinuxagent.ga.exthandlers import ExtHandlersHandler, list_agent_lib_directory, \ ExtensionStatusValue, ExtHandlerStatusValue +from azurelinuxagent.ga.guestagent import GuestAgent from azurelinuxagent.ga.monitor import get_monitor_handler from azurelinuxagent.ga.send_telemetry_events import get_send_telemetry_events_handler -AGENT_ERROR_FILE = "error.json" # File name for agent error record -AGENT_MANIFEST_FILE = "HandlerManifest.json" AGENT_PARTITION_FILE = "partition" CHILD_HEALTH_INTERVAL = 15 * 60 @@ -71,8 +68,6 @@ CHILD_LAUNCH_RESTART_MAX = 3 CHILD_POLL_INTERVAL = 60 -MAX_FAILURE = 3 # Max failure allowed for agent before blacklisted - GOAL_STATE_PERIOD_EXTENSIONS_DISABLED = 5 * 60 ORPHAN_POLL_INTERVAL = 3 @@ -121,14 +116,6 @@ def __str__(self): return ustr(self.summary) -class AgentUpgradeType(object): - """ - Enum for different modes of Agent Upgrade - """ - Hotfix = "Hotfix" - Normal = "Normal" - - def get_update_handler(): return UpdateHandler() @@ -143,11 +130,6 @@ def __init__(self): self._is_running = True - # Member variables to keep track of the Agent AutoUpgrade - self.last_attempt_time = None - self._last_hotfix_upgrade_time = None - self._last_normal_upgrade_time = None - self.agents = [] self.child_agent = None @@ -368,6 +350,7 @@ def run(self, debug=False): from azurelinuxagent.ga.remoteaccess import get_remote_access_handler remote_access_handler = get_remote_access_handler(protocol) + agent_update_handler = get_agent_update_handler(protocol) self._ensure_no_orphans() self._emit_restart_event() @@ -401,7 +384,7 @@ def run(self, debug=False): while self.is_running: self._check_daemon_running(debug) self._check_threads_running(all_thread_handlers) - self._process_goal_state(exthandlers_handler, remote_access_handler) + self._process_goal_state(exthandlers_handler, remote_access_handler, agent_update_handler) self._send_heartbeat_telemetry(protocol) self._check_agent_memory_usage() time.sleep(self._goal_state_period) @@ -519,80 +502,6 @@ def _try_update_goal_state(self, protocol): return True - def __update_guest_agent(self, protocol): - """ - This function checks for new Agent updates and raises AgentUpgradeExitException if available. - There are 2 different ways the agent checks for an update - - 1) Requested Version is specified in the Goal State. - - In this case, the Agent will download the requested version and upgrade/downgrade instantly. - 2) No requested version. - - In this case, the agent will periodically check (1 hr) for new agent versions in GA Manifest. - - If available, it will download all versions > CURRENT_VERSION. - - Depending on the highest version > CURRENT_VERSION, - the agent will update within 4 hrs (for a Hotfix update) or 24 hrs (for a Normal update) - """ - - def log_next_update_time(): - next_normal_time, next_hotfix_time = self.__get_next_upgrade_times() - upgrade_type = self.__get_agent_upgrade_type(available_agent) - next_time = next_hotfix_time if upgrade_type == AgentUpgradeType.Hotfix else next_normal_time - message_ = "Discovered new {0} upgrade {1}; Will upgrade on or after {2}".format( - upgrade_type, available_agent.name, - datetime.utcfromtimestamp(next_time).strftime(logger.Logger.LogTimeFormatInUTC)) - add_event(AGENT_NAME, op=WALAEventOperation.AgentUpgrade, version=CURRENT_VERSION, is_success=True, - message=message_, log_event=False) - logger.info(message_) - - def handle_updates_for_requested_version(): - if requested_version < CURRENT_VERSION: - prefix = "downgrade" - # In case of a downgrade, we blacklist the current agent to avoid starting it back up ever again - # (the expectation here being that if RSM is asking us to a downgrade, - # there's a good reason for not wanting the current version). - try: - # We should always have an agent directory for the CURRENT_VERSION - # (unless the CURRENT_VERSION == daemon version, but since we don't support downgrading - # below daemon version, we will never reach this code path if that's the scenario) - current_agent = next(agent for agent in self.agents if agent.version == CURRENT_VERSION) - msg = "Blacklisting the agent {0} since a downgrade was requested in the GoalState, " \ - "suggesting that we really don't want to execute any extensions using this version".format( - CURRENT_VERSION) - logger.info(msg) - current_agent.mark_failure(is_fatal=True, reason=msg) - except StopIteration: - logger.warn( - "Could not find a matching agent with current version {0} to blacklist, skipping it".format( - CURRENT_VERSION)) - else: - # In case of an upgrade, we don't need to blacklist anything as the daemon will automatically - # start the next available highest version which would be the requested version - prefix = "upgrade" - raise AgentUpgradeExitException( - "Exiting current process to {0} to the request Agent version {1}".format(prefix, requested_version)) - - # Skip the update if there is no goal state yet or auto-update is disabled - if self._goal_state is None or not conf.get_autoupdate_enabled(): - return False - - if self._download_agent_if_upgrade_available(protocol): - # The call to get_latest_agent_greater_than_daemon() also finds all agents in directory and sets the self.agents property. - # This state is used to find the GuestAgent object with the current version later if requested version is available in last GS. - available_agent = self.get_latest_agent_greater_than_daemon() - requested_version, _ = self.__get_requested_version_and_agent_family_from_last_gs() - if requested_version is not None: - # If requested version specified, upgrade/downgrade to the specified version instantly as this is - # driven by the goal state (as compared to the agent periodically checking for new upgrades every hour) - handle_updates_for_requested_version() - elif available_agent is None: - # Legacy behavior: The current agent can become unavailable and needs to be reverted. - # In that case, self._upgrade_available() returns True and available_agent would be None. Handling it here. - raise AgentUpgradeExitException( - "Agent {0} is reverting to the installed agent -- exiting".format(CURRENT_AGENT)) - else: - log_next_update_time() - - self.__upgrade_agent_if_permitted() - def _processing_new_incarnation(self): """ True if we are currently processing a new incarnation (i.e. WireServer goal state) @@ -606,18 +515,18 @@ def _processing_new_extensions_goal_state(self): egs = self._goal_state.extensions_goal_state return self._goal_state is not None and egs.id != self._last_extensions_gs_id and not egs.is_outdated - def _process_goal_state(self, exthandlers_handler, remote_access_handler): + def _process_goal_state(self, exthandlers_handler, remote_access_handler, agent_update_handler): protocol = exthandlers_handler.protocol # update self._goal_state if not self._try_update_goal_state(protocol): - # agent updates and status reporting should be done even when the goal state is not updated - self.__update_guest_agent(protocol) - self._report_status(exthandlers_handler) + # status reporting should be done even when the goal state is not updated + agent_update_status = agent_update_handler.get_vmagent_update_status() + self._report_status(exthandlers_handler, agent_update_status) return # check for agent updates - self.__update_guest_agent(protocol) + agent_update_handler.run(self._goal_state) try: if self._processing_new_extensions_goal_state(): @@ -635,7 +544,8 @@ def _process_goal_state(self, exthandlers_handler, remote_access_handler): CGroupConfigurator.get_instance().check_cgroups(cgroup_metrics=[]) # report status before processing the remote access, since that operation can take a long time - self._report_status(exthandlers_handler) + agent_update_status = agent_update_handler.get_vmagent_update_status() + self._report_status(exthandlers_handler, agent_update_status) if self._processing_new_incarnation(): remote_access_handler.run() @@ -664,44 +574,7 @@ def _cleanup_legacy_goal_state_history(): except Exception as exception: logger.warn("Error removing legacy history files: {0}", ustr(exception)) - def __get_vmagent_update_status(self, goal_state_changed): - """ - This function gets the VMAgent update status as per the last GoalState. - Returns: None if the last GS does not ask for requested version else VMAgentUpdateStatus - """ - if not conf.get_enable_ga_versioning(): - return None - - update_status = None - - try: - requested_version, manifest = self.__get_requested_version_and_agent_family_from_last_gs() - if manifest is None and goal_state_changed: - logger.info("Unable to report update status as no matching manifest found for family: {0}".format( - conf.get_autoupdate_gafamily())) - return None - - if requested_version is not None: - if CURRENT_VERSION == requested_version: - status = VMAgentUpdateStatuses.Success - code = 0 - else: - status = VMAgentUpdateStatuses.Error - code = 1 - update_status = VMAgentUpdateStatus(expected_version=manifest.requested_version_string, status=status, - code=code) - except Exception as error: - if goal_state_changed: - err_msg = "[This error will only be logged once per goal state] " \ - "Ran into error when trying to fetch updateStatus for the agent, skipping reporting update satus. Error: {0}".format( - textutil.format_exception(error)) - logger.warn(err_msg) - add_event(op=WALAEventOperation.AgentUpgrade, is_success=False, message=err_msg, log_event=False) - - return update_status - - def _report_status(self, exthandlers_handler): - vm_agent_update_status = self.__get_vmagent_update_status(self._processing_new_extensions_goal_state()) + def _report_status(self, exthandlers_handler, vm_agent_update_status): # report_ext_handlers_status does its own error handling and returns None if an error occurred vm_status = exthandlers_handler.report_ext_handlers_status( goal_state_changed=self._processing_new_extensions_goal_state(), @@ -1067,173 +940,6 @@ def _shutdown(self): str(e)) return - def __get_requested_version_and_agent_family_from_last_gs(self): - """ - Get the requested version and corresponding manifests from last GS if supported - Returns: (Requested Version, Manifest) if supported and available - (None, None) if no manifests found in the last GS - (None, manifest) if not supported or not specified in GS - """ - family_name = conf.get_autoupdate_gafamily() - agent_families = self._goal_state.extensions_goal_state.agent_families - agent_families = [m for m in agent_families if m.name == family_name and len(m.uris) > 0] - if len(agent_families) == 0: - return None, None - if conf.get_enable_ga_versioning() and agent_families[0].is_requested_version_specified: - return agent_families[0].requested_version, agent_families[0] - return None, agent_families[0] - - def _download_agent_if_upgrade_available(self, protocol, base_version=CURRENT_VERSION): - """ - This function downloads the new agent if an update is available. - If a requested version is available in goal state, then only that version is downloaded (new-update model) - Else, we periodically (1hr by default) checks if new Agent upgrade is available and download it on filesystem if available (old-update model) - rtype: Boolean - return: True if current agent is no longer available or an agent with a higher version number is available - else False - """ - - def report_error(msg_, version_=CURRENT_VERSION, op=WALAEventOperation.Download): - logger.warn(msg_) - add_event(AGENT_NAME, op=op, version=version_, is_success=False, message=msg_, log_event=False) - - def can_proceed_with_requested_version(): - if not gs_updated: - # If the goal state didn't change, don't process anything. - return False - - # With the new model, we will get a new GS when CRP wants us to auto-update using required version. - # If there's no new goal state, don't proceed with anything - msg_ = "Found requested version in manifest: {0} for goal state {1}".format( - requested_version, goal_state_id) - logger.info(msg_) - add_event(AGENT_NAME, op=WALAEventOperation.AgentUpgrade, is_success=True, message=msg_, log_event=False) - - if requested_version < daemon_version: - # Don't process the update if the requested version is lesser than daemon version, - # as we don't support downgrades below daemon versions. - report_error( - "Can't process the upgrade as the requested version: {0} is < current daemon version: {1}".format( - requested_version, daemon_version), op=WALAEventOperation.AgentUpgrade) - return False - - return True - - def agent_upgrade_time_elapsed(now_): - if self.last_attempt_time is not None: - next_attempt_time = self.last_attempt_time + conf.get_autoupdate_frequency() - else: - next_attempt_time = now_ - if next_attempt_time > now_: - return False - return True - - agent_family_name = conf.get_autoupdate_gafamily() - gs_updated = False - daemon_version = self.__get_daemon_version_for_update() - try: - # Fetch the agent manifests from the latest Goal State - goal_state_id = self._goal_state.extensions_goal_state.id - gs_updated = self._processing_new_extensions_goal_state() - requested_version, agent_family = self.__get_requested_version_and_agent_family_from_last_gs() - if agent_family is None: - logger.verbose( - u"No manifest links found for agent family: {0} for goal state {1}, skipping update check".format( - agent_family_name, goal_state_id)) - return False - except Exception as err: - # If there's some issues in fetching the agent manifests, report it only on goal state change - msg = u"Exception retrieving agent manifests: {0}".format(textutil.format_exception(err)) - if gs_updated: - report_error(msg) - else: - logger.verbose(msg) - return False - - if requested_version is not None: - # If GA versioning is enabled and requested version present in GS, and it's a new GS, follow new logic - if not can_proceed_with_requested_version(): - return False - else: - # If no requested version specified in the Goal State, follow the old auto-update logic - # Note: If the first Goal State contains a requested version, this timer won't start (i.e. self.last_attempt_time won't be updated). - # If any subsequent goal state does not contain requested version, this timer will start then, and we will - # download all versions available in PIR and auto-update to the highest available version on that goal state. - now = time.time() - if not agent_upgrade_time_elapsed(now): - return False - - logger.info("No requested version specified, checking for all versions for agent update (family: {0})", - agent_family_name) - self.last_attempt_time = now - - try: - # If we make it to this point, then either there is a requested version in a new GS (new auto-update model), - # or the 1hr time limit has elapsed for us to check the agent manifest for updates (old auto-update model). - pkg_list = ExtHandlerPackageList() - - # If the requested version is the current version, don't download anything; - # the call to purge() below will delete all other agents from disk - # In this case, no need to even fetch the GA family manifest as we don't need to download any agent. - if requested_version is not None and requested_version == CURRENT_VERSION: - packages_to_download = [] - msg = "The requested version is running as the current version: {0}".format(requested_version) - logger.info(msg) - add_event(AGENT_NAME, op=WALAEventOperation.AgentUpgrade, is_success=True, message=msg) - else: - agent_manifest = self._goal_state.fetch_agent_manifest(agent_family.name, agent_family.uris) - pkg_list = agent_manifest.pkg_list - packages_to_download = pkg_list.versions - - # Verify the requested version is in GA family manifest (if specified) - if requested_version is not None and requested_version != CURRENT_VERSION: - for pkg in pkg_list.versions: - if FlexibleVersion(pkg.version) == requested_version: - # Found a matching package, only download that one - packages_to_download = [pkg] - break - else: - msg = "No matching package found in the agent manifest for requested version: {0} in goal state {1}, skipping agent update".format( - requested_version, goal_state_id) - report_error(msg, version_=requested_version) - return False - - # Set the agents to those available for download at least as current as the existing agent - # or to the requested version (if specified) - is_fast_track_goal_state = self._goal_state.extensions_goal_state.source == GoalStateSource.FastTrack - agents_to_download = [GuestAgent.from_agent_package(pkg, protocol, is_fast_track_goal_state) for pkg in packages_to_download] - - # Filter out the agents that were downloaded/extracted successfully. If the agent was not installed properly, - # we delete the directory and the zip package from the filesystem - self._set_and_sort_agents([agent for agent in agents_to_download if agent.is_available]) - - # Remove from disk any agent no longer needed in the VM. - # If requested version is provided, this would delete all other agents present on the VM except - - # - the current version and the requested version if requested version != current version - # - only the current version if requested version == current version - # Note: - # The code leaves on disk available, but blacklisted, agents to preserve the state. - # Otherwise, those agents could be downloaded again and inappropriately retried. - self._purge_agents() - self._filter_blacklisted_agents() - - # If there are no agents available to upgrade/downgrade to, return False - if len(self.agents) == 0: - return False - - if requested_version is not None: - # In case of requested version, return True if an agent with a different version number than the - # current version is available that is higher than the current daemon version - return self.agents[0].version != base_version and self.agents[0].version > daemon_version - else: - # Else, return True if the highest agent is > base_version (CURRENT_VERSION) - return self.agents[0].version > base_version - - except Exception as err: - msg = u"Exception downloading agents for update: {0}".format(textutil.format_exception(err)) - report_error(msg) - return False - def _write_pid_file(self): pid_files = self._get_pid_files() @@ -1421,58 +1127,6 @@ def _execute_run_command(command): msg = "Error while checking ip table rules:{0}".format(ustr(e)) logger.error(msg) - def __get_next_upgrade_times(self): - """ - Get the next upgrade times - return: Next Normal Upgrade Time, Next Hotfix Upgrade Time - """ - - def get_next_process_time(last_val, frequency): - return now if last_val is None else last_val + frequency - - now = time.time() - next_hotfix_time = get_next_process_time(self._last_hotfix_upgrade_time, conf.get_hotfix_upgrade_frequency()) - next_normal_time = get_next_process_time(self._last_normal_upgrade_time, conf.get_normal_upgrade_frequency()) - - return next_normal_time, next_hotfix_time - - @staticmethod - def __get_agent_upgrade_type(available_agent): - # We follow semantic versioning for the agent, if . is same, then . has changed. - # In this case, we consider it as a Hotfix upgrade. Else we consider it a Normal upgrade. - if available_agent.version.major == CURRENT_VERSION.major and available_agent.version.minor == CURRENT_VERSION.minor: - return AgentUpgradeType.Hotfix - return AgentUpgradeType.Normal - - def __upgrade_agent_if_permitted(self): - """ - Check every 4hrs for a Hotfix Upgrade and 24 hours for a Normal upgrade and upgrade the agent if available. - raises: ExitException when a new upgrade is available in the relevant time window, else returns - """ - - next_normal_time, next_hotfix_time = self.__get_next_upgrade_times() - now = time.time() - # Not permitted to update yet for any of the AgentUpgradeModes - if next_hotfix_time > now and next_normal_time > now: - return - - # Update the last upgrade check time even if no new agent is available for upgrade - self._last_hotfix_upgrade_time = now if next_hotfix_time <= now else self._last_hotfix_upgrade_time - self._last_normal_upgrade_time = now if next_normal_time <= now else self._last_normal_upgrade_time - - available_agent = self.get_latest_agent_greater_than_daemon() - if available_agent is None or available_agent.version <= CURRENT_VERSION: - logger.verbose("No agent upgrade discovered") - return - - upgrade_type = self.__get_agent_upgrade_type(available_agent) - upgrade_message = "{0} Agent upgrade discovered, updating to {1} -- exiting".format(upgrade_type, - available_agent.name) - - if (upgrade_type == AgentUpgradeType.Hotfix and next_hotfix_time <= now) or ( - upgrade_type == AgentUpgradeType.Normal and next_normal_time <= now): - raise AgentUpgradeExitException(upgrade_message) - def _reset_legacy_blacklisted_agents(self): # Reset the state of all blacklisted agents that were blacklisted by legacy agents (i.e. not during auto-update) @@ -1485,290 +1139,3 @@ def _reset_legacy_blacklisted_agents(self): agent.clear_error() except Exception as err: logger.warn("Unable to reset legacy blacklisted agents due to: {0}".format(err)) - - -class GuestAgent(object): - def __init__(self, path, pkg, protocol, is_fast_track_goal_state): - """ - If 'path' is given, the object is initialized to the version installed under that path. - - If 'pkg' is given, the version specified in the package information is downloaded and the object is - initialized to that version. - - 'is_fast_track_goal_state' and 'protocol' are used only when a package is downloaded. - - NOTE: Prefer using the from_installed_agent and from_agent_package methods instead of calling __init__ directly - """ - self._is_fast_track_goal_state = is_fast_track_goal_state - self.pkg = pkg - self._protocol = protocol - version = None - if path is not None: - m = AGENT_DIR_PATTERN.match(path) - if m is None: - raise UpdateError(u"Illegal agent directory: {0}".format(path)) - version = m.group(1) - elif self.pkg is not None: - version = pkg.version - - if version is None: - raise UpdateError(u"Illegal agent version: {0}".format(version)) - self.version = FlexibleVersion(version) - - location = u"disk" if path is not None else u"package" - logger.verbose(u"Loading Agent {0} from {1}", self.name, location) - - self.error = GuestAgentError(self.get_agent_error_file()) - self.error.load() - - try: - self._ensure_downloaded() - self._ensure_loaded() - except Exception as e: - # If we're unable to download/unpack the agent, delete the Agent directory - try: - if os.path.isdir(self.get_agent_dir()): - shutil.rmtree(self.get_agent_dir(), ignore_errors=True) - except Exception as err: - logger.warn("Unable to delete Agent files: {0}".format(err)) - msg = u"Agent {0} install failed with exception:".format( - self.name) - detailed_msg = '{0} {1}'.format(msg, textutil.format_exception(e)) - add_event( - AGENT_NAME, - version=self.version, - op=WALAEventOperation.Install, - is_success=False, - message=detailed_msg) - - @staticmethod - def from_installed_agent(path): - """ - Creates an instance of GuestAgent using the agent installed in the given 'path'. - """ - return GuestAgent(path, None, None, False) - - @staticmethod - def from_agent_package(package, protocol, is_fast_track_goal_state): - """ - Creates an instance of GuestAgent using the information provided in the 'package'; if that version of the agent is not installed it, it installs it. - """ - return GuestAgent(None, package, protocol, is_fast_track_goal_state) - - @property - def name(self): - return "{0}-{1}".format(AGENT_NAME, self.version) - - def get_agent_cmd(self): - return self.manifest.get_enable_command() - - def get_agent_dir(self): - return os.path.join(conf.get_lib_dir(), self.name) - - def get_agent_error_file(self): - return os.path.join(conf.get_lib_dir(), self.name, AGENT_ERROR_FILE) - - def get_agent_manifest_path(self): - return os.path.join(self.get_agent_dir(), AGENT_MANIFEST_FILE) - - def get_agent_pkg_path(self): - return ".".join((os.path.join(conf.get_lib_dir(), self.name), "zip")) - - def clear_error(self): - self.error.clear() - self.error.save() - - @property - def is_available(self): - return self.is_downloaded and not self.is_blacklisted - - @property - def is_blacklisted(self): - return self.error is not None and self.error.is_blacklisted - - @property - def is_downloaded(self): - return self.is_blacklisted or \ - os.path.isfile(self.get_agent_manifest_path()) - - def mark_failure(self, is_fatal=False, reason=''): - try: - if not os.path.isdir(self.get_agent_dir()): - os.makedirs(self.get_agent_dir()) - self.error.mark_failure(is_fatal=is_fatal, reason=reason) - self.error.save() - if self.error.is_blacklisted: - msg = u"Agent {0} is permanently blacklisted".format(self.name) - logger.warn(msg) - add_event(op=WALAEventOperation.AgentBlacklisted, is_success=False, message=msg, log_event=False, - version=self.version) - except Exception as e: - logger.warn(u"Agent {0} failed recording error state: {1}", self.name, ustr(e)) - - def _ensure_downloaded(self): - logger.verbose(u"Ensuring Agent {0} is downloaded", self.name) - - if self.is_downloaded: - logger.verbose(u"Agent {0} was previously downloaded - skipping download", self.name) - return - - if self.pkg is None: - raise UpdateError(u"Agent {0} is missing package and download URIs".format( - self.name)) - - self._download() - - msg = u"Agent {0} downloaded successfully".format(self.name) - logger.verbose(msg) - add_event( - AGENT_NAME, - version=self.version, - op=WALAEventOperation.Install, - is_success=True, - message=msg) - - def _ensure_loaded(self): - self._load_manifest() - self._load_error() - - def _download(self): - try: - self._protocol.client.download_zip_package("agent package", self.pkg.uris, self.get_agent_pkg_path(), self.get_agent_dir(), use_verify_header=self._is_fast_track_goal_state) - except Exception as exception: - msg = "Unable to download Agent {0}: {1}".format(self.name, ustr(exception)) - add_event( - AGENT_NAME, - op=WALAEventOperation.Download, - version=CURRENT_VERSION, - is_success=False, - message=msg) - raise UpdateError(msg) - - def _load_error(self): - try: - self.error = GuestAgentError(self.get_agent_error_file()) - self.error.load() - logger.verbose(u"Agent {0} error state: {1}", self.name, ustr(self.error)) - except Exception as e: - logger.warn(u"Agent {0} failed loading error state: {1}", self.name, ustr(e)) - - def _load_manifest(self): - path = self.get_agent_manifest_path() - if not os.path.isfile(path): - msg = u"Agent {0} is missing the {1} file".format(self.name, AGENT_MANIFEST_FILE) - raise UpdateError(msg) - - with open(path, "r") as manifest_file: - try: - manifests = json.load(manifest_file) - except Exception as e: - msg = u"Agent {0} has a malformed {1} ({2})".format(self.name, AGENT_MANIFEST_FILE, ustr(e)) - raise UpdateError(msg) - if type(manifests) is list: - if len(manifests) <= 0: - msg = u"Agent {0} has an empty {1}".format(self.name, AGENT_MANIFEST_FILE) - raise UpdateError(msg) - manifest = manifests[0] - else: - manifest = manifests - - try: - self.manifest = HandlerManifest(manifest) # pylint: disable=W0201 - if len(self.manifest.get_enable_command()) <= 0: - raise Exception(u"Manifest is missing the enable command") - except Exception as e: - msg = u"Agent {0} has an illegal {1}: {2}".format( - self.name, - AGENT_MANIFEST_FILE, - ustr(e)) - raise UpdateError(msg) - - logger.verbose( - u"Agent {0} loaded manifest from {1}", - self.name, - self.get_agent_manifest_path()) - logger.verbose(u"Successfully loaded Agent {0} {1}: {2}", - self.name, - AGENT_MANIFEST_FILE, - ustr(self.manifest.data)) - return - - -class GuestAgentError(object): - def __init__(self, path): - self.last_failure = 0.0 - self.was_fatal = False - if path is None: - raise UpdateError(u"GuestAgentError requires a path") - self.path = path - self.failure_count = 0 - self.reason = '' - - self.clear() - return - - def mark_failure(self, is_fatal=False, reason=''): - self.last_failure = time.time() - self.failure_count += 1 - self.was_fatal = is_fatal - self.reason = reason - return - - def clear(self): - self.last_failure = 0.0 - self.failure_count = 0 - self.was_fatal = False - self.reason = '' - return - - @property - def is_blacklisted(self): - return self.was_fatal or self.failure_count >= MAX_FAILURE - - def load(self): - if self.path is not None and os.path.isfile(self.path): - try: - with open(self.path, 'r') as f: - self.from_json(json.load(f)) - except Exception as error: - # The error.json file is only supposed to be written only by the agent. - # If for whatever reason the file is malformed, just delete it to reset state of the errors. - logger.warn( - "Ran into error when trying to load error file {0}, deleting it to clean state. Error: {1}".format( - self.path, textutil.format_exception(error))) - try: - os.remove(self.path) - except Exception: - # We try best case efforts to delete the file, ignore error if we're unable to do so - pass - return - - def save(self): - if os.path.isdir(os.path.dirname(self.path)): - with open(self.path, 'w') as f: - json.dump(self.to_json(), f) - return - - def from_json(self, data): - self.last_failure = max(self.last_failure, data.get(u"last_failure", 0.0)) - self.failure_count = max(self.failure_count, data.get(u"failure_count", 0)) - self.was_fatal = self.was_fatal or data.get(u"was_fatal", False) - reason = data.get(u"reason", '') - self.reason = reason if reason != '' else self.reason - return - - def to_json(self): - data = { - u"last_failure": self.last_failure, - u"failure_count": self.failure_count, - u"was_fatal": self.was_fatal, - u"reason": ustr(self.reason) - } - return data - - def __str__(self): - return "Last Failure: {0}, Total Failures: {1}, Fatal: {2}, Reason: {3}".format( - self.last_failure, - self.failure_count, - self.was_fatal, - self.reason) diff --git a/makepkg.py b/makepkg.py index 5ec04d5d8..bc4aad4c3 100755 --- a/makepkg.py +++ b/makepkg.py @@ -8,8 +8,9 @@ import subprocess import sys -from azurelinuxagent.common.version import AGENT_NAME, AGENT_VERSION, AGENT_LONG_VERSION -from azurelinuxagent.ga.update import AGENT_MANIFEST_FILE +from azurelinuxagent.common.version import AGENT_NAME, AGENT_VERSION, \ + AGENT_LONG_VERSION +from azurelinuxagent.ga.guestagent import AGENT_MANIFEST_FILE MANIFEST = '''[{{ "name": "{0}", diff --git a/tests/data/wire/ext_conf_missing_family.xml b/tests/data/wire/ext_conf_missing_family.xml index 058c40a88..9e13d03ac 100644 --- a/tests/data/wire/ext_conf_missing_family.xml +++ b/tests/data/wire/ext_conf_missing_family.xml @@ -4,27 +4,6 @@ xmlns:i="http://www.w3.org/2001/XMLSchema-instance"> - Prod - - - - Test - - https://mock-goal-state/rdfepirv2bl2prdstr01.blob.core.windows.net/7d89d439b79f4452950452399add2c90/Microsoft.OSTCLinuxAgent_Test_useast_manifest.xml - https://mock-goal-state/rdfepirv2bl2prdstr02.blob.core.windows.net/7d89d439b79f4452950452399add2c90/Microsoft.OSTCLinuxAgent_Test_useast_manifest.xml - https://mock-goal-state/rdfepirv2bl2prdstr03.blob.core.windows.net/7d89d439b79f4452950452399add2c90/Microsoft.OSTCLinuxAgent_Test_useast_manifest.xml - https://mock-goal-state/rdfepirv2bl2prdstr04.blob.core.windows.net/7d89d439b79f4452950452399add2c90/Microsoft.OSTCLinuxAgent_Test_useast_manifest.xml - https://mock-goal-state/rdfepirv2bl3prdstr01.blob.core.windows.net/7d89d439b79f4452950452399add2c90/Microsoft.OSTCLinuxAgent_Test_useast_manifest.xml - https://mock-goal-state/rdfepirv2bl3prdstr02.blob.core.windows.net/7d89d439b79f4452950452399add2c90/Microsoft.OSTCLinuxAgent_Test_useast_manifest.xml - https://mock-goal-state/rdfepirv2bl3prdstr03.blob.core.windows.net/7d89d439b79f4452950452399add2c90/Microsoft.OSTCLinuxAgent_Test_useast_manifest.xml - https://mock-goal-state/zrdfepirv2bl4prdstr01.blob.core.windows.net/7d89d439b79f4452950452399add2c90/Microsoft.OSTCLinuxAgent_Test_useast_manifest.xml - https://mock-goal-state/zrdfepirv2bl4prdstr03.blob.core.windows.net/7d89d439b79f4452950452399add2c90/Microsoft.OSTCLinuxAgent_Test_useast_manifest.xml - https://mock-goal-state/zrdfepirv2bl5prdstr02.blob.core.windows.net/7d89d439b79f4452950452399add2c90/Microsoft.OSTCLinuxAgent_Test_useast_manifest.xml - https://mock-goal-state/zrdfepirv2bl5prdstr04.blob.core.windows.net/7d89d439b79f4452950452399add2c90/Microsoft.OSTCLinuxAgent_Test_useast_manifest.xml - https://mock-goal-state/zrdfepirv2bl5prdstr06.blob.core.windows.net/7d89d439b79f4452950452399add2c90/Microsoft.OSTCLinuxAgent_Test_useast_manifest.xml - https://mock-goal-state/zrdfepirv2bl5prdstr09a.blob.core.windows.net/7d89d439b79f4452950452399add2c90/Microsoft.OSTCLinuxAgent_Test_useast_manifest.xml - https://mock-goal-state/zrdfepirv2bl6prdstr02a.blob.core.windows.net/7d89d439b79f4452950452399add2c90/Microsoft.OSTCLinuxAgent_Test_useast_manifest.xml - eastus diff --git a/tests/data/wire/ext_conf_missing_requested_version.xml b/tests/data/wire/ext_conf_requested_version_missing_in_manifest.xml similarity index 100% rename from tests/data/wire/ext_conf_missing_requested_version.xml rename to tests/data/wire/ext_conf_requested_version_missing_in_manifest.xml diff --git a/tests/ga/mocks.py b/tests/ga/mocks.py index 6fbc63d7d..e42dd5545 100644 --- a/tests/ga/mocks.py +++ b/tests/ga/mocks.py @@ -18,6 +18,8 @@ import contextlib from mock import PropertyMock + +from azurelinuxagent.ga.agent_update import AgentUpdateHandler from azurelinuxagent.ga.exthandlers import ExtHandlersHandler from azurelinuxagent.ga.remoteaccess import RemoteAccessHandler from azurelinuxagent.ga.update import UpdateHandler, get_update_handler @@ -30,6 +32,7 @@ def mock_update_handler(protocol, on_new_iteration=lambda _: None, exthandlers_handler=None, remote_access_handler=None, + agent_update_handler=None, autoupdate_enabled=False, check_daemon_running=False, start_background_threads=False, @@ -71,6 +74,9 @@ def is_running(*args): # mock for property UpdateHandler.is_running, which cont if remote_access_handler is None: remote_access_handler = RemoteAccessHandler(protocol) + if agent_update_handler is None: + agent_update_handler = AgentUpdateHandler(protocol) + cleanup_functions = [] def patch_object(target, attribute): @@ -80,39 +86,40 @@ def patch_object(target, attribute): try: with patch("azurelinuxagent.ga.exthandlers.get_exthandlers_handler", return_value=exthandlers_handler): - with patch("azurelinuxagent.ga.remoteaccess.get_remote_access_handler", return_value=remote_access_handler): - with patch("azurelinuxagent.common.conf.get_autoupdate_enabled", return_value=autoupdate_enabled): - with patch.object(UpdateHandler, "is_running", PropertyMock(side_effect=is_running)): - with patch('azurelinuxagent.ga.update.time.sleep', side_effect=lambda _: mock_sleep(0.001)) as sleep: - with patch('sys.exit', side_effect=lambda _: 0) as mock_exit: - if not check_daemon_running: - patch_object(UpdateHandler, "_check_daemon_running") - if not start_background_threads: - patch_object(UpdateHandler, "_start_threads") - if not check_background_threads: - patch_object(UpdateHandler, "_check_threads_running") - - def get_exit_code(): - if mock_exit.call_count == 0: - raise Exception("The UpdateHandler did not exit") - if mock_exit.call_count != 1: - raise Exception("The UpdateHandler exited multiple times ({0})".format(mock_exit.call_count)) - args, _ = mock_exit.call_args - return args[0] - - def get_iterations(): - return iteration_count[0] - - def get_iterations_completed(): - return sleep.call_count - - update_handler = get_update_handler() - update_handler.protocol_util.get_protocol = Mock(return_value=protocol) - update_handler.get_exit_code = get_exit_code - update_handler.get_iterations = get_iterations - update_handler.get_iterations_completed = get_iterations_completed - - yield update_handler + with patch("azurelinuxagent.ga.agent_update.get_agent_update_handler", return_value=agent_update_handler): + with patch("azurelinuxagent.ga.remoteaccess.get_remote_access_handler", return_value=remote_access_handler): + with patch("azurelinuxagent.common.conf.get_autoupdate_enabled", return_value=autoupdate_enabled): + with patch.object(UpdateHandler, "is_running", PropertyMock(side_effect=is_running)): + with patch('azurelinuxagent.ga.update.time.sleep', side_effect=lambda _: mock_sleep(0.001)) as sleep: + with patch('sys.exit', side_effect=lambda _: 0) as mock_exit: + if not check_daemon_running: + patch_object(UpdateHandler, "_check_daemon_running") + if not start_background_threads: + patch_object(UpdateHandler, "_start_threads") + if not check_background_threads: + patch_object(UpdateHandler, "_check_threads_running") + + def get_exit_code(): + if mock_exit.call_count == 0: + raise Exception("The UpdateHandler did not exit") + if mock_exit.call_count != 1: + raise Exception("The UpdateHandler exited multiple times ({0})".format(mock_exit.call_count)) + args, _ = mock_exit.call_args + return args[0] + + def get_iterations(): + return iteration_count[0] + + def get_iterations_completed(): + return sleep.call_count + + update_handler = get_update_handler() + update_handler.protocol_util.get_protocol = Mock(return_value=protocol) + update_handler.get_exit_code = get_exit_code + update_handler.get_iterations = get_iterations + update_handler.get_iterations_completed = get_iterations_completed + + yield update_handler finally: for f in cleanup_functions: f() diff --git a/tests/ga/test_agent_update.py b/tests/ga/test_agent_update.py new file mode 100644 index 000000000..5386bdaf6 --- /dev/null +++ b/tests/ga/test_agent_update.py @@ -0,0 +1,311 @@ +import contextlib +import json +import os + +from azurelinuxagent.common import conf +from azurelinuxagent.common.event import WALAEventOperation +from azurelinuxagent.common.exception import AgentUpgradeExitException +from azurelinuxagent.common.future import ustr, httpclient +from azurelinuxagent.common.protocol.restapi import VMAgentUpdateStatuses + +from azurelinuxagent.common.protocol.util import ProtocolUtil +from azurelinuxagent.common.version import CURRENT_VERSION +from azurelinuxagent.ga.agent_update import get_agent_update_handler +from azurelinuxagent.ga.guestagent import GAUpdateReportState +from tests.ga.test_update import UpdateTestCase +from tests.protocol.HttpRequestPredicates import HttpRequestPredicates +from tests.protocol.mocks import mock_wire_protocol, MockHttpResponse +from tests.protocol.mockwiredata import DATA_FILE +from tests.tools import clear_singleton_instances, load_bin_data, patch + + +class TestAgentUpdate(UpdateTestCase): + + def setUp(self): + UpdateTestCase.setUp(self) + # Since ProtocolUtil is a singleton per thread, we need to clear it to ensure that the test cases do not + # reuse a previous state + clear_singleton_instances(ProtocolUtil) + + @contextlib.contextmanager + def __get_agent_update_handler(self, test_data=None, autoupdate_frequency=0.001, autoupdate_enabled=True): + # Default to DATA_FILE of test_data parameter raises the pylint warning + # W0102: Dangerous default value DATA_FILE (builtins.dict) as argument (dangerous-default-value) + test_data = DATA_FILE if test_data is None else test_data + + with mock_wire_protocol(test_data) as protocol: + + def get_handler(url, **kwargs): + if HttpRequestPredicates.is_agent_package_request(url): + agent_pkg = load_bin_data(self._get_agent_file_name(), self._agent_zip_dir) + protocol.mock_wire_data.call_counts['agentArtifact'] += 1 + return MockHttpResponse(status=httpclient.OK, body=agent_pkg) + return protocol.mock_wire_data.mock_http_get(url, **kwargs) + + def put_handler(url, *args, **_): + if HttpRequestPredicates.is_host_plugin_status_request(url): + # Skip reading the HostGA request data as its encoded + return MockHttpResponse(status=500) + protocol.aggregate_status = json.loads(args[0]) + return MockHttpResponse(status=201) + + protocol.set_http_handlers(http_get_handler=get_handler, http_put_handler=put_handler) + + with patch("azurelinuxagent.common.conf.get_autoupdate_enabled", return_value=autoupdate_enabled): + with patch("azurelinuxagent.common.conf.get_autoupdate_frequency", return_value=autoupdate_frequency): + with patch("azurelinuxagent.common.conf.get_autoupdate_gafamily", return_value="Prod"): + with patch("azurelinuxagent.ga.agent_update.add_event") as mock_telemetry: + agent_update_handler = get_agent_update_handler(protocol) + agent_update_handler._protocol = protocol + yield agent_update_handler, mock_telemetry + + def __assert_agent_directories_available(self, versions): + for version in versions: + self.assertTrue(os.path.exists(self.agent_dir(version)), "Agent directory {0} not found".format(version)) + + def __assert_agent_directories_exist_and_others_dont_exist(self, versions): + self.__assert_agent_directories_available(versions=versions) + other_agents = [agent_dir for agent_dir in self.agent_dirs() if + agent_dir not in [self.agent_dir(version) for version in versions]] + self.assertFalse(any(other_agents), + "All other agents should be purged from agent dir: {0}".format(other_agents)) + + def __assert_agent_requested_version_in_goal_state(self, mock_telemetry, inc=1, version="9.9.9.10"): + upgrade_event_msgs = [kwarg['message'] for _, kwarg in mock_telemetry.call_args_list if + 'Goal state incarnation_{0} is requesting a new agent version {1}'.format(inc, version) in kwarg['message'] and kwarg[ + 'op'] == WALAEventOperation.AgentUpgrade] + self.assertEqual(1, len(upgrade_event_msgs), + "Did not find the event indicating that the agent requested version found. Got: {0}".format( + mock_telemetry.call_args_list)) + + def __assert_no_agent_package_telemetry_emitted(self, mock_telemetry, version="9.9.9.10"): + upgrade_event_msgs = [kwarg['message'] for _, kwarg in mock_telemetry.call_args_list if + 'Unable to update Agent: No matching package found in the agent manifest for requested version: {0}'.format(version) in kwarg['message'] and kwarg[ + 'op'] == WALAEventOperation.AgentUpgrade] + self.assertEqual(1, len(upgrade_event_msgs), + "Did not find the event indicating that the agent package not found. Got: {0}".format( + mock_telemetry.call_args_list)) + + def test_it_should_not_update_when_autoupdate_disabled(self): + self.prepare_agents(count=1) + with self.__get_agent_update_handler(autoupdate_enabled=False) as (agent_update_handler, mock_telemetry): + agent_update_handler.run(agent_update_handler._protocol.get_goal_state()) + self.__assert_agent_directories_exist_and_others_dont_exist(versions=[str(CURRENT_VERSION)]) + self.assertEqual(0, len([kwarg['message'] for _, kwarg in mock_telemetry.call_args_list if + "requesting a new agent version" in kwarg['message'] and kwarg[ + 'op'] == WALAEventOperation.AgentUpgrade]), "should not check for requested version") + + def test_it_should_update_to_largest_version_if_ga_versioning_disabled(self): + self.prepare_agents(count=1) + + data_file = DATA_FILE.copy() + data_file["ext_conf"] = "wire/ext_conf_requested_version.xml" + with self.__get_agent_update_handler(test_data=data_file) as (agent_update_handler, mock_telemetry): + with patch.object(conf, "get_enable_ga_versioning", return_value=False): + with self.assertRaises(AgentUpgradeExitException) as context: + agent_update_handler.run(agent_update_handler._protocol.get_goal_state()) + self.__assert_agent_requested_version_in_goal_state(mock_telemetry, inc=2, version="99999.0.0.0") + self.__assert_agent_directories_exist_and_others_dont_exist(versions=[str(CURRENT_VERSION), "99999.0.0.0"]) + self.assertIn("Agent update found, Exiting current process", ustr(context.exception.reason)) + + + def test_it_should_not_agent_update_if_last_attempted_update_time_not_elapsed(self): + self.prepare_agents(count=1) + data_file = DATA_FILE.copy() + data_file["ext_conf"] = "wire/ext_conf_requested_version.xml" + version = "5.2.0.1" + with self.__get_agent_update_handler(test_data=data_file, autoupdate_frequency=10) as (agent_update_handler, mock_telemetry): + agent_update_handler._protocol.mock_wire_data.set_extension_config_requested_version(version) + agent_update_handler._protocol.mock_wire_data.set_incarnation(2) + agent_update_handler._protocol.update_goal_state() + agent_update_handler.run(agent_update_handler._protocol.get_goal_state()) + + self.__assert_agent_requested_version_in_goal_state(mock_telemetry, inc=2, version=version) + self.__assert_no_agent_package_telemetry_emitted(mock_telemetry, version=version) + # Now we shouldn't check for download if update not allowed.This run should not add new logs + agent_update_handler.run(agent_update_handler._protocol.get_goal_state()) + self.__assert_agent_requested_version_in_goal_state(mock_telemetry, inc=2, version=version) + self.__assert_no_agent_package_telemetry_emitted(mock_telemetry, version=version) + + def test_it_should_update_to_largest_version_if_requested_version_not_available(self): + self.prepare_agents(count=1) + + data_file = DATA_FILE.copy() + data_file['ext_conf'] = "wire/ext_conf.xml" + with self.__get_agent_update_handler(test_data=data_file) as (agent_update_handler, mock_telemetry): + with self.assertRaises(AgentUpgradeExitException) as context: + agent_update_handler.run(agent_update_handler._protocol.get_goal_state()) + self.__assert_agent_requested_version_in_goal_state(mock_telemetry, inc=2, version="99999.0.0.0") + self.__assert_agent_directories_exist_and_others_dont_exist(versions=[str(CURRENT_VERSION), "99999.0.0.0"]) + self.assertIn("Agent update found, Exiting current process", ustr(context.exception.reason)) + + def test_it_should_not_agent_update_if_requested_version_is_same_as_current_version(self): + data_file = DATA_FILE.copy() + data_file["ext_conf"] = "wire/ext_conf_requested_version.xml" + + # Set the test environment by adding 20 random agents to the agent directory + self.prepare_agents() + self.assertEqual(20, self.agent_count(), "Agent directories not set properly") + + with self.__get_agent_update_handler(test_data=data_file) as (agent_update_handler, mock_telemetry): + agent_update_handler._protocol.mock_wire_data.set_extension_config_requested_version( + str(CURRENT_VERSION)) + agent_update_handler._protocol.mock_wire_data.set_incarnation(2) + agent_update_handler._protocol.update_goal_state() + agent_update_handler.run(agent_update_handler._protocol.get_goal_state()) + self.assertEqual(0, len([kwarg['message'] for _, kwarg in mock_telemetry.call_args_list if + "requesting a new agent version" in kwarg['message'] and kwarg[ + 'op'] == WALAEventOperation.AgentUpgrade]), "requested version should be same as current version") + self.assertFalse(os.path.exists(self.agent_dir("99999.0.0.0")), + "New agent directory should not be found") + + def test_it_should_upgrade_agent_if_requested_version_is_available_greater_than_current_version(self): + data_file = DATA_FILE.copy() + data_file["ext_conf"] = "wire/ext_conf_requested_version.xml" + + # Set the test environment by adding 20 random agents to the agent directory + self.prepare_agents() + self.assertEqual(20, self.agent_count(), "Agent directories not set properly") + + with self.__get_agent_update_handler(test_data=data_file) as (agent_update_handler, mock_telemetry): + with self.assertRaises(AgentUpgradeExitException) as context: + agent_update_handler.run(agent_update_handler._protocol.get_goal_state()) + self.__assert_agent_requested_version_in_goal_state(mock_telemetry, version="9.9.9.10") + self.__assert_agent_directories_exist_and_others_dont_exist(versions=["9.9.9.10", str(CURRENT_VERSION)]) + self.assertIn("Agent update found, Exiting current process", ustr(context.exception.reason)) + + def test_it_should_downgrade_agent_if_requested_version_is_available_less_than_current_version(self): + data_file = DATA_FILE.copy() + data_file["ext_conf"] = "wire/ext_conf_requested_version.xml" + + # Set the test environment by adding 20 random agents to the agent directory + self.prepare_agents() + self.assertEqual(20, self.agent_count(), "Agent directories not set properly") + + downgraded_version = "1.2.0" + + with self.__get_agent_update_handler(test_data=data_file) as (agent_update_handler, mock_telemetry): + agent_update_handler._protocol.mock_wire_data.set_extension_config_requested_version(downgraded_version) + agent_update_handler._protocol.mock_wire_data.set_incarnation(2) + agent_update_handler._protocol.update_goal_state() + with self.assertRaises(AgentUpgradeExitException) as context: + agent_update_handler.run(agent_update_handler._protocol.get_goal_state()) + self.__assert_agent_requested_version_in_goal_state(mock_telemetry, inc=2, version=downgraded_version) + self.__assert_agent_directories_exist_and_others_dont_exist( + versions=[downgraded_version, str(CURRENT_VERSION)]) + self.assertIn("Agent update found, Exiting current process", ustr(context.exception.reason)) + + def test_handles_if_requested_version_not_found_in_pkgs_to_download(self): + data_file = DATA_FILE.copy() + data_file["ext_conf"] = "wire/ext_conf_requested_version.xml" + + # Set the test environment by adding 20 random agents to the agent directory + self.prepare_agents() + self.assertEqual(20, self.agent_count(), "Agent directories not set properly") + + version = "5.2.0.4" + + with self.__get_agent_update_handler(test_data=data_file) as (agent_update_handler, mock_telemetry): + agent_update_handler._protocol.mock_wire_data.set_extension_config_requested_version(version) + agent_update_handler._protocol.mock_wire_data.set_incarnation(2) + agent_update_handler._protocol.update_goal_state() + agent_update_handler.run(agent_update_handler._protocol.get_goal_state()) + + self.__assert_agent_requested_version_in_goal_state(mock_telemetry, inc=2, version=version) + self.assertFalse(os.path.exists(self.agent_dir(version)), + "New agent directory should not be found") + + self.__assert_no_agent_package_telemetry_emitted(mock_telemetry, version=version) + + def test_handles_missing_agent_family(self): + data_file = DATA_FILE.copy() + data_file["ext_conf"] = "wire/ext_conf_missing_family.xml" + + # Set the test environment by adding 20 random agents to the agent directory + self.prepare_agents() + self.assertEqual(20, self.agent_count(), "Agent directories not set properly") + + with self.__get_agent_update_handler(test_data=data_file) as (agent_update_handler, mock_telemetry): + agent_update_handler.run(agent_update_handler._protocol.get_goal_state()) + + self.assertFalse(os.path.exists(self.agent_dir("99999.0.0.0")), + "New agent directory should not be found") + + self.assertEqual(1, len([kwarg['message'] for _, kwarg in mock_telemetry.call_args_list if + "No manifest links found for agent family" in kwarg[ + 'message'] and kwarg[ + 'op'] == WALAEventOperation.AgentUpgrade]), "Agent manifest should not be in GS") + + def test_it_should_report_update_status_with_success(self): + data_file = DATA_FILE.copy() + data_file["ext_conf"] = "wire/ext_conf_requested_version.xml" + + with self.__get_agent_update_handler(test_data=data_file) as (agent_update_handler, _): + GAUpdateReportState.report_error_msg = "" + agent_update_handler._protocol.mock_wire_data.set_extension_config_requested_version( + str(CURRENT_VERSION)) + agent_update_handler._protocol.mock_wire_data.set_incarnation(2) + agent_update_handler._protocol.update_goal_state() + agent_update_handler.run(agent_update_handler._protocol.get_goal_state()) + vm_agent_update_status = agent_update_handler.get_vmagent_update_status() + self.assertEqual(VMAgentUpdateStatuses.Success, vm_agent_update_status.status) + self.assertEqual(0, vm_agent_update_status.code) + self.assertEqual(str(CURRENT_VERSION), vm_agent_update_status.expected_version) + + def test_it_should_report_update_status_with_error_on_download_fail(self): + data_file = DATA_FILE.copy() + data_file["ext_conf"] = "wire/ext_conf_requested_version.xml" + + @contextlib.contextmanager + def mock_agent_update_handler(test_data): + with mock_wire_protocol(test_data) as protocol: + + def get_handler(url, **kwargs): + if HttpRequestPredicates.is_agent_package_request(url): + return MockHttpResponse(status=httpclient.SERVICE_UNAVAILABLE) + return protocol.mock_wire_data.mock_http_get(url, **kwargs) + + protocol.set_http_handlers(http_get_handler=get_handler) + + with patch("azurelinuxagent.common.conf.get_autoupdate_enabled", return_value=True): + with patch("azurelinuxagent.common.conf.get_autoupdate_frequency", return_value=0.001): + with patch("azurelinuxagent.common.conf.get_autoupdate_gafamily", return_value="Prod"): + agent_update_handler_local = get_agent_update_handler(protocol) + yield agent_update_handler_local + + with mock_agent_update_handler(test_data=data_file) as (agent_update_handler): + GAUpdateReportState.report_error_msg = "" + agent_update_handler.run(agent_update_handler._protocol.get_goal_state()) + vm_agent_update_status = agent_update_handler.get_vmagent_update_status() + self.assertEqual(VMAgentUpdateStatuses.Error, vm_agent_update_status.status) + self.assertEqual(1, vm_agent_update_status.code) + self.assertEqual("9.9.9.10", vm_agent_update_status.expected_version) + self.assertIn("Unable to download Agent", vm_agent_update_status.message) + + def test_it_should_report_update_status_with_missing_requested_version_error(self): + data_file = DATA_FILE.copy() + data_file['ext_conf'] = "wire/ext_conf.xml" + + @contextlib.contextmanager + def mock_agent_update_handler(test_data): + with mock_wire_protocol(test_data) as protocol: + def get_handler(url, **kwargs): + if HttpRequestPredicates.is_agent_package_request(url): + return MockHttpResponse(status=httpclient.SERVICE_UNAVAILABLE) + return protocol.mock_wire_data.mock_http_get(url, **kwargs) + + protocol.set_http_handlers(http_get_handler=get_handler) + + with patch("azurelinuxagent.common.conf.get_autoupdate_enabled", return_value=True): + with patch("azurelinuxagent.common.conf.get_autoupdate_frequency", return_value=0.001): + with patch("azurelinuxagent.common.conf.get_autoupdate_gafamily", return_value="Prod"): + agent_update_handler_local = get_agent_update_handler(protocol) + yield agent_update_handler_local + + with mock_agent_update_handler(test_data=data_file) as (agent_update_handler): + GAUpdateReportState.report_error_msg = "" + agent_update_handler.run(agent_update_handler._protocol.get_goal_state()) + vm_agent_update_status = agent_update_handler.get_vmagent_update_status() + self.assertEqual(VMAgentUpdateStatuses.Error, vm_agent_update_status.status) + self.assertEqual(1, vm_agent_update_status.code) + self.assertIn("Missing requested version", vm_agent_update_status.message) diff --git a/tests/ga/test_guestagent.py b/tests/ga/test_guestagent.py new file mode 100644 index 000000000..81e248bb0 --- /dev/null +++ b/tests/ga/test_guestagent.py @@ -0,0 +1,309 @@ +import json +import os + +from azurelinuxagent.common import conf +from azurelinuxagent.common.exception import UpdateError +from azurelinuxagent.ga.guestagent import GuestAgent, AGENT_MANIFEST_FILE, AGENT_ERROR_FILE, GuestAgentError, \ + MAX_FAILURE +from azurelinuxagent.common.future import httpclient +from azurelinuxagent.common.protocol.restapi import ExtHandlerPackage +from azurelinuxagent.common.version import AGENT_NAME +from tests.ga.test_update import UpdateTestCase, EMPTY_MANIFEST, WITH_ERROR, NO_ERROR +from tests.protocol import mockwiredata +from tests.protocol.mocks import MockHttpResponse, mock_wire_protocol +from tests.tools import load_bin_data, patch + + +class TestGuestAgent(UpdateTestCase): + def setUp(self): + UpdateTestCase.setUp(self) + self.copy_agents(self._get_agent_file_path()) + self.agent_path = os.path.join(self.tmp_dir, self._get_agent_name()) + + def test_creation(self): + with self.assertRaises(UpdateError): + GuestAgent.from_installed_agent("A very bad file name") + + with self.assertRaises(UpdateError): + GuestAgent.from_installed_agent("{0}-a.bad.version".format(AGENT_NAME)) + + self.expand_agents() + + agent = GuestAgent.from_installed_agent(self.agent_path) + self.assertNotEqual(None, agent) + self.assertEqual(self._get_agent_name(), agent.name) + self.assertEqual(self._get_agent_version(), agent.version) + + self.assertEqual(self.agent_path, agent.get_agent_dir()) + + path = os.path.join(self.agent_path, AGENT_MANIFEST_FILE) + self.assertEqual(path, agent.get_agent_manifest_path()) + + self.assertEqual( + os.path.join(self.agent_path, AGENT_ERROR_FILE), + agent.get_agent_error_file()) + + path = ".".join((os.path.join(conf.get_lib_dir(), self._get_agent_name()), "zip")) + self.assertEqual(path, agent.get_agent_pkg_path()) + + self.assertTrue(agent.is_downloaded) + self.assertFalse(agent.is_blacklisted) + self.assertTrue(agent.is_available) + + def test_clear_error(self): + self.expand_agents() + + agent = GuestAgent.from_installed_agent(self.agent_path) + agent.mark_failure(is_fatal=True) + + self.assertTrue(agent.error.last_failure > 0.0) + self.assertEqual(1, agent.error.failure_count) + self.assertTrue(agent.is_blacklisted) + self.assertEqual(agent.is_blacklisted, agent.error.is_blacklisted) + + agent.clear_error() + self.assertEqual(0.0, agent.error.last_failure) + self.assertEqual(0, agent.error.failure_count) + self.assertFalse(agent.is_blacklisted) + self.assertEqual(agent.is_blacklisted, agent.error.is_blacklisted) + + def test_is_available(self): + self.expand_agents() + + agent = GuestAgent.from_installed_agent(self.agent_path) + + self.assertTrue(agent.is_available) + agent.mark_failure(is_fatal=True) + self.assertFalse(agent.is_available) + + def test_is_blacklisted(self): + self.expand_agents() + + agent = GuestAgent.from_installed_agent(self.agent_path) + self.assertFalse(agent.is_blacklisted) + self.assertEqual(agent.is_blacklisted, agent.error.is_blacklisted) + + agent.mark_failure(is_fatal=True) + self.assertTrue(agent.is_blacklisted) + self.assertEqual(agent.is_blacklisted, agent.error.is_blacklisted) + + def test_is_downloaded(self): + self.expand_agents() + agent = GuestAgent.from_installed_agent(self.agent_path) + self.assertTrue(agent.is_downloaded) + + def test_mark_failure(self): + agent = GuestAgent.from_installed_agent(self.agent_path) + + agent.mark_failure() + self.assertEqual(1, agent.error.failure_count) + + agent.mark_failure(is_fatal=True) + self.assertEqual(2, agent.error.failure_count) + self.assertTrue(agent.is_blacklisted) + + def test_load_manifest(self): + self.expand_agents() + agent = GuestAgent.from_installed_agent(self.agent_path) + agent._load_manifest() + self.assertEqual(agent.manifest.get_enable_command(), + agent.get_agent_cmd()) + + def test_load_manifest_missing(self): + self.expand_agents() + agent = GuestAgent.from_installed_agent(self.agent_path) + os.remove(agent.get_agent_manifest_path()) + self.assertRaises(UpdateError, agent._load_manifest) + + def test_load_manifest_is_empty(self): + self.expand_agents() + agent = GuestAgent.from_installed_agent(self.agent_path) + self.assertTrue(os.path.isfile(agent.get_agent_manifest_path())) + + with open(agent.get_agent_manifest_path(), "w") as file: # pylint: disable=redefined-builtin + json.dump(EMPTY_MANIFEST, file) + self.assertRaises(UpdateError, agent._load_manifest) + + def test_load_manifest_is_malformed(self): + self.expand_agents() + agent = GuestAgent.from_installed_agent(self.agent_path) + self.assertTrue(os.path.isfile(agent.get_agent_manifest_path())) + + with open(agent.get_agent_manifest_path(), "w") as file: # pylint: disable=redefined-builtin + file.write("This is not JSON data") + self.assertRaises(UpdateError, agent._load_manifest) + + def test_load_error(self): + agent = GuestAgent.from_installed_agent(self.agent_path) + agent.error = None + + agent._load_error() + self.assertTrue(agent.error is not None) + + def test_download(self): + self.remove_agents() + self.assertFalse(os.path.isdir(self.agent_path)) + + agent_uri = 'https://foo.blob.core.windows.net/bar/OSTCExtensions.WALinuxAgent__1.0.0' + + def http_get_handler(uri, *_, **__): + if uri == agent_uri: + response = load_bin_data(self._get_agent_file_name(), self._agent_zip_dir) + return MockHttpResponse(status=httpclient.OK, body=response) + return None + + pkg = ExtHandlerPackage(version=str(self._get_agent_version())) + pkg.uris.append(agent_uri) + + with mock_wire_protocol(mockwiredata.DATA_FILE) as protocol: + protocol.set_http_handlers(http_get_handler=http_get_handler) + agent = GuestAgent.from_agent_package(pkg, protocol, False) + + self.assertTrue(os.path.isdir(agent.get_agent_dir())) + self.assertTrue(agent.is_downloaded) + + def test_download_fail(self): + self.remove_agents() + self.assertFalse(os.path.isdir(self.agent_path)) + + agent_uri = 'https://foo.blob.core.windows.net/bar/OSTCExtensions.WALinuxAgent__1.0.0' + + def http_get_handler(uri, *_, **__): + if uri in (agent_uri, 'http://168.63.129.16:32526/extensionArtifact'): + return MockHttpResponse(status=httpclient.SERVICE_UNAVAILABLE) + return None + + pkg = ExtHandlerPackage(version=str(self._get_agent_version())) + pkg.uris.append(agent_uri) + + with mock_wire_protocol(mockwiredata.DATA_FILE) as protocol: + protocol.set_http_handlers(http_get_handler=http_get_handler) + with patch("azurelinuxagent.ga.guestagent.add_event") as add_event: + agent = GuestAgent.from_agent_package(pkg, protocol, False) + + self.assertFalse(os.path.isfile(self.agent_path)) + + messages = [kwargs['message'] for _, kwargs in add_event.call_args_list if kwargs['op'] == 'Install' and kwargs['is_success'] == False] + self.assertEqual(1, len(messages), "Expected exactly 1 install error/ Got: {0}".format(add_event.call_args_list)) + self.assertIn('[UpdateError] Unable to download Agent WALinuxAgent-9.9.9.9', messages[0], "The install error does not include the expected message") + + self.assertFalse(agent.is_blacklisted, "Download failures should not blacklist the Agent") + + def test_invalid_agent_package_does_not_blacklist_the_agent(self): + agent_uri = 'https://foo.blob.core.windows.net/bar/OSTCExtensions.WALinuxAgent__9.9.9.9' + + def http_get_handler(uri, *_, **__): + if uri in (agent_uri, 'http://168.63.129.16:32526/extensionArtifact'): + response = load_bin_data("ga/WALinuxAgent-9.9.9.9-no_manifest.zip") + return MockHttpResponse(status=httpclient.OK, body=response) + return None + + pkg = ExtHandlerPackage(version="9.9.9.9") + pkg.uris.append(agent_uri) + + with mock_wire_protocol(mockwiredata.DATA_FILE) as protocol: + protocol.set_http_handlers(http_get_handler=http_get_handler) + agent = GuestAgent.from_agent_package(pkg, protocol, False) + + self.assertFalse(agent.is_blacklisted, "The agent should not be blacklisted if unable to unpack/download") + self.assertFalse(os.path.exists(agent.get_agent_dir()), "Agent directory should be cleaned up") + + @patch("azurelinuxagent.ga.update.GuestAgent._download") + def test_ensure_download_skips_blacklisted(self, mock_download): + agent = GuestAgent.from_installed_agent(self.agent_path) + self.assertEqual(0, mock_download.call_count) + + agent.clear_error() + agent.mark_failure(is_fatal=True) + self.assertTrue(agent.is_blacklisted) + + pkg = ExtHandlerPackage(version=str(self._get_agent_version())) + pkg.uris.append(None) + # _download is mocked so there will be no http request; passing a None protocol + agent = GuestAgent.from_agent_package(pkg, None, False) + + self.assertEqual(1, agent.error.failure_count) + self.assertTrue(agent.error.was_fatal) + self.assertTrue(agent.is_blacklisted) + self.assertEqual(0, mock_download.call_count) + + +class TestGuestAgentError(UpdateTestCase): + def test_creation(self): + self.assertRaises(TypeError, GuestAgentError) + self.assertRaises(UpdateError, GuestAgentError, None) + + with self.get_error_file(error_data=WITH_ERROR) as path: + err = GuestAgentError(path.name) + err.load() + self.assertEqual(path.name, err.path) + self.assertNotEqual(None, err) + + self.assertEqual(WITH_ERROR["last_failure"], err.last_failure) + self.assertEqual(WITH_ERROR["failure_count"], err.failure_count) + self.assertEqual(WITH_ERROR["was_fatal"], err.was_fatal) + return + + def test_clear(self): + with self.get_error_file(error_data=WITH_ERROR) as path: + err = GuestAgentError(path.name) + err.load() + self.assertEqual(path.name, err.path) + self.assertNotEqual(None, err) + + err.clear() + self.assertEqual(NO_ERROR["last_failure"], err.last_failure) + self.assertEqual(NO_ERROR["failure_count"], err.failure_count) + self.assertEqual(NO_ERROR["was_fatal"], err.was_fatal) + return + + def test_save(self): + err1 = self.create_error() + err1.mark_failure() + err1.mark_failure(is_fatal=True) + + err2 = self.create_error(err1.to_json()) + self.assertEqual(err1.last_failure, err2.last_failure) + self.assertEqual(err1.failure_count, err2.failure_count) + self.assertEqual(err1.was_fatal, err2.was_fatal) + + def test_mark_failure(self): + err = self.create_error() + self.assertFalse(err.is_blacklisted) + + for i in range(0, MAX_FAILURE): # pylint: disable=unused-variable + err.mark_failure() + + # Agent failed >= MAX_FAILURE, it should be blacklisted + self.assertTrue(err.is_blacklisted) + self.assertEqual(MAX_FAILURE, err.failure_count) + return + + def test_mark_failure_permanent(self): + err = self.create_error() + + self.assertFalse(err.is_blacklisted) + + # Fatal errors immediately blacklist + err.mark_failure(is_fatal=True) + self.assertTrue(err.is_blacklisted) + self.assertTrue(err.failure_count < MAX_FAILURE) + return + + def test_str(self): + err = self.create_error(error_data=NO_ERROR) + s = "Last Failure: {0}, Total Failures: {1}, Fatal: {2}, Reason: {3}".format( + NO_ERROR["last_failure"], + NO_ERROR["failure_count"], + NO_ERROR["was_fatal"], + NO_ERROR["reason"]) + self.assertEqual(s, str(err)) + + err = self.create_error(error_data=WITH_ERROR) + s = "Last Failure: {0}, Total Failures: {1}, Fatal: {2}, Reason: {3}".format( + WITH_ERROR["last_failure"], + WITH_ERROR["failure_count"], + WITH_ERROR["was_fatal"], + WITH_ERROR["reason"]) + self.assertEqual(s, str(err)) + return diff --git a/tests/ga/test_report_status.py b/tests/ga/test_report_status.py index c5a20b516..f63d1d42d 100644 --- a/tests/ga/test_report_status.py +++ b/tests/ga/test_report_status.py @@ -3,6 +3,7 @@ import json +from azurelinuxagent.ga.agent_update import get_agent_update_handler from azurelinuxagent.ga.exthandlers import ExtHandlersHandler from azurelinuxagent.ga.update import get_update_handler from tests.ga.mocks import mock_update_handler @@ -78,21 +79,23 @@ def test_report_status_should_log_errors_only_once_per_goal_state(self): update_handler = get_update_handler() update_handler._goal_state = protocol.get_goal_state() # these tests skip the initialization of the goal state. so do that here exthandlers_handler = ExtHandlersHandler(protocol) - update_handler._report_status(exthandlers_handler) + agent_update_handler = get_agent_update_handler(protocol) + agent_update_status = agent_update_handler.get_vmagent_update_status() + update_handler._report_status(exthandlers_handler, agent_update_status) self.assertEqual(0, logger_warn.call_count, "UpdateHandler._report_status() should not report WARNINGS when there are no errors") with patch("azurelinuxagent.ga.update.ExtensionsSummary.__init__", side_effect=Exception("TEST EXCEPTION")): # simulate an error during _report_status() get_warnings = lambda: [args[0] for args, _ in logger_warn.call_args_list if "TEST EXCEPTION" in args[0]] - update_handler._report_status(exthandlers_handler) - update_handler._report_status(exthandlers_handler) - update_handler._report_status(exthandlers_handler) + update_handler._report_status(exthandlers_handler, agent_update_status) + update_handler._report_status(exthandlers_handler, agent_update_status) + update_handler._report_status(exthandlers_handler, agent_update_status) self.assertEqual(1, len(get_warnings()), "UpdateHandler._report_status() should report only 1 WARNING when there are multiple errors within the same goal state") exthandlers_handler.protocol.mock_wire_data.set_incarnation(999) update_handler._try_update_goal_state(exthandlers_handler.protocol) - update_handler._report_status(exthandlers_handler) + update_handler._report_status(exthandlers_handler, agent_update_status) self.assertEqual(2, len(get_warnings()), "UpdateHandler._report_status() should continue reporting errors after a new goal state") def test_update_handler_should_add_fast_track_to_supported_features_when_it_is_supported(self): diff --git a/tests/ga/test_update.py b/tests/ga/test_update.py index 1b84d6f1c..22ec8dbef 100644 --- a/tests/ga/test_update.py +++ b/tests/ga/test_update.py @@ -21,6 +21,8 @@ from datetime import datetime, timedelta from threading import current_thread from azurelinuxagent.common.protocol.imds import ComputeInfo +from azurelinuxagent.ga.guestagent import GuestAgent, GuestAgentError, \ + AGENT_ERROR_FILE, GAUpdateReportState from tests.common.osutil.test_default import TestOSUtil import azurelinuxagent.common.osutil.default as osutil @@ -28,7 +30,7 @@ from azurelinuxagent.common import conf from azurelinuxagent.common.event import EVENTS_DIRECTORY, WALAEventOperation -from azurelinuxagent.common.exception import ProtocolError, UpdateError, HttpError, \ +from azurelinuxagent.common.exception import HttpError, \ ExitException, AgentMemoryExceededException from azurelinuxagent.common.future import ustr, httpclient from azurelinuxagent.common.persist_firewall_rules import PersistFirewallRulesHandler @@ -42,13 +44,12 @@ from azurelinuxagent.common.utils.flexible_version import FlexibleVersion from azurelinuxagent.common.utils.networkutil import FirewallCmdDirectCommands, AddFirewallRules from azurelinuxagent.common.version import AGENT_PKG_GLOB, AGENT_DIR_GLOB, AGENT_NAME, AGENT_DIR_PATTERN, \ - AGENT_VERSION, CURRENT_AGENT, CURRENT_VERSION, set_daemon_version, \ - __DAEMON_VERSION_ENV_VARIABLE as DAEMON_VERSION_ENV_VARIABLE + AGENT_VERSION, CURRENT_AGENT, CURRENT_VERSION from azurelinuxagent.ga.exthandlers import ExtHandlersHandler, ExtHandlerInstance, HandlerEnvironment, ExtensionStatusValue -from azurelinuxagent.ga.update import GuestAgent, GuestAgentError, MAX_FAILURE, AGENT_MANIFEST_FILE, \ - get_update_handler, ORPHAN_POLL_INTERVAL, AGENT_PARTITION_FILE, AGENT_ERROR_FILE, ORPHAN_WAIT_INTERVAL, \ +from azurelinuxagent.ga.update import \ + get_update_handler, ORPHAN_POLL_INTERVAL, AGENT_PARTITION_FILE, ORPHAN_WAIT_INTERVAL, \ CHILD_LAUNCH_RESTART_MAX, CHILD_HEALTH_INTERVAL, GOAL_STATE_PERIOD_EXTENSIONS_DISABLED, UpdateHandler, \ - READONLY_FILE_GLOBS, ExtensionsSummary, AgentUpgradeType + READONLY_FILE_GLOBS, ExtensionsSummary from tests.ga.mocks import mock_update_handler from tests.protocol.mocks import mock_wire_protocol, MockHttpResponse from tests.protocol.mockwiredata import DATA_FILE, DATA_FILE_MULTIPLE_EXT, DATA_FILE_VM_SETTINGS @@ -57,6 +58,7 @@ from tests.protocol import mockwiredata from tests.protocol.HttpRequestPredicates import HttpRequestPredicates + NO_ERROR = { "last_failure": 0.0, "failure_count": 0, @@ -100,7 +102,7 @@ def faux_logger(): @contextlib.contextmanager -def _get_update_handler(iterations=1, test_data=None, protocol=None): +def _get_update_handler(iterations=1, test_data=None, protocol=None, autoupdate_enabled=True): """ This function returns a mocked version of the UpdateHandler object to be used for testing. It will only run the main loop [iterations] no of times. @@ -111,10 +113,10 @@ def _get_update_handler(iterations=1, test_data=None, protocol=None): with patch.object(HostPluginProtocol, "is_default_channel", False): if protocol is None: with mock_wire_protocol(test_data) as mock_protocol: - with mock_update_handler(mock_protocol, iterations=iterations, autoupdate_enabled=True) as update_handler: + with mock_update_handler(mock_protocol, iterations=iterations, autoupdate_enabled=autoupdate_enabled) as update_handler: yield update_handler, mock_protocol else: - with mock_update_handler(protocol, iterations=iterations, autoupdate_enabled=True) as update_handler: + with mock_update_handler(protocol, iterations=iterations, autoupdate_enabled=autoupdate_enabled) as update_handler: yield update_handler, protocol @@ -316,302 +318,6 @@ def replicate_agents(self, return dst_v -class TestGuestAgentError(UpdateTestCase): - def test_creation(self): - self.assertRaises(TypeError, GuestAgentError) - self.assertRaises(UpdateError, GuestAgentError, None) - - with self.get_error_file(error_data=WITH_ERROR) as path: - err = GuestAgentError(path.name) - err.load() - self.assertEqual(path.name, err.path) - self.assertNotEqual(None, err) - - self.assertEqual(WITH_ERROR["last_failure"], err.last_failure) - self.assertEqual(WITH_ERROR["failure_count"], err.failure_count) - self.assertEqual(WITH_ERROR["was_fatal"], err.was_fatal) - return - - def test_clear(self): - with self.get_error_file(error_data=WITH_ERROR) as path: - err = GuestAgentError(path.name) - err.load() - self.assertEqual(path.name, err.path) - self.assertNotEqual(None, err) - - err.clear() - self.assertEqual(NO_ERROR["last_failure"], err.last_failure) - self.assertEqual(NO_ERROR["failure_count"], err.failure_count) - self.assertEqual(NO_ERROR["was_fatal"], err.was_fatal) - return - - def test_save(self): - err1 = self.create_error() - err1.mark_failure() - err1.mark_failure(is_fatal=True) - - err2 = self.create_error(err1.to_json()) - self.assertEqual(err1.last_failure, err2.last_failure) - self.assertEqual(err1.failure_count, err2.failure_count) - self.assertEqual(err1.was_fatal, err2.was_fatal) - - def test_mark_failure(self): - err = self.create_error() - self.assertFalse(err.is_blacklisted) - - for i in range(0, MAX_FAILURE): # pylint: disable=unused-variable - err.mark_failure() - - # Agent failed >= MAX_FAILURE, it should be blacklisted - self.assertTrue(err.is_blacklisted) - self.assertEqual(MAX_FAILURE, err.failure_count) - return - - def test_mark_failure_permanent(self): - err = self.create_error() - - self.assertFalse(err.is_blacklisted) - - # Fatal errors immediately blacklist - err.mark_failure(is_fatal=True) - self.assertTrue(err.is_blacklisted) - self.assertTrue(err.failure_count < MAX_FAILURE) - return - - def test_str(self): - err = self.create_error(error_data=NO_ERROR) - s = "Last Failure: {0}, Total Failures: {1}, Fatal: {2}, Reason: {3}".format( - NO_ERROR["last_failure"], - NO_ERROR["failure_count"], - NO_ERROR["was_fatal"], - NO_ERROR["reason"]) - self.assertEqual(s, str(err)) - - err = self.create_error(error_data=WITH_ERROR) - s = "Last Failure: {0}, Total Failures: {1}, Fatal: {2}, Reason: {3}".format( - WITH_ERROR["last_failure"], - WITH_ERROR["failure_count"], - WITH_ERROR["was_fatal"], - WITH_ERROR["reason"]) - self.assertEqual(s, str(err)) - return - - -class TestGuestAgent(UpdateTestCase): - def setUp(self): - UpdateTestCase.setUp(self) - self.copy_agents(self._get_agent_file_path()) - self.agent_path = os.path.join(self.tmp_dir, self._get_agent_name()) - - def test_creation(self): - with self.assertRaises(UpdateError): - GuestAgent.from_installed_agent("A very bad file name") - - with self.assertRaises(UpdateError): - GuestAgent.from_installed_agent("{0}-a.bad.version".format(AGENT_NAME)) - - self.expand_agents() - - agent = GuestAgent.from_installed_agent(self.agent_path) - self.assertNotEqual(None, agent) - self.assertEqual(self._get_agent_name(), agent.name) - self.assertEqual(self._get_agent_version(), agent.version) - - self.assertEqual(self.agent_path, agent.get_agent_dir()) - - path = os.path.join(self.agent_path, AGENT_MANIFEST_FILE) - self.assertEqual(path, agent.get_agent_manifest_path()) - - self.assertEqual( - os.path.join(self.agent_path, AGENT_ERROR_FILE), - agent.get_agent_error_file()) - - path = ".".join((os.path.join(conf.get_lib_dir(), self._get_agent_name()), "zip")) - self.assertEqual(path, agent.get_agent_pkg_path()) - - self.assertTrue(agent.is_downloaded) - self.assertFalse(agent.is_blacklisted) - self.assertTrue(agent.is_available) - - def test_clear_error(self): - self.expand_agents() - - agent = GuestAgent.from_installed_agent(self.agent_path) - agent.mark_failure(is_fatal=True) - - self.assertTrue(agent.error.last_failure > 0.0) - self.assertEqual(1, agent.error.failure_count) - self.assertTrue(agent.is_blacklisted) - self.assertEqual(agent.is_blacklisted, agent.error.is_blacklisted) - - agent.clear_error() - self.assertEqual(0.0, agent.error.last_failure) - self.assertEqual(0, agent.error.failure_count) - self.assertFalse(agent.is_blacklisted) - self.assertEqual(agent.is_blacklisted, agent.error.is_blacklisted) - - def test_is_available(self): - self.expand_agents() - - agent = GuestAgent.from_installed_agent(self.agent_path) - - self.assertTrue(agent.is_available) - agent.mark_failure(is_fatal=True) - self.assertFalse(agent.is_available) - - def test_is_blacklisted(self): - self.expand_agents() - - agent = GuestAgent.from_installed_agent(self.agent_path) - self.assertFalse(agent.is_blacklisted) - self.assertEqual(agent.is_blacklisted, agent.error.is_blacklisted) - - agent.mark_failure(is_fatal=True) - self.assertTrue(agent.is_blacklisted) - self.assertEqual(agent.is_blacklisted, agent.error.is_blacklisted) - - def test_is_downloaded(self): - self.expand_agents() - agent = GuestAgent.from_installed_agent(self.agent_path) - self.assertTrue(agent.is_downloaded) - - def test_mark_failure(self): - agent = GuestAgent.from_installed_agent(self.agent_path) - - agent.mark_failure() - self.assertEqual(1, agent.error.failure_count) - - agent.mark_failure(is_fatal=True) - self.assertEqual(2, agent.error.failure_count) - self.assertTrue(agent.is_blacklisted) - - def test_load_manifest(self): - self.expand_agents() - agent = GuestAgent.from_installed_agent(self.agent_path) - agent._load_manifest() - self.assertEqual(agent.manifest.get_enable_command(), - agent.get_agent_cmd()) - - def test_load_manifest_missing(self): - self.expand_agents() - agent = GuestAgent.from_installed_agent(self.agent_path) - os.remove(agent.get_agent_manifest_path()) - self.assertRaises(UpdateError, agent._load_manifest) - - def test_load_manifest_is_empty(self): - self.expand_agents() - agent = GuestAgent.from_installed_agent(self.agent_path) - self.assertTrue(os.path.isfile(agent.get_agent_manifest_path())) - - with open(agent.get_agent_manifest_path(), "w") as file: # pylint: disable=redefined-builtin - json.dump(EMPTY_MANIFEST, file) - self.assertRaises(UpdateError, agent._load_manifest) - - def test_load_manifest_is_malformed(self): - self.expand_agents() - agent = GuestAgent.from_installed_agent(self.agent_path) - self.assertTrue(os.path.isfile(agent.get_agent_manifest_path())) - - with open(agent.get_agent_manifest_path(), "w") as file: # pylint: disable=redefined-builtin - file.write("This is not JSON data") - self.assertRaises(UpdateError, agent._load_manifest) - - def test_load_error(self): - agent = GuestAgent.from_installed_agent(self.agent_path) - agent.error = None - - agent._load_error() - self.assertTrue(agent.error is not None) - - def test_download(self): - self.remove_agents() - self.assertFalse(os.path.isdir(self.agent_path)) - - agent_uri = 'https://foo.blob.core.windows.net/bar/OSTCExtensions.WALinuxAgent__1.0.0' - - def http_get_handler(uri, *_, **__): - if uri == agent_uri: - response = load_bin_data(self._get_agent_file_name(), self._agent_zip_dir) - return MockHttpResponse(status=httpclient.OK, body=response) - return None - - pkg = ExtHandlerPackage(version=str(self._get_agent_version())) - pkg.uris.append(agent_uri) - - with mock_wire_protocol(mockwiredata.DATA_FILE) as protocol: - protocol.set_http_handlers(http_get_handler=http_get_handler) - agent = GuestAgent.from_agent_package(pkg, protocol, False) - - self.assertTrue(os.path.isdir(agent.get_agent_dir())) - self.assertTrue(agent.is_downloaded) - - def test_download_fail(self): - self.remove_agents() - self.assertFalse(os.path.isdir(self.agent_path)) - - agent_uri = 'https://foo.blob.core.windows.net/bar/OSTCExtensions.WALinuxAgent__1.0.0' - - def http_get_handler(uri, *_, **__): - if uri in (agent_uri, 'http://168.63.129.16:32526/extensionArtifact'): - return MockHttpResponse(status=httpclient.SERVICE_UNAVAILABLE) - return None - - agent_version = self._get_agent_version() - pkg = ExtHandlerPackage(version=str(agent_version)) - pkg.uris.append(agent_uri) - - with mock_wire_protocol(mockwiredata.DATA_FILE) as protocol: - protocol.set_http_handlers(http_get_handler=http_get_handler) - with patch("azurelinuxagent.ga.update.add_event") as add_event: - agent = GuestAgent.from_agent_package(pkg, protocol, False) - - self.assertFalse(os.path.isfile(self.agent_path)) - - messages = [kwargs['message'] for _, kwargs in add_event.call_args_list if kwargs['op'] == 'Install' and kwargs['is_success'] == False] - self.assertEqual(1, len(messages), "Expected exactly 1 install error/ Got: {0}".format(add_event.call_args_list)) - self.assertIn(str.format('[UpdateError] Unable to download Agent WALinuxAgent-{0}', agent_version), messages[0], "The install error does not include the expected message") - - self.assertFalse(agent.is_blacklisted, "Download failures should not blacklist the Agent") - - def test_invalid_agent_package_does_not_blacklist_the_agent(self): - agent_uri = 'https://foo.blob.core.windows.net/bar/OSTCExtensions.WALinuxAgent__9.9.9.9' - - def http_get_handler(uri, *_, **__): - if uri in (agent_uri, 'http://168.63.129.16:32526/extensionArtifact'): - response = load_bin_data("ga/WALinuxAgent-9.9.9.9-no_manifest.zip") - return MockHttpResponse(status=httpclient.OK, body=response) - return None - - pkg = ExtHandlerPackage(version="9.9.9.9") - pkg.uris.append(agent_uri) - - with mock_wire_protocol(mockwiredata.DATA_FILE) as protocol: - protocol.set_http_handlers(http_get_handler=http_get_handler) - agent = GuestAgent.from_agent_package(pkg, protocol, False) - - self.assertFalse(agent.is_blacklisted, "The agent should not be blacklisted if unable to unpack/download") - self.assertFalse(os.path.exists(agent.get_agent_dir()), "Agent directory should be cleaned up") - - @patch("azurelinuxagent.ga.update.GuestAgent._download") - def test_ensure_download_skips_blacklisted(self, mock_download): - agent = GuestAgent.from_installed_agent(self.agent_path) - self.assertEqual(0, mock_download.call_count) - - agent.clear_error() - agent.mark_failure(is_fatal=True) - self.assertTrue(agent.is_blacklisted) - - pkg = ExtHandlerPackage(version=str(self._get_agent_version())) - pkg.uris.append(None) - # _download is mocked so there will be no http request; passing a None protocol - agent = GuestAgent.from_agent_package(pkg, None, False) - - self.assertEqual(1, agent.error.failure_count) - self.assertTrue(agent.error.was_fatal) - self.assertTrue(agent.is_blacklisted) - self.assertEqual(0, mock_download.call_count) - - class TestUpdate(UpdateTestCase): def setUp(self): UpdateTestCase.setUp(self) @@ -629,8 +335,6 @@ def setUp(self): clear_singleton_instances(ProtocolUtil) def test_creation(self): - self.assertEqual(None, self.update_handler.last_attempt_time) - self.assertEqual(0, len(self.update_handler.agents)) self.assertEqual(None, self.update_handler.child_agent) @@ -854,9 +558,6 @@ def test_get_latest_agent(self): def test_get_latest_agent_excluded(self): self.prepare_agent(AGENT_VERSION) - self.assertFalse(self._test_upgrade_available( - versions=self.agent_versions(), - count=1)) self.assertEqual(None, self.update_handler.get_latest_agent_greater_than_daemon()) def test_get_latest_agent_no_updates(self): @@ -1193,85 +894,6 @@ def test_shutdown_ignores_exceptions(self): except Exception as e: # pylint: disable=unused-variable self.assertTrue(False, "Unexpected exception") # pylint: disable=redundant-unittest-assert - def _test_upgrade_available( - self, - base_version=FlexibleVersion(AGENT_VERSION), - protocol=None, - versions=None, - count=20): - - if protocol is None: - protocol = self._create_protocol(count=count, versions=versions) - - self.update_handler.protocol_util = protocol - self.update_handler._goal_state = protocol.get_goal_state() - self.update_handler._goal_state.extensions_goal_state.is_outdated = False - conf.get_autoupdate_gafamily = Mock(return_value=protocol.family) - - return self.update_handler._download_agent_if_upgrade_available(protocol, base_version=base_version) - - def test_upgrade_available_returns_true_on_first_use(self): - self.assertTrue(self._test_upgrade_available()) - - def test_upgrade_available_handles_missing_family(self): - data_file = mockwiredata.DATA_FILE.copy() - data_file["ext_conf"] = "wire/ext_conf_missing_family.xml" - - with mock_wire_protocol(data_file) as protocol: - self.update_handler.protocol_util = protocol - with patch('azurelinuxagent.common.logger.warn') as mock_logger: - with patch('azurelinuxagent.common.protocol.goal_state.GoalState.fetch_agent_manifest', side_effect=ProtocolError): - self.assertFalse(self.update_handler._download_agent_if_upgrade_available(protocol, base_version=CURRENT_VERSION)) - self.assertEqual(0, mock_logger.call_count) - - def test_upgrade_available_includes_old_agents(self): - self.prepare_agents() - - old_version = self.agent_versions()[-1] - old_count = old_version.version[-1] - - self.replicate_agents(src_v=old_version, count=old_count, increment=-1) - all_count = len(self.agent_versions()) - - self.assertTrue(self._test_upgrade_available(versions=self.agent_versions())) - self.assertEqual(all_count, len(self.update_handler.agents)) - - def test_upgrade_available_purges_old_agents(self): - self.prepare_agents() - agent_count = self.agent_count() - self.assertEqual(20, agent_count) - - agent_versions = self.agent_versions()[:3] - self.assertTrue(self._test_upgrade_available(versions=agent_versions)) - self.assertEqual(len(agent_versions), len(self.update_handler.agents)) - - # Purging always keeps the running agent - if CURRENT_VERSION not in agent_versions: - agent_versions.append(CURRENT_VERSION) - self.assertEqual(agent_versions, self.agent_versions()) - - def test_upgrade_available_skips_if_too_frequent(self): - conf.get_autoupdate_frequency = Mock(return_value=10000) - self.update_handler.last_attempt_time = time.time() - self.assertFalse(self._test_upgrade_available()) - - def test_upgrade_available_skips_when_no_new_versions(self): - self.prepare_agents() - base_version = self.agent_versions()[0] + 1 - self.assertFalse(self._test_upgrade_available(base_version=base_version)) - - def test_upgrade_available_skips_when_no_versions(self): - self.assertFalse(self._test_upgrade_available(protocol=ProtocolMock())) - - def test_upgrade_available_sorts(self): - self.prepare_agents() - self._test_upgrade_available() - - v = FlexibleVersion("100000") - for a in self.update_handler.agents: - self.assertTrue(v > a.version) - v = a.version - def test_write_pid_file(self): for n in range(1112): fileutil.write_file(os.path.join(self.tmp_dir, str(n) + "_waagent.pid"), ustr(n + 1)) @@ -1296,7 +918,7 @@ def test_update_happens_when_extensions_disabled(self): behavior never changes. """ with patch('azurelinuxagent.common.conf.get_extensions_enabled', return_value=False): - with patch('azurelinuxagent.ga.update.UpdateHandler._download_agent_if_upgrade_available', return_value=True) as download_agent: + with patch('azurelinuxagent.ga.agent_update.AgentUpdateHandler.run') as download_agent: with mock_wire_protocol(DATA_FILE) as protocol: with mock_update_handler(protocol, autoupdate_enabled=True) as update_handler: update_handler.run() @@ -1359,7 +981,7 @@ def match_expected_info(): def test_it_should_recreate_handler_env_on_service_startup(self): iterations = 5 - with _get_update_handler(iterations) as (update_handler, protocol): + with _get_update_handler(iterations, autoupdate_enabled=False) as (update_handler, protocol): update_handler.run(debug=True) expected_handler = self._get_test_ext_handler_instance(protocol) @@ -1376,7 +998,7 @@ def test_it_should_recreate_handler_env_on_service_startup(self): # re-runnning the update handler. Then,ensure that the HandlerEnvironment file is recreated with eventsFolder # flag in HandlerEnvironment.json file. self._add_write_permission_to_goal_state_files() - with _get_update_handler(iterations=1) as (update_handler, protocol): + with _get_update_handler(iterations=1, autoupdate_enabled=False) as (update_handler, protocol): with patch("azurelinuxagent.common.agent_supported_feature._ETPFeature.is_supported", True): update_handler.run(debug=True) @@ -1574,7 +1196,7 @@ def test_it_should_not_set_dns_tcp_iptable_if_drop_and_accept_available(self): @contextlib.contextmanager def _setup_test_for_ext_event_dirs_retention(self): try: - with _get_update_handler(test_data=DATA_FILE_MULTIPLE_EXT) as (update_handler, protocol): + with _get_update_handler(test_data=DATA_FILE_MULTIPLE_EXT, autoupdate_enabled=False) as (update_handler, protocol): with patch("azurelinuxagent.common.agent_supported_feature._ETPFeature.is_supported", True): update_handler.run(debug=True) expected_events_dirs = glob.glob(os.path.join(conf.get_ext_log_dir(), "*", EVENTS_DIRECTORY)) @@ -1624,62 +1246,69 @@ def test_it_should_recreate_extension_event_directories_for_existing_extensions_ def test_it_should_report_update_status_in_status_blob(self): with mock_wire_protocol(DATA_FILE) as protocol: - with patch.object(conf, "get_enable_ga_versioning", return_value=True): - with patch.object(conf, "get_autoupdate_gafamily", return_value="Prod"): - with patch("azurelinuxagent.common.logger.warn") as patch_warn: - - protocol.aggregate_status = None - protocol.incarnation = 1 - - def mock_http_put(url, *args, **_): - if HttpRequestPredicates.is_host_plugin_status_request(url): - # Skip reading the HostGA request data as its encoded - return MockHttpResponse(status=500) - protocol.aggregate_status = json.loads(args[0]) - return MockHttpResponse(status=201) - - def update_goal_state_and_run_handler(): - protocol.incarnation += 1 - protocol.mock_wire_data.set_incarnation(protocol.incarnation) - self._add_write_permission_to_goal_state_files() - with _get_update_handler(iterations=1, protocol=protocol) as (update_handler, _): - update_handler.run(debug=True) - self.assertEqual(0, update_handler.get_exit_code(), - "Exit code should be 0; List of all warnings logged by the agent: {0}".format( - patch_warn.call_args_list)) - - protocol.set_http_handlers(http_put_handler=mock_http_put) - - # Case 1: No requested version in GS; updateStatus should not be reported - update_goal_state_and_run_handler() - self.assertFalse("updateStatus" in protocol.aggregate_status['aggregateStatus']['guestAgentStatus'], - "updateStatus should not be reported if not asked in GS") - - # Case 2: Requested version in GS != Current Version; updateStatus should be error - protocol.mock_wire_data.set_extension_config("wire/ext_conf_requested_version.xml") - update_goal_state_and_run_handler() - self.assertTrue("updateStatus" in protocol.aggregate_status['aggregateStatus']['guestAgentStatus'], - "updateStatus should be in status blob. Warns: {0}".format(patch_warn.call_args_list)) - update_status = protocol.aggregate_status['aggregateStatus']['guestAgentStatus']["updateStatus"] - self.assertEqual(VMAgentUpdateStatuses.Error, update_status['status'], "Status should be an error") - self.assertEqual(update_status['expectedVersion'], "9.9.9.10", "incorrect version reported") - self.assertEqual(update_status['code'], 1, "incorrect code reported") - - # Case 3: Requested version in GS == Current Version; updateStatus should be Success - protocol.mock_wire_data.set_extension_config_requested_version(str(CURRENT_VERSION)) - update_goal_state_and_run_handler() - self.assertTrue("updateStatus" in protocol.aggregate_status['aggregateStatus']['guestAgentStatus'], - "updateStatus should be reported if asked in GS") - update_status = protocol.aggregate_status['aggregateStatus']['guestAgentStatus']["updateStatus"] - self.assertEqual(VMAgentUpdateStatuses.Success, update_status['status'], "Status should be successful") - self.assertEqual(update_status['expectedVersion'], str(CURRENT_VERSION), "incorrect version reported") - self.assertEqual(update_status['code'], 0, "incorrect code reported") - - # Case 4: Requested version removed in GS; no updateStatus should be reported - protocol.mock_wire_data.reload() - update_goal_state_and_run_handler() - self.assertFalse("updateStatus" in protocol.aggregate_status['aggregateStatus']['guestAgentStatus'], - "updateStatus should not be reported if not asked in GS") + with patch.object(conf, "get_autoupdate_gafamily", return_value="Prod"): + with patch("azurelinuxagent.common.logger.warn") as patch_warn: + + protocol.aggregate_status = None + protocol.incarnation = 1 + + def get_handler(url, **kwargs): + if HttpRequestPredicates.is_agent_package_request(url): + return MockHttpResponse(status=httpclient.SERVICE_UNAVAILABLE) + return protocol.mock_wire_data.mock_http_get(url, **kwargs) + + def put_handler(url, *args, **_): + if HttpRequestPredicates.is_host_plugin_status_request(url): + # Skip reading the HostGA request data as its encoded + return MockHttpResponse(status=500) + protocol.aggregate_status = json.loads(args[0]) + return MockHttpResponse(status=201) + + def update_goal_state_and_run_handler(autoupdate_enabled = True): + protocol.incarnation += 1 + protocol.mock_wire_data.set_incarnation(protocol.incarnation) + self._add_write_permission_to_goal_state_files() + with _get_update_handler(iterations=1, protocol=protocol, autoupdate_enabled=autoupdate_enabled) as (update_handler, _): + GAUpdateReportState.report_error_msg = "" + update_handler.run(debug=True) + self.assertEqual(0, update_handler.get_exit_code(), + "Exit code should be 0; List of all warnings logged by the agent: {0}".format( + patch_warn.call_args_list)) + + protocol.set_http_handlers(http_get_handler=get_handler, http_put_handler=put_handler) + + # Case 1: Requested version removed in GS; report missing requested version errr + protocol.mock_wire_data.set_extension_config("wire/ext_conf.xml") + protocol.mock_wire_data.reload() + update_goal_state_and_run_handler() + self.assertTrue("updateStatus" in protocol.aggregate_status['aggregateStatus']['guestAgentStatus'], + "updateStatus should be reported") + update_status = protocol.aggregate_status['aggregateStatus']['guestAgentStatus']["updateStatus"] + self.assertEqual(VMAgentUpdateStatuses.Error, update_status['status'], "Status should be an error") + self.assertEqual(update_status['code'], 1, "incorrect code reported") + self.assertIn("Missing requested version", update_status['formattedMessage']['message'], "incorrect message reported") + + # Case 2: Requested version in GS == Current Version; updateStatus should be Success + protocol.mock_wire_data.set_extension_config("wire/ext_conf_requested_version.xml") + protocol.mock_wire_data.set_extension_config_requested_version(str(CURRENT_VERSION)) + update_goal_state_and_run_handler() + self.assertTrue("updateStatus" in protocol.aggregate_status['aggregateStatus']['guestAgentStatus'], + "updateStatus should be reported if asked in GS") + update_status = protocol.aggregate_status['aggregateStatus']['guestAgentStatus']["updateStatus"] + self.assertEqual(VMAgentUpdateStatuses.Success, update_status['status'], "Status should be successful") + self.assertEqual(update_status['expectedVersion'], str(CURRENT_VERSION), "incorrect version reported") + self.assertEqual(update_status['code'], 0, "incorrect code reported") + + # Case 3: Requested version in GS != Current Version; update fail and report error + protocol.mock_wire_data.set_extension_config("wire/ext_conf_requested_version.xml") + protocol.mock_wire_data.set_extension_config_requested_version("5.2.0.1") + update_goal_state_and_run_handler() + self.assertTrue("updateStatus" in protocol.aggregate_status['aggregateStatus']['guestAgentStatus'], + "updateStatus should be in status blob. Warns: {0}".format(patch_warn.call_args_list)) + update_status = protocol.aggregate_status['aggregateStatus']['guestAgentStatus']["updateStatus"] + self.assertEqual(VMAgentUpdateStatuses.Error, update_status['status'], "Status should be an error") + self.assertEqual(update_status['expectedVersion'], "5.2.0.1", "incorrect version reported") + self.assertEqual(update_status['code'], 1, "incorrect code reported") def test_it_should_wait_to_fetch_first_goal_state(self): with _get_update_handler() as (update_handler, protocol): @@ -1722,7 +1351,7 @@ def test_it_should_reset_legacy_blacklisted_agents_on_process_start(self): else: self.assertFalse(agent.is_blacklisted, "Agent {0} should not be blacklisted".format(agent.name)) - with _get_update_handler() as (update_handler, _): + with _get_update_handler(autoupdate_enabled=False) as (update_handler, _): update_handler.run(debug=True) self.assertEqual(20, self.agent_count(), "All agents should be available on disk") # Ensure none of the agents are blacklisted @@ -1777,11 +1406,6 @@ def _test_run(self, autoupdate_enabled=False, check_daemon_running=False, expect def test_run(self): self._test_run() - def test_run_stops_if_update_available(self): - with patch('azurelinuxagent.ga.update.UpdateHandler._download_agent_if_upgrade_available', return_value=True): - update_handler = self._test_run(autoupdate_enabled=True) - self.assertEqual(0, update_handler.get_iterations_completed()) - def test_run_stops_if_orphaned(self): with patch('os.getppid', return_value=1): update_handler = self._test_run(check_daemon_running=True) @@ -1792,7 +1416,7 @@ def test_run_clears_sentinel_on_successful_exit(self): self.assertFalse(os.path.isfile(update_handler._sentinel_file_path())) def test_run_leaves_sentinel_on_unsuccessful_exit(self): - with patch('azurelinuxagent.ga.update.UpdateHandler._download_agent_if_upgrade_available', side_effect=Exception): + with patch('azurelinuxagent.ga.agent_update.AgentUpdateHandler.run', side_effect=Exception): update_handler = self._test_run(autoupdate_enabled=True,expected_exit_code=1) self.assertTrue(os.path.isfile(update_handler._sentinel_file_path())) @@ -1804,20 +1428,16 @@ def test_run_emits_restart_event(self): class TestAgentUpgrade(UpdateTestCase): @contextlib.contextmanager - def create_conf_mocks(self, hotfix_frequency, normal_frequency): + def create_conf_mocks(self, autoupdate_frequency): # Disabling extension processing to speed up tests as this class deals with testing agent upgrades with patch("azurelinuxagent.common.conf.get_extensions_enabled", return_value=False): - with patch("azurelinuxagent.common.conf.get_autoupdate_frequency", return_value=0.001): - with patch("azurelinuxagent.common.conf.get_hotfix_upgrade_frequency", - return_value=hotfix_frequency): - with patch("azurelinuxagent.common.conf.get_normal_upgrade_frequency", - return_value=normal_frequency): - with patch("azurelinuxagent.common.conf.get_autoupdate_gafamily", return_value="Prod"): - yield + with patch("azurelinuxagent.common.conf.get_autoupdate_frequency", return_value=autoupdate_frequency): + with patch("azurelinuxagent.common.conf.get_autoupdate_gafamily", return_value="Prod"): + yield @contextlib.contextmanager - def __get_update_handler(self, iterations=1, test_data=None, hotfix_frequency=1.0, normal_frequency=2.0, - reload_conf=None): + def __get_update_handler(self, iterations=1, test_data=None, + reload_conf=None, autoupdate_frequency=0.001): test_data = DATA_FILE if test_data is None else test_data @@ -1843,32 +1463,23 @@ def put_handler(url, *args, **_): return MockHttpResponse(status=201) protocol.set_http_handlers(http_get_handler=get_handler, http_put_handler=put_handler) - with self.create_conf_mocks(hotfix_frequency, normal_frequency): - with patch("azurelinuxagent.ga.update.add_event") as mock_telemetry: + with self.create_conf_mocks(autoupdate_frequency): + with patch("azurelinuxagent.common.event.EventLogger.add_event") as mock_telemetry: update_handler._protocol = protocol yield update_handler, mock_telemetry def __assert_exit_code_successful(self, update_handler): self.assertEqual(0, update_handler.get_exit_code(), "Exit code should be 0") - def __assert_upgrade_telemetry_emitted_for_requested_version(self, mock_telemetry, upgrade=True, version="99999.0.0.0"): + def __assert_upgrade_telemetry_emitted(self, mock_telemetry, upgrade=True, version="9.9.9.10"): upgrade_event_msgs = [kwarg['message'] for _, kwarg in mock_telemetry.call_args_list if - 'Exiting current process to {0} to the request Agent version {1}'.format( + 'Agent update found, Exiting current process to {0} to the new Agent version {1}'.format( "upgrade" if upgrade else "downgrade", version) in kwarg['message'] and kwarg[ 'op'] == WALAEventOperation.AgentUpgrade] self.assertEqual(1, len(upgrade_event_msgs), "Did not find the event indicating that the agent was upgraded. Got: {0}".format( mock_telemetry.call_args_list)) - def __assert_upgrade_telemetry_emitted(self, mock_telemetry, upgrade_type=AgentUpgradeType.Normal): - upgrade_event_msgs = [kwarg['message'] for _, kwarg in mock_telemetry.call_args_list if - '{0} Agent upgrade discovered, updating to WALinuxAgent-99999.0.0.0 -- exiting'.format( - upgrade_type) in kwarg['message'] and kwarg[ - 'op'] == WALAEventOperation.AgentUpgrade] - self.assertEqual(1, len(upgrade_event_msgs), - "Did not find the event indicating that the agent was upgraded. Got: {0}".format( - mock_telemetry.call_args_list)) - def __assert_agent_directories_available(self, versions): for version in versions: self.assertTrue(os.path.exists(self.agent_dir(version)), "Agent directory {0} not found".format(version)) @@ -1880,11 +1491,6 @@ def __assert_agent_directories_exist_and_others_dont_exist(self, versions): self.assertFalse(any(other_agents), "All other agents should be purged from agent dir: {0}".format(other_agents)) - def __assert_no_agent_upgrade_telemetry(self, mock_telemetry): - self.assertEqual(0, len([kwarg['message'] for _, kwarg in mock_telemetry.call_args_list if - "Agent upgrade discovered, updating to" in kwarg['message'] and kwarg[ - 'op'] == WALAEventOperation.AgentUpgrade]), "Unwanted upgrade") - def __assert_ga_version_in_status(self, aggregate_status, version=str(CURRENT_VERSION)): self.assertIsNotNone(aggregate_status, "Status should be reported") self.assertEqual(aggregate_status['aggregateStatus']['guestAgentStatus']['version'], version, @@ -1893,128 +1499,64 @@ def __assert_ga_version_in_status(self, aggregate_status, version=str(CURRENT_VE "Guest Agent should be reported as Ready") def test_it_should_upgrade_agent_on_process_start_if_auto_upgrade_enabled(self): - with self.__get_update_handler(iterations=10) as (update_handler, mock_telemetry): - + data_file = mockwiredata.DATA_FILE.copy() + data_file["ext_conf"] = "wire/ext_conf_requested_version.xml" + with self.__get_update_handler(test_data=data_file, iterations=10) as (update_handler, mock_telemetry): update_handler.run(debug=True) self.__assert_exit_code_successful(update_handler) self.assertEqual(1, update_handler.get_iterations(), "Update handler should've exited after the first run") - self.__assert_agent_directories_available(versions=["99999.0.0.0"]) + self.__assert_agent_directories_available(versions=["9.9.9.10"]) self.__assert_upgrade_telemetry_emitted(mock_telemetry) - def test_it_should_download_new_agents_and_not_auto_upgrade_if_not_permitted(self): + def test_it_should_not_update_agent_if_last_update_time_not_permitted(self): no_of_iterations = 10 data_file = DATA_FILE.copy() - data_file['ga_manifest'] = "wire/ga_manifest_no_upgrade.xml" - - def reload_conf(url, protocol): - mock_wire_data = protocol.mock_wire_data - # This function reloads the conf mid-run to mimic an actual customer scenario - if HttpRequestPredicates.is_ga_manifest_request(url) and mock_wire_data.call_counts["manifest_of_ga.xml"] >= no_of_iterations/2: - reload_conf.call_count += 1 - # Ensure the first set of versions were downloaded as part of the first manifest - self.__assert_agent_directories_available(versions=["1.0.0", "1.1.0", "1.2.0"]) - # As per our current agent upgrade model, we don't rely on an incarnation update to upgrade the agent. Mocking the same - mock_wire_data.data_files["ga_manifest"] = "wire/ga_manifest.xml" - mock_wire_data.reload() - - reload_conf.call_count = 0 - - with self.__get_update_handler(iterations=no_of_iterations, test_data=data_file, hotfix_frequency=10, - normal_frequency=10, reload_conf=reload_conf) as (update_handler, mock_telemetry): + data_file['ext_conf'] = "wire/ext_conf_requested_version.xml" + + self.prepare_agents(1) + test_frequency = 10 + with self.__get_update_handler(iterations=no_of_iterations, test_data=data_file, + autoupdate_frequency=test_frequency) as (update_handler, _): + update_handler._protocol.mock_wire_data.set_ga_manifest_version_version("5.2.0.1") + update_handler._protocol.mock_wire_data.set_incarnation(2) update_handler.run(debug=True) - self.assertGreater(reload_conf.call_count, 0, "Ensure the conf reload was called") self.__assert_exit_code_successful(update_handler) self.assertEqual(no_of_iterations, update_handler.get_iterations(), "Update handler should've run its course") - # Ensure the new agent versions were also downloaded once the manifest was updated - self.__assert_agent_directories_available(versions=["2.0.0", "2.1.0", "99999.0.0.0"]) - self.__assert_no_agent_upgrade_telemetry(mock_telemetry) - - def test_it_should_upgrade_agent_in_given_time_window_if_permitted(self): - data_file = DATA_FILE.copy() - data_file['ga_manifest'] = "wire/ga_manifest_no_upgrade.xml" - - def reload_conf(url, protocol): - mock_wire_data = protocol.mock_wire_data - # This function reloads the conf mid-run to mimic an actual customer scenario - if HttpRequestPredicates.is_ga_manifest_request(url) and mock_wire_data.call_counts["manifest_of_ga.xml"] >= 2: - reload_conf.call_count += 1 - # Ensure no new agent available so far - self.assertFalse(os.path.exists(self.agent_dir("99999.0.0.0")), "New agent directory should not be found") - # As per our current agent upgrade model, we don't rely on an incarnation update to upgrade the agent. Mocking the same - mock_wire_data.data_files["ga_manifest"] = "wire/ga_manifest.xml" - mock_wire_data.reload() - - reload_conf.call_count = 0 - test_normal_frequency = 0.1 - with self.__get_update_handler(iterations=50, test_data=data_file, reload_conf=reload_conf, - normal_frequency=test_normal_frequency) as (update_handler, mock_telemetry): - start_time = time.time() - update_handler.run(debug=True) - diff = time.time() - start_time - - self.assertGreater(reload_conf.call_count, 0, "Ensure the conf reload was called") - self.__assert_exit_code_successful(update_handler) - self.assertGreaterEqual(update_handler.get_iterations(), 3, - "Update handler should've run at least until the new GA was available") - # A bare-bone check to ensure that the agent waited for the new agent at least for the preset frequency time - self.assertGreater(diff, test_normal_frequency, "The test run should be at least greater than the set frequency") - self.__assert_agent_directories_available(versions=["99999.0.0.0"]) - self.__assert_upgrade_telemetry_emitted(mock_telemetry) + self.assertFalse(os.path.exists(self.agent_dir("5.2.0.1")), + "New agent directory should not be found") def test_it_should_not_auto_upgrade_if_auto_update_disabled(self): - with self.__get_update_handler(iterations=10) as (update_handler, mock_telemetry): + with self.__get_update_handler(iterations=10) as (update_handler, _): with patch("azurelinuxagent.common.conf.get_autoupdate_enabled", return_value=False): update_handler.run(debug=True) self.__assert_exit_code_successful(update_handler) self.assertGreaterEqual(update_handler.get_iterations(), 10, "Update handler should've run 10 times") - self.__assert_no_agent_upgrade_telemetry(mock_telemetry) self.assertFalse(os.path.exists(self.agent_dir("99999.0.0.0")), "New agent directory should not be found") - def test_it_should_not_auto_upgrade_if_corresponding_time_not_elapsed(self): - # On Normal upgrade, should not upgrade if Hotfix time elapsed - no_of_iterations = 10 - data_file = DATA_FILE.copy() - data_file['ga_manifest'] = "wire/ga_manifest_no_upgrade.xml" - - def reload_conf(url, protocol): - mock_wire_data = protocol.mock_wire_data - # This function reloads the conf mid-run to mimic an actual customer scenario - if HttpRequestPredicates.is_ga_manifest_request(url) and mock_wire_data.call_counts["manifest_of_ga.xml"] >= no_of_iterations / 2: - reload_conf.call_count += 1 - # As per our current agent upgrade model, we don't rely on an incarnation update to upgrade the agent. Mocking the same - mock_wire_data.data_files["ga_manifest"] = "wire/ga_manifest.xml" - mock_wire_data.reload() - - reload_conf.call_count = 0 - - with self.__get_update_handler(iterations=no_of_iterations, test_data=data_file, hotfix_frequency=0.01, - normal_frequency=10, reload_conf=reload_conf) as (update_handler, mock_telemetry): + def test_it_should_download_only_requested_version_if_available(self): + data_file = mockwiredata.DATA_FILE.copy() + data_file["ext_conf"] = "wire/ext_conf_requested_version.xml" + with self.__get_update_handler(test_data=data_file) as (update_handler, mock_telemetry): update_handler.run(debug=True) - self.assertGreater(reload_conf.call_count, 0, "Ensure the conf reload was called") - self.__assert_exit_code_successful(update_handler) - self.assertEqual(no_of_iterations, update_handler.get_iterations(), "Update handler didn't run completely") - self.__assert_no_agent_upgrade_telemetry(mock_telemetry) - upgrade_event_msgs = [kwarg['message'] for _, kwarg in mock_telemetry.call_args_list if - kwarg['op'] == WALAEventOperation.AgentUpgrade] - self.assertGreater(len([msg for msg in upgrade_event_msgs if - 'Discovered new {0} upgrade WALinuxAgent-99999.0.0.0; Will upgrade on or after'.format( - AgentUpgradeType.Normal) in msg]), 0, "Error message not propagated properly") + self.__assert_exit_code_successful(update_handler) + self.__assert_upgrade_telemetry_emitted(mock_telemetry, version="9.9.9.10") + self.__assert_agent_directories_exist_and_others_dont_exist(versions=["9.9.9.10"]) - def test_it_should_download_only_requested_version_if_available(self): + def test_it_should_download_largest_version_if_ga_versioning_disabled(self): data_file = mockwiredata.DATA_FILE.copy() data_file["ext_conf"] = "wire/ext_conf_requested_version.xml" with self.__get_update_handler(test_data=data_file) as (update_handler, mock_telemetry): - with patch.object(conf, "get_enable_ga_versioning", return_value=True): + with patch.object(conf, "get_enable_ga_versioning", return_value=False): update_handler.run(debug=True) - self.__assert_exit_code_successful(update_handler) - self.__assert_upgrade_telemetry_emitted_for_requested_version(mock_telemetry, version="9.9.9.10") - self.__assert_agent_directories_exist_and_others_dont_exist(versions=["9.9.9.10"]) + self.__assert_exit_code_successful(update_handler) + self.__assert_upgrade_telemetry_emitted(mock_telemetry, version="99999.0.0.0") + self.__assert_agent_directories_exist_and_others_dont_exist(versions=["99999.0.0.0"]) def test_it_should_cleanup_all_agents_except_requested_version_and_current_version(self): data_file = mockwiredata.DATA_FILE.copy() @@ -2025,37 +1567,36 @@ def test_it_should_cleanup_all_agents_except_requested_version_and_current_versi self.assertEqual(20, self.agent_count(), "Agent directories not set properly") with self.__get_update_handler(test_data=data_file) as (update_handler, mock_telemetry): - with patch.object(conf, "get_enable_ga_versioning", return_value=True): - update_handler.run(debug=True) + update_handler.run(debug=True) - self.__assert_exit_code_successful(update_handler) - self.__assert_upgrade_telemetry_emitted_for_requested_version(mock_telemetry, version="9.9.9.10") - self.__assert_agent_directories_exist_and_others_dont_exist(versions=["9.9.9.10", str(CURRENT_VERSION)]) + self.__assert_exit_code_successful(update_handler) + self.__assert_upgrade_telemetry_emitted(mock_telemetry, version="9.9.9.10") + self.__assert_agent_directories_exist_and_others_dont_exist(versions=["9.9.9.10", str(CURRENT_VERSION)]) def test_it_should_not_update_if_requested_version_not_found_in_manifest(self): + self.prepare_agents(1) data_file = mockwiredata.DATA_FILE.copy() - data_file["ext_conf"] = "wire/ext_conf_missing_requested_version.xml" + data_file["ext_conf"] = "wire/ext_conf_requested_version_missing_in_manifest.xml" with self.__get_update_handler(test_data=data_file) as (update_handler, mock_telemetry): - with patch.object(conf, "get_enable_ga_versioning", return_value=True): - update_handler.run(debug=True) + update_handler.run(debug=True) - self.__assert_exit_code_successful(update_handler) - self.__assert_no_agent_upgrade_telemetry(mock_telemetry) - agent_msgs = [kwarg for _, kwarg in mock_telemetry.call_args_list if - kwarg['op'] in (WALAEventOperation.AgentUpgrade, WALAEventOperation.Download)] - # This will throw if corresponding message not found so not asserting on that - requested_version_found = next(kwarg for kwarg in agent_msgs if - "Found requested version in manifest: 5.2.1.0 for goal state incarnation_1" in kwarg['message']) - self.assertTrue(requested_version_found['is_success'], - "The requested version found op should be reported as a success") - - skipping_update = next(kwarg for kwarg in agent_msgs if - "No matching package found in the agent manifest for requested version: 5.2.1.0 in goal state incarnation_1, skipping agent update" in kwarg['message']) - self.assertEqual(skipping_update['version'], FlexibleVersion("5.2.1.0"), - "The not found message should be reported from requested agent version") - self.assertFalse(skipping_update['is_success'], "The not found op should be reported as a failure") - - def test_it_should_only_try_downloading_requested_version_on_new_incarnation(self): + self.__assert_exit_code_successful(update_handler) + self.__assert_agent_directories_exist_and_others_dont_exist(versions=[str(CURRENT_VERSION)]) + agent_msgs = [kwarg for _, kwarg in mock_telemetry.call_args_list if + kwarg['op'] in (WALAEventOperation.AgentUpgrade, WALAEventOperation.Download)] + # This will throw if corresponding message not found so not asserting on that + requested_version_found = next(kwarg for kwarg in agent_msgs if + "Goal state incarnation_1 is requesting a new agent version 5.2.1.0, will update the agent before processing the goal state" in kwarg['message']) + self.assertTrue(requested_version_found['is_success'], + "The requested version found op should be reported as a success") + + skipping_update = next(kwarg for kwarg in agent_msgs if + "No matching package found in the agent manifest for requested version: 5.2.1.0 in goal state incarnation: incarnation_1, skipping agent update" in kwarg['message']) + self.assertEqual(skipping_update['version'], str(CURRENT_VERSION), + "The not found message should be reported from current agent version") + self.assertFalse(skipping_update['is_success'], "The not found op should be reported as a failure") + + def test_it_should_try_downloading_requested_version_on_new_incarnation(self): no_of_iterations = 1000 # Set the test environment by adding 20 random agents to the agent directory @@ -2070,7 +1611,7 @@ def reload_conf(url, protocol): "goalstate"] >= 10 and mock_wire_data.call_counts["goalstate"] < 15: # Ensure we didn't try to download any agents except during the incarnation change - self.__assert_agent_directories_exist_and_others_dont_exist(versions=[str(CURRENT_VERSION)]) + self.__assert_agent_directories_available(versions=[str(CURRENT_VERSION)]) # Update the requested version to "99999.0.0.0" update_handler._protocol.mock_wire_data.set_extension_config_requested_version("99999.0.0.0") @@ -2084,23 +1625,21 @@ def reload_conf(url, protocol): data_file = mockwiredata.DATA_FILE.copy() data_file["ext_conf"] = "wire/ext_conf_requested_version.xml" - with self.__get_update_handler(iterations=no_of_iterations, test_data=data_file, reload_conf=reload_conf, - normal_frequency=0.01, hotfix_frequency=0.01) as (update_handler, mock_telemetry): - with patch.object(conf, "get_enable_ga_versioning", return_value=True): - update_handler._protocol.mock_wire_data.set_extension_config_requested_version(str(CURRENT_VERSION)) - update_handler._protocol.mock_wire_data.set_incarnation(2) - update_handler.run(debug=True) + with self.__get_update_handler(iterations=no_of_iterations, test_data=data_file, reload_conf=reload_conf) as (update_handler, mock_telemetry): + update_handler._protocol.mock_wire_data.set_extension_config_requested_version(str(CURRENT_VERSION)) + update_handler._protocol.mock_wire_data.set_incarnation(2) + update_handler.run(debug=True) self.assertGreaterEqual(reload_conf.call_count, 1, "Reload conf not updated as expected") self.__assert_exit_code_successful(update_handler) - self.__assert_upgrade_telemetry_emitted_for_requested_version(mock_telemetry) + self.__assert_upgrade_telemetry_emitted(mock_telemetry, version="99999.0.0.0") self.__assert_agent_directories_exist_and_others_dont_exist(versions=["99999.0.0.0", str(CURRENT_VERSION)]) self.assertEqual(update_handler._protocol.mock_wire_data.call_counts['agentArtifact'], 1, "only 1 agent should've been downloaded - 1 per incarnation") - self.assertEqual(update_handler._protocol.mock_wire_data.call_counts["manifest_of_ga.xml"], 1, + self.assertGreaterEqual(update_handler._protocol.mock_wire_data.call_counts["manifest_of_ga.xml"], 1, "only 1 agent manifest call should've been made - 1 per incarnation") - def test_it_should_fallback_to_old_update_logic_if_requested_version_not_available(self): + def test_it_should_update_to_largest_version_if_requested_version_not_available(self): no_of_iterations = 100 # Set the test environment by adding 20 random agents to the agent directory @@ -2116,7 +1655,7 @@ def reload_conf(url, protocol): reload_conf.call_count += 1 # By this point, the GS with requested version should've been executed. Verify that - self.__assert_agent_directories_exist_and_others_dont_exist(versions=[str(CURRENT_VERSION)]) + self.__assert_agent_directories_available(versions=[str(CURRENT_VERSION)]) # Update the ext-conf and incarnation and remove requested versions from GS, # this should download all versions requested in config @@ -2131,20 +1670,17 @@ def reload_conf(url, protocol): data_file = mockwiredata.DATA_FILE.copy() data_file["ext_conf"] = "wire/ext_conf_requested_version.xml" - with self.__get_update_handler(iterations=no_of_iterations, test_data=data_file, reload_conf=reload_conf, - normal_frequency=0.001) as (update_handler, mock_telemetry): - with patch.object(conf, "get_enable_ga_versioning", return_value=True): - update_handler._protocol.mock_wire_data.set_extension_config_requested_version(str(CURRENT_VERSION)) - update_handler._protocol.mock_wire_data.set_incarnation(2) - update_handler.run(debug=True) + with self.__get_update_handler(iterations=no_of_iterations, test_data=data_file, reload_conf=reload_conf) as (update_handler, mock_telemetry): + update_handler._protocol.mock_wire_data.set_extension_config_requested_version(str(CURRENT_VERSION)) + update_handler._protocol.mock_wire_data.set_incarnation(2) + update_handler.run(debug=True) self.assertGreater(reload_conf.call_count, 0, "Reload conf not updated") self.__assert_exit_code_successful(update_handler) - self.__assert_upgrade_telemetry_emitted(mock_telemetry) - self.__assert_agent_directories_exist_and_others_dont_exist( - versions=["1.0.0", "1.1.0", "1.2.0", "2.0.0", "2.1.0", "9.9.9.10", "99999.0.0.0", str(CURRENT_VERSION)]) + self.__assert_upgrade_telemetry_emitted(mock_telemetry, version="99999.0.0.0") + self.__assert_agent_directories_exist_and_others_dont_exist(versions=["99999.0.0.0", str(CURRENT_VERSION)]) - def test_it_should_not_download_anything_if_requested_version_is_current_version_and_delete_all_agents(self): + def test_it_should_not_download_anything_if_requested_version_is_current_version(self): data_file = mockwiredata.DATA_FILE.copy() data_file["ext_conf"] = "wire/ext_conf_requested_version.xml" @@ -2152,15 +1688,14 @@ def test_it_should_not_download_anything_if_requested_version_is_current_version self.prepare_agents() self.assertEqual(20, self.agent_count(), "Agent directories not set properly") - with self.__get_update_handler(test_data=data_file) as (update_handler, mock_telemetry): - with patch.object(conf, "get_enable_ga_versioning", return_value=True): - update_handler._protocol.mock_wire_data.set_extension_config_requested_version(str(CURRENT_VERSION)) - update_handler._protocol.mock_wire_data.set_incarnation(2) - update_handler.run(debug=True) + with self.__get_update_handler(test_data=data_file) as (update_handler, _): + update_handler._protocol.mock_wire_data.set_extension_config_requested_version(str(CURRENT_VERSION)) + update_handler._protocol.mock_wire_data.set_incarnation(2) + update_handler.run(debug=True) self.__assert_exit_code_successful(update_handler) - self.__assert_no_agent_upgrade_telemetry(mock_telemetry) - self.__assert_agent_directories_exist_and_others_dont_exist(versions=[str(CURRENT_VERSION)]) + self.assertFalse(os.path.exists(self.agent_dir("99999.0.0.0")), + "New agent directory should not be found") def test_it_should_skip_wait_to_update_if_requested_version_available(self): no_of_iterations = 100 @@ -2186,18 +1721,18 @@ def reload_conf(url, protocol): data_file = mockwiredata.DATA_FILE.copy() data_file['ga_manifest'] = "wire/ga_manifest_no_upgrade.xml" - with self.__get_update_handler(iterations=no_of_iterations, test_data=data_file, reload_conf=reload_conf, - normal_frequency=10, hotfix_frequency=10) as (update_handler, mock_telemetry): - with patch.object(conf, "get_enable_ga_versioning", return_value=True): - update_handler.run(debug=True) + with self.__get_update_handler(iterations=no_of_iterations, test_data=data_file, reload_conf=reload_conf) as (update_handler, mock_telemetry): + update_handler._protocol.mock_wire_data.set_ga_manifest_version_version(str(CURRENT_VERSION)) + update_handler._protocol.mock_wire_data.set_incarnation(20) + update_handler.run(debug=True) self.assertGreater(reload_conf.call_count, 0, "Reload conf not updated") self.assertLess(update_handler.get_iterations(), no_of_iterations, "The code should've exited as soon as requested version was found") self.__assert_exit_code_successful(update_handler) - self.__assert_upgrade_telemetry_emitted_for_requested_version(mock_telemetry, version="9.9.9.10") + self.__assert_upgrade_telemetry_emitted(mock_telemetry, version="9.9.9.10") - def test_it_should_blacklist_current_agent_on_downgrade(self): + def test_it_should_mark_current_agent_as_bad_version_on_downgrade(self): # Create Agent directory for current agent self.prepare_agents(count=1) self.assertTrue(os.path.exists(self.agent_dir(CURRENT_VERSION))) @@ -2208,53 +1743,19 @@ def test_it_should_blacklist_current_agent_on_downgrade(self): data_file = mockwiredata.DATA_FILE.copy() data_file["ext_conf"] = "wire/ext_conf_requested_version.xml" with self.__get_update_handler(test_data=data_file) as (update_handler, mock_telemetry): - with patch.object(conf, "get_enable_ga_versioning", return_value=True): - update_handler._protocol.mock_wire_data.set_extension_config_requested_version(downgraded_version) - update_handler._protocol.mock_wire_data.set_incarnation(2) - try: - set_daemon_version("1.0.0.0") - update_handler.run(debug=True) - finally: - os.environ.pop(DAEMON_VERSION_ENV_VARIABLE) + update_handler._protocol.mock_wire_data.set_extension_config_requested_version(downgraded_version) + update_handler._protocol.mock_wire_data.set_incarnation(2) + update_handler.run(debug=True) self.__assert_exit_code_successful(update_handler) - self.__assert_upgrade_telemetry_emitted_for_requested_version(mock_telemetry, upgrade=False, + self.__assert_upgrade_telemetry_emitted(mock_telemetry, upgrade=False, version=downgraded_version) current_agent = next(agent for agent in self.agents() if agent.version == CURRENT_VERSION) self.assertTrue(current_agent.is_blacklisted, "The current agent should be blacklisted") - self.assertEqual(current_agent.error.reason, "Blacklisting the agent {0} since a downgrade was requested in the GoalState, " + self.assertEqual(current_agent.error.reason, "Marking the agent {0} as bad version since a downgrade was requested in the GoalState, " "suggesting that we really don't want to execute any extensions using this version".format(CURRENT_VERSION), "Invalid reason specified for blacklisting agent") - - def test_it_should_not_downgrade_below_daemon_version(self): - data_file = mockwiredata.DATA_FILE.copy() - data_file["ext_conf"] = "wire/ext_conf_requested_version.xml" - with self.__get_update_handler(test_data=data_file) as (update_handler, mock_telemetry): - with patch.object(conf, "get_enable_ga_versioning", return_value=True): - update_handler._protocol.mock_wire_data.set_extension_config_requested_version("1.0.0.0") - update_handler._protocol.mock_wire_data.set_incarnation(2) - - try: - set_daemon_version("1.2.3.4") - update_handler.run(debug=True) - finally: - os.environ.pop(DAEMON_VERSION_ENV_VARIABLE) - - self.__assert_exit_code_successful(update_handler) - upgrade_msgs = [kwarg for _, kwarg in mock_telemetry.call_args_list if - kwarg['op'] == WALAEventOperation.AgentUpgrade] - # This will throw if corresponding message not found so not asserting on that - requested_version_found = next(kwarg for kwarg in upgrade_msgs if - "Found requested version in manifest: 1.0.0.0 for goal state incarnation_2" in kwarg[ - 'message']) - self.assertTrue(requested_version_found['is_success'], - "The requested version found op should be reported as a success") - - skipping_update = next(kwarg for kwarg in upgrade_msgs if - "Can't process the upgrade as the requested version: 1.0.0.0 is < current daemon version: 1.2.3.4" in - kwarg['message']) - self.assertFalse(skipping_update['is_success'], "Failed Event should be reported as a failure") - self.__assert_ga_version_in_status(update_handler._protocol.aggregate_status) + self.__assert_agent_directories_exist_and_others_dont_exist(versions=[downgraded_version, str(CURRENT_VERSION)]) @patch('azurelinuxagent.ga.update.get_collect_telemetry_events_handler') @@ -2288,12 +1789,13 @@ def iterator(*_, **__): mock_is_running.__get__ = Mock(side_effect=iterator) with patch('azurelinuxagent.ga.exthandlers.get_exthandlers_handler'): with patch('azurelinuxagent.ga.remoteaccess.get_remote_access_handler'): - with patch('azurelinuxagent.ga.update.initialize_event_logger_vminfo_common_parameters'): - with patch('azurelinuxagent.common.cgroupapi.CGroupsApi.cgroups_supported', return_value=False): # skip all cgroup stuff - with patch('azurelinuxagent.ga.update.is_log_collection_allowed', return_value=True): - with patch('time.sleep'): - with patch('sys.exit'): - self.update_handler.run() + with patch('azurelinuxagent.ga.agent_update.get_agent_update_handler'): + with patch('azurelinuxagent.ga.update.initialize_event_logger_vminfo_common_parameters'): + with patch('azurelinuxagent.common.cgroupapi.CGroupsApi.cgroups_supported', return_value=False): # skip all cgroup stuff + with patch('azurelinuxagent.ga.update.is_log_collection_allowed', return_value=True): + with patch('time.sleep'): + with patch('sys.exit'): + self.update_handler.run() def _setup_mock_thread_and_start_test_run(self, mock_thread, is_alive=True, invocations=0): thread = MagicMock() @@ -2623,34 +2125,41 @@ def test_it_should_process_goal_state_only_on_new_goal_state(self): update_handler = _create_update_handler() remote_access_handler = Mock() remote_access_handler.run = Mock() + agent_update_handler = Mock() + agent_update_handler.run = Mock() # process a goal state - update_handler._process_goal_state(exthandlers_handler, remote_access_handler) + update_handler._process_goal_state(exthandlers_handler, remote_access_handler, agent_update_handler) self.assertEqual(1, exthandlers_handler.run.call_count, "exthandlers_handler.run() should have been called on the first goal state") self.assertEqual(1, exthandlers_handler.report_ext_handlers_status.call_count, "exthandlers_handler.report_ext_handlers_status() should have been called on the first goal state") self.assertEqual(1, remote_access_handler.run.call_count, "remote_access_handler.run() should have been called on the first goal state") + self.assertEqual(1, agent_update_handler.run.call_count, "agent_update_handler.run() should have been called on the first goal state") # process the same goal state - update_handler._process_goal_state(exthandlers_handler, remote_access_handler) + update_handler._process_goal_state(exthandlers_handler, remote_access_handler, agent_update_handler) self.assertEqual(1, exthandlers_handler.run.call_count, "exthandlers_handler.run() should have not been called on the same goal state") self.assertEqual(2, exthandlers_handler.report_ext_handlers_status.call_count, "exthandlers_handler.report_ext_handlers_status() should have been called on the same goal state") self.assertEqual(1, remote_access_handler.run.call_count, "remote_access_handler.run() should not have been called on the same goal state") + self.assertEqual(2, agent_update_handler.run.call_count, "agent_update_handler.run() should have been called on the same goal state") # process a new goal state exthandlers_handler.protocol.mock_wire_data.set_incarnation(999) exthandlers_handler.protocol.client.update_goal_state() - update_handler._process_goal_state(exthandlers_handler, remote_access_handler) + update_handler._process_goal_state(exthandlers_handler, remote_access_handler, agent_update_handler) self.assertEqual(2, exthandlers_handler.run.call_count, "exthandlers_handler.run() should have been called on a new goal state") self.assertEqual(3, exthandlers_handler.report_ext_handlers_status.call_count, "exthandlers_handler.report_ext_handlers_status() should have been called on a new goal state") self.assertEqual(2, remote_access_handler.run.call_count, "remote_access_handler.run() should have been called on a new goal state") + self.assertEqual(3, agent_update_handler.run.call_count, "agent_update_handler.run() should have been called on the new goal state") def test_it_should_write_the_agent_status_to_the_history_folder(self): with _mock_exthandlers_handler() as exthandlers_handler: update_handler = _create_update_handler() remote_access_handler = Mock() remote_access_handler.run = Mock() + agent_update_handler = Mock() + agent_update_handler.run = Mock() - update_handler._process_goal_state(exthandlers_handler, remote_access_handler) + update_handler._process_goal_state(exthandlers_handler, remote_access_handler, agent_update_handler) incarnation = exthandlers_handler.protocol.get_goal_state().incarnation matches = glob.glob(os.path.join(conf.get_lib_dir(), ARCHIVE_DIRECTORY_NAME, "*_{0}".format(incarnation))) @@ -2893,16 +2402,17 @@ def test_update_handler_should_use_the_initial_goal_state_period_until_the_goal_ with patch('azurelinuxagent.common.conf.get_goal_state_period', return_value=goal_state_period): with _mock_exthandlers_handler([ExtensionStatusValue.transitioning, ExtensionStatusValue.success]) as exthandlers_handler: remote_access_handler = Mock() + agent_update_handler = Mock() update_handler = _create_update_handler() self.assertEqual(initial_goal_state_period, update_handler._goal_state_period, "Expected the initial goal state period") # the extension is transisioning, so we should still be using the initial goal state period - update_handler._process_goal_state(exthandlers_handler, remote_access_handler) + update_handler._process_goal_state(exthandlers_handler, remote_access_handler, agent_update_handler) self.assertEqual(initial_goal_state_period, update_handler._goal_state_period, "Expected the initial goal state period when the extension is transitioning") # the goal state converged (the extension succeeded), so we should switch to the regular goal state period - update_handler._process_goal_state(exthandlers_handler, remote_access_handler) + update_handler._process_goal_state(exthandlers_handler, remote_access_handler, agent_update_handler) self.assertEqual(goal_state_period, update_handler._goal_state_period, "Expected the regular goal state period after the goal state converged") def test_update_handler_should_switch_to_the_regular_goal_state_period_when_the_goal_state_does_not_converges(self): @@ -2911,17 +2421,18 @@ def test_update_handler_should_switch_to_the_regular_goal_state_period_when_the_ with patch('azurelinuxagent.common.conf.get_goal_state_period', return_value=goal_state_period): with _mock_exthandlers_handler([ExtensionStatusValue.transitioning, ExtensionStatusValue.transitioning]) as exthandlers_handler: remote_access_handler = Mock() + agent_update_handler = Mock() update_handler = _create_update_handler() self.assertEqual(initial_goal_state_period, update_handler._goal_state_period, "Expected the initial goal state period") # the extension is transisioning, so we should still be using the initial goal state period - update_handler._process_goal_state(exthandlers_handler, remote_access_handler) + update_handler._process_goal_state(exthandlers_handler, remote_access_handler, agent_update_handler) self.assertEqual(initial_goal_state_period, update_handler._goal_state_period, "Expected the initial goal state period when the extension is transitioning") # a new goal state arrives before the current goal state converged (the extension is transitioning), so we should switch to the regular goal state period exthandlers_handler.protocol.mock_wire_data.set_incarnation(100) - update_handler._process_goal_state(exthandlers_handler, remote_access_handler) + update_handler._process_goal_state(exthandlers_handler, remote_access_handler, agent_update_handler) self.assertEqual(goal_state_period, update_handler._goal_state_period, "Expected the regular goal state period when the goal state does not converge") diff --git a/tests/protocol/mockwiredata.py b/tests/protocol/mockwiredata.py index 196ed32db..936533e97 100644 --- a/tests/protocol/mockwiredata.py +++ b/tests/protocol/mockwiredata.py @@ -462,3 +462,6 @@ def set_extension_config(self, ext_conf_file): def set_extension_config_requested_version(self, version): self.ext_conf = WireProtocolData.replace_xml_element_value(self.ext_conf, "Version", version) + + def set_ga_manifest_version_version(self, version): + self.ga_manifest = WireProtocolData.replace_xml_element_value(self.ga_manifest, "Version", version)