diff --git a/azurelinuxagent/common/exception.py b/azurelinuxagent/common/exception.py index 603ed1aa2..42170db85 100644 --- a/azurelinuxagent/common/exception.py +++ b/azurelinuxagent/common/exception.py @@ -84,6 +84,15 @@ def __init__(self, msg=None, inner=None): super(AgentUpdateError, self).__init__(msg, inner) +class AgentFamilyMissingError(AgentError): + """ + When agent family is missing. + """ + + def __init__(self, msg=None, inner=None): + super(AgentFamilyMissingError, self).__init__(msg, inner) + + class CGroupsException(AgentError): """ Exception to classify any cgroups related issue. diff --git a/azurelinuxagent/ga/agent_update_handler.py b/azurelinuxagent/ga/agent_update_handler.py index ed157bdf5..f34235702 100644 --- a/azurelinuxagent/ga/agent_update_handler.py +++ b/azurelinuxagent/ga/agent_update_handler.py @@ -15,18 +15,16 @@ # limitations under the License. # # Requires Python 2.6+ and Openssl 1.0+ -import datetime import os from azurelinuxagent.common import conf, logger from azurelinuxagent.common.event import add_event, WALAEventOperation -from azurelinuxagent.common.exception import AgentUpgradeExitException, AgentUpdateError +from azurelinuxagent.common.exception import AgentUpgradeExitException, AgentUpdateError, AgentFamilyMissingError from azurelinuxagent.common.future import ustr from azurelinuxagent.common.protocol.restapi import VMAgentUpdateStatuses, VMAgentUpdateStatus, VERSION_0 from azurelinuxagent.common.utils import textutil from azurelinuxagent.common.utils.flexible_version import FlexibleVersion from azurelinuxagent.common.version import get_daemon_version -from azurelinuxagent.ga.ga_version_updater import RSMUpdates from azurelinuxagent.ga.rsm_version_updater import RSMVersionUpdater from azurelinuxagent.ga.self_update_version_updater import SelfUpdateVersionUpdater @@ -67,7 +65,7 @@ def __init__(self, protocol): # restore the state of rsm update. Default to self-update if last update is not with RSM. if not self._get_is_last_update_with_rsm(): - self._updater = SelfUpdateVersionUpdater(self._gs_id, datetime.datetime.min) + self._updater = SelfUpdateVersionUpdater(self._gs_id) else: self._updater = RSMVersionUpdater(self._gs_id, self._daemon_version) @@ -117,7 +115,7 @@ def _get_agent_family_manifest(self, goal_state): """ Get the agent_family from last GS for the given family Returns: first entry of Manifest - Exception if no manifests found in the last GS + Exception if no manifests found in the last GS and log it only on new goal state """ family = self._ga_family_type agent_families = goal_state.extensions_goal_state.agent_families @@ -130,11 +128,13 @@ def _get_agent_family_manifest(self, goal_state): agent_family_manifests.append(m) if not family_found: - raise AgentUpdateError(u"Agent family: {0} not found in the goal state: {1}, skipping agent update".format(family, self._gs_id)) + raise AgentFamilyMissingError(u"Agent family: {0} not found in the goal state: {1}, skipping agent update \n" + u"[Note: This error is permanent for this goal state and Will not log same error until we receive new goal state]".format(family, self._gs_id)) if len(agent_family_manifests) == 0: - raise AgentUpdateError( - u"No manifest links found for agent family: {0} for goal state: {1}, skipping agent update".format( + raise AgentFamilyMissingError( + u"No manifest links found for agent family: {0} for goal state: {1}, skipping agent update \n" + u"[Note: This error is permanent for this goal state and will not log same error until we receive new goal state]".format( family, self._gs_id)) return agent_family_manifests[0] @@ -145,30 +145,38 @@ def run(self, goal_state, ext_gs_updated): if not conf.get_autoupdate_enabled() or not conf.get_download_new_agents(): return - # verify if agent update is allowed this time (RSM checks new goal state; self-update checks manifest download interval) - if not self._updater.is_update_allowed_this_time(ext_gs_updated): - return + # Update the state only on new goal state + if ext_gs_updated: + self._gs_id = goal_state.extensions_goal_state.id + self._updater.sync_new_gs_id(self._gs_id) - self._gs_id = goal_state.extensions_goal_state.id agent_family = self._get_agent_family_manifest(goal_state) - # updater will return RSM enabled or disabled if we need to switch to self-update or rsm update - updater_mode = self._updater.check_and_switch_updater_if_changed(agent_family, self._gs_id, ext_gs_updated) + # Updater will return True or False if we need to switch the updater + # If self-updater receives RSM update enabled, it will switch to RSM updater + # If RSM updater receives RSM update disabled, it will switch to self-update + # No change in updater if GS not updated + is_rsm_update_enabled = self._updater.is_rsm_update_enabled(agent_family, ext_gs_updated) - if updater_mode == RSMUpdates.Disabled: + if not is_rsm_update_enabled and isinstance(self._updater, RSMVersionUpdater): msg = "VM not enabled for RSM updates, switching to self-update mode" logger.info(msg) add_event(op=WALAEventOperation.AgentUpgrade, message=msg, log_event=False) - self._updater = SelfUpdateVersionUpdater(self._gs_id, datetime.datetime.now()) + self._updater = SelfUpdateVersionUpdater(self._gs_id) self._remove_rsm_update_state() - if updater_mode == RSMUpdates.Enabled: + if is_rsm_update_enabled and isinstance(self._updater, SelfUpdateVersionUpdater): msg = "VM enabled for RSM updates, switching to RSM update mode" logger.info(msg) add_event(op=WALAEventOperation.AgentUpgrade, message=msg, log_event=False) self._updater = RSMVersionUpdater(self._gs_id, self._daemon_version) self._save_rsm_update_state() + # If updater is changed in previous step, we allow update as it consider as first attempt. If not, it checks below condition + # RSM checks new goal state; self-update checks manifest download interval + if not self._updater.is_update_allowed_this_time(ext_gs_updated): + return + self._updater.retrieve_agent_version(agent_family, goal_state) if not self._updater.is_retrieved_version_allowed_to_update(agent_family): @@ -183,14 +191,20 @@ def run(self, goal_state, ext_gs_updated): self._updater.proceed_with_update() except Exception as err: + log_error = True if isinstance(err, AgentUpgradeExitException): raise err elif isinstance(err, AgentUpdateError): error_msg = ustr(err) + elif isinstance(err, AgentFamilyMissingError): + error_msg = ustr(err) + # Agent family missing error is permanent in the given goal state, so we don't want to log it on every iteration of main loop if there is no new goal state + log_error = ext_gs_updated else: error_msg = "Unable to update Agent: {0}".format(textutil.format_exception(err)) - logger.warn(error_msg) - add_event(op=WALAEventOperation.AgentUpgrade, is_success=False, message=error_msg, log_event=False) + if log_error: + logger.warn(error_msg) + add_event(op=WALAEventOperation.AgentUpgrade, is_success=False, message=error_msg, log_event=False) self._last_attempted_update_error_msg = error_msg def get_vmagent_update_status(self): diff --git a/azurelinuxagent/ga/ga_version_updater.py b/azurelinuxagent/ga/ga_version_updater.py index 0d3f639f2..46ae1f31f 100644 --- a/azurelinuxagent/ga/ga_version_updater.py +++ b/azurelinuxagent/ga/ga_version_updater.py @@ -30,14 +30,6 @@ from azurelinuxagent.ga.guestagent import GuestAgent -class RSMUpdates(object): - """ - Enum for switching between RSM updates and self updates - """ - Enabled = "Enabled" - Disabled = "Disabled" - - class GAVersionUpdater(object): def __init__(self, gs_id): @@ -53,15 +45,13 @@ def is_update_allowed_this_time(self, ext_gs_updated): """ raise NotImplementedError - def check_and_switch_updater_if_changed(self, agent_family, gs_id, ext_gs_updated): + def is_rsm_update_enabled(self, agent_family, ext_gs_updated): """ - checks and raise the updater exception if we need to switch to self-update from rsm update or vice versa + return True if we need to switch to RSM-update from self-update and vice versa. @param agent_family: agent family - @param gs_id: incarnation of the goal state @param ext_gs_updated: True if extension goal state updated else False - @return: RSMUpdates.Disabled: return when agent need to stop rsm updates and switch to self-update - RSMUpdates.Enabled: return when agent need to switch to rsm update - None: return when no need to switch + @return: False when agent need to stop rsm updates + True: when agent need to switch to rsm update """ raise NotImplementedError @@ -107,6 +97,13 @@ def version(self): """ return self._version + def sync_new_gs_id(self, gs_id): + """ + Update gs_id + @param gs_id: goal state id + """ + self._gs_id = gs_id + def download_and_get_new_agent(self, protocol, agent_family, goal_state): """ Function downloads the new agent and returns the downloaded version. diff --git a/azurelinuxagent/ga/rsm_version_updater.py b/azurelinuxagent/ga/rsm_version_updater.py index 6df7b6e30..a7a8bd97d 100644 --- a/azurelinuxagent/ga/rsm_version_updater.py +++ b/azurelinuxagent/ga/rsm_version_updater.py @@ -24,7 +24,7 @@ from azurelinuxagent.common.exception import AgentUpgradeExitException, AgentUpdateError from azurelinuxagent.common.utils.flexible_version import FlexibleVersion from azurelinuxagent.common.version import CURRENT_VERSION, AGENT_NAME -from azurelinuxagent.ga.ga_version_updater import GAVersionUpdater, RSMUpdates +from azurelinuxagent.ga.ga_version_updater import GAVersionUpdater from azurelinuxagent.ga.guestagent import GuestAgent @@ -49,24 +49,23 @@ def is_update_allowed_this_time(self, ext_gs_updated): """ return ext_gs_updated - def check_and_switch_updater_if_changed(self, agent_family, gs_id, ext_gs_updated): + def is_rsm_update_enabled(self, agent_family, ext_gs_updated): """ Checks if there is a new goal state and decide if we need to continue with rsm update or switch to self-update. - Firstly it checks agent supports GA versioning or not. If not, we return rsm updates disabled to switch to self-update. - if vm is enabled for RSM updates and continue with rsm update, otherwise we return rsm updates disabled to switch to self-update. + Firstly it checks agent supports GA versioning or not. If not, we return false to switch to self-update. + if vm is enabled for RSM updates and continue with rsm update, otherwise we return false to switch to self-update. if either isVersionFromRSM or isVMEnabledForRSMUpgrades or version is missing in the goal state, we ignore the update as we consider it as invalid goal state. """ if ext_gs_updated: - self._gs_id = gs_id if not conf.get_enable_ga_versioning(): - return RSMUpdates.Disabled + return False if agent_family.is_vm_enabled_for_rsm_upgrades is None: raise AgentUpdateError( "Received invalid goal state:{0}, missing isVMEnabledForRSMUpgrades property. So, skipping agent update".format( self._gs_id)) elif not agent_family.is_vm_enabled_for_rsm_upgrades: - return RSMUpdates.Disabled + return False else: if agent_family.is_version_from_rsm is None: raise AgentUpdateError( @@ -77,7 +76,7 @@ def check_and_switch_updater_if_changed(self, agent_family, gs_id, ext_gs_update "Received invalid goal state:{0}, missing version property. So, skipping agent update".format( self._gs_id)) - return None + return True def retrieve_agent_version(self, agent_family, goal_state): """ diff --git a/azurelinuxagent/ga/self_update_version_updater.py b/azurelinuxagent/ga/self_update_version_updater.py index ca27c4399..6605a28eb 100644 --- a/azurelinuxagent/ga/self_update_version_updater.py +++ b/azurelinuxagent/ga/self_update_version_updater.py @@ -23,7 +23,7 @@ from azurelinuxagent.common.exception import AgentUpgradeExitException, AgentUpdateError from azurelinuxagent.common.utils.flexible_version import FlexibleVersion from azurelinuxagent.common.version import CURRENT_VERSION -from azurelinuxagent.ga.ga_version_updater import GAVersionUpdater, RSMUpdates +from azurelinuxagent.ga.ga_version_updater import GAVersionUpdater class SelfUpdateType(object): @@ -35,9 +35,9 @@ class SelfUpdateType(object): class SelfUpdateVersionUpdater(GAVersionUpdater): - def __init__(self, gs_id, last_attempted_manifest_download_time): + def __init__(self, gs_id): super(SelfUpdateVersionUpdater, self).__init__(gs_id) - self._last_attempted_manifest_download_time = last_attempted_manifest_download_time + self._last_attempted_manifest_download_time = datetime.datetime.min self._last_attempted_self_update_time = datetime.datetime.min @staticmethod @@ -119,14 +119,13 @@ def is_update_allowed_this_time(self, ext_gs_updated): return False return True - def check_and_switch_updater_if_changed(self, agent_family, gs_id, ext_gs_updated): + def is_rsm_update_enabled(self, agent_family, ext_gs_updated): """ Checks if there is a new goal state and decide if we need to continue with self-update or switch to rsm update. - if vm is not enabled for RSM updates or agent not supports GA versioning then we continue with self update, otherwise we rsm enabled to switch to rsm update. + if vm is not enabled for RSM updates or agent not supports GA versioning then we continue with self update, otherwise we return true to switch to rsm update. if isVersionFromRSM is missing but isVMEnabledForRSMUpgrades is present in the goal state, we ignore the update as we consider it as invalid goal state. """ if ext_gs_updated: - self._gs_id = gs_id if conf.get_enable_ga_versioning() and agent_family.is_vm_enabled_for_rsm_upgrades is not None and agent_family.is_vm_enabled_for_rsm_upgrades: if agent_family.is_version_from_rsm is None: raise AgentUpdateError( @@ -137,9 +136,9 @@ def check_and_switch_updater_if_changed(self, agent_family, gs_id, ext_gs_update raise AgentUpdateError( "Received invalid goal state:{0}, missing version property. So, skipping agent update".format( self._gs_id)) - return RSMUpdates.Enabled + return True - return None + return False def retrieve_agent_version(self, agent_family, goal_state): """ diff --git a/tests/ga/test_agent_update_handler.py b/tests/ga/test_agent_update_handler.py index 0ac373a6b..5ba7f3c70 100644 --- a/tests/ga/test_agent_update_handler.py +++ b/tests/ga/test_agent_update_handler.py @@ -366,6 +366,16 @@ def test_handles_missing_agent_family(self): 'message'] and kwarg[ 'op'] == WALAEventOperation.AgentUpgrade]), "Agent manifest should not be in GS") + # making multiple agent update attempts and assert only one time logged + agent_update_handler.run(agent_update_handler._protocol.get_goal_state(), False) + agent_update_handler.run(agent_update_handler._protocol.get_goal_state(), False) + + self.assertEqual(1, len([kwarg['message'] for _, kwarg in mock_telemetry.call_args_list if + "No manifest links found for agent family" in kwarg[ + 'message'] and kwarg[ + 'op'] == WALAEventOperation.AgentUpgrade]), + "Agent manifest error should be logged once if it's same goal state") + def test_it_should_report_update_status_with_success(self): data_file = DATA_FILE.copy() data_file["ext_conf"] = "wire/ext_conf_rsm_version.xml" diff --git a/tests/ga/test_update.py b/tests/ga/test_update.py index 37fb75796..c25585f14 100644 --- a/tests/ga/test_update.py +++ b/tests/ga/test_update.py @@ -1784,13 +1784,14 @@ def test_it_should_not_download_anything_if_rsm_version_is_current_version(self) self.assertFalse(os.path.exists(self.agent_dir("99999.0.0.0")), "New agent directory should not be found") - def test_it_should_skip_wait_to_update_if_rsm_version_available(self): + def test_it_should_skip_wait_to_update_immediately_if_rsm_version_available(self): no_of_iterations = 100 def reload_conf(url, protocol): mock_wire_data = protocol.mock_wire_data # This function reloads the conf mid-run to mimic an actual customer scenario + # Setting the rsm request to be sent after some iterations if HttpRequestPredicates.is_goal_state_request(url) and mock_wire_data.call_counts["goalstate"] >= 5: reload_conf.call_count += 1 @@ -1808,7 +1809,8 @@ def reload_conf(url, protocol): data_file = wire_protocol_data.DATA_FILE.copy() data_file['ga_manifest'] = "wire/ga_manifest_no_upgrade.xml" - with self.__get_update_handler(iterations=no_of_iterations, test_data=data_file, reload_conf=reload_conf) as (update_handler, mock_telemetry): + # Setting the prod frequency to mimic a real scenario + with self.__get_update_handler(iterations=no_of_iterations, test_data=data_file, reload_conf=reload_conf, autoupdate_frequency=6000) as (update_handler, mock_telemetry): update_handler._protocol.mock_wire_data.set_ga_manifest_version_version(str(CURRENT_VERSION)) update_handler._protocol.mock_wire_data.set_incarnation(20) update_handler.run(debug=True)