From 487900bf59920d71655003ecb7cfea92fa8e1813 Mon Sep 17 00:00:00 2001 From: Isaac Milarsky Date: Wed, 10 Apr 2024 07:50:28 -0500 Subject: [PATCH 1/2] add last_collected checks to retry_errored_repos to correctly follow contraints Signed-off-by: Isaac Milarsky --- augur/tasks/start_tasks.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/augur/tasks/start_tasks.py b/augur/tasks/start_tasks.py index 2a458649c3..866b7a0288 100644 --- a/augur/tasks/start_tasks.py +++ b/augur/tasks/start_tasks.py @@ -326,13 +326,22 @@ def retry_errored_repos(self): #collection_status table once augur dev is less unstable. with DatabaseSession(logger,engine) as session: query = s.sql.text(f"""UPDATE collection_status SET secondary_status = '{CollectionState.PENDING.value}'""" - f""" WHERE secondary_status = '{CollectionState.ERROR.value}' ;""" + f""" WHERE secondary_status = '{CollectionState.ERROR.value}' and secondary_data_last_collected is NULL;""" f"""UPDATE collection_status SET core_status = '{CollectionState.PENDING.value}'""" - f""" WHERE core_status = '{CollectionState.ERROR.value}' ;""" + f""" WHERE core_status = '{CollectionState.ERROR.value}' and core_data_last_collected is NULL;""" f"""UPDATE collection_status SET facade_status = '{CollectionState.PENDING.value}'""" - f""" WHERE facade_status = '{CollectionState.ERROR.value}' ;""" + f""" WHERE facade_status = '{CollectionState.ERROR.value}' and facade_data_last_collected is NULL;""" f"""UPDATE collection_status SET ml_status = '{CollectionState.PENDING.value}'""" - f""" WHERE ml_status = '{CollectionState.ERROR.value}' ;""" + f""" WHERE ml_status = '{CollectionState.ERROR.value}' and ml_data_last_collected is NULL;""" + + f"""UPDATE collection_status SET secondary_status = '{CollectionState.SUCCESS.value}'""" + f""" WHERE secondary_status = '{CollectionState.ERROR.value}' and secondary_data_last_collected is not NULL;""" + f"""UPDATE collection_status SET core_status = '{CollectionState.SUCCESS.value}'""" + f""" WHERE core_status = '{CollectionState.ERROR.value}' and core_data_last_collected is not NULL;;""" + f"""UPDATE collection_status SET facade_status = '{CollectionState.SUCCESS.value}'""" + f""" WHERE facade_status = '{CollectionState.ERROR.value}' and facade_data_last_collected is not NULL;;""" + f"""UPDATE collection_status SET ml_status = '{CollectionState.SUCCESS.value}'""" + f""" WHERE ml_status = '{CollectionState.ERROR.value}' and ml_data_last_collected is not NULL;;""" ) session.execute_sql(query) From 8567074e7a5adff6d96c8c0330712be3cb1aea89 Mon Sep 17 00:00:00 2001 From: Isaac Milarsky Date: Thu, 25 Apr 2024 12:00:07 -0500 Subject: [PATCH 2/2] fix facade task signature Signed-off-by: Isaac Milarsky --- augur/tasks/git/facade_tasks.py | 28 +---------------------- augur/tasks/github/facade_github/tasks.py | 10 +++++++- 2 files changed, 10 insertions(+), 28 deletions(-) diff --git a/augur/tasks/git/facade_tasks.py b/augur/tasks/git/facade_tasks.py index b96c596bc7..9c699f7e79 100644 --- a/augur/tasks/git/facade_tasks.py +++ b/augur/tasks/git/facade_tasks.py @@ -439,32 +439,6 @@ def generate_analysis_sequence(logger,repo_git, session): return analysis_sequence - -def generate_contributor_sequence(logger,repo_git, session): - - contributor_sequence = [] - #all_repo_ids = [] - repo_id = None - - #contributor_sequence.append(facade_start_contrib_analysis_task.si()) - query = s.sql.text("""SELECT repo_id FROM repo - WHERE repo_git=:value""").bindparams(value=repo_git) - - repo = session.execute_sql(query).fetchone() - session.logger.info(f"repo: {repo}") - repo_id = repo[0] - #pdb.set_trace() - #breakpoint() - #for repo in all_repos: - # contributor_sequence.append(insert_facade_contributors.si(repo['repo_id'])) - #all_repo_ids = [repo['repo_id'] for repo in all_repos] - - #contrib_group = create_grouped_task_load(dataList=all_repo_ids,task=insert_facade_contributors)#group(contributor_sequence) - #contrib_group.link_error(facade_error_handler.s()) - #return contrib_group#chain(facade_start_contrib_analysis_task.si(), contrib_group) - return insert_facade_contributors.si(repo_id) - - def facade_phase(repo_git): logger = logging.getLogger(facade_phase.__name__) logger.info("Generating facade sequence") @@ -506,7 +480,7 @@ def facade_phase(repo_git): #Generate contributor analysis task group. if not limited_run or (limited_run and run_facade_contributors): - facade_core_collection.append(generate_contributor_sequence(logger,repo_git,session)) + facade_core_collection.append(insert_facade_contributors.si(repo_git)) #These tasks need repos to be cloned by facade before they can work. diff --git a/augur/tasks/github/facade_github/tasks.py b/augur/tasks/github/facade_github/tasks.py index 64ce4f7409..6bf9888c07 100644 --- a/augur/tasks/github/facade_github/tasks.py +++ b/augur/tasks/github/facade_github/tasks.py @@ -194,14 +194,22 @@ def link_commits_to_contributor(session,contributorQueue): # Update the contributors table from the data facade has gathered. @celery.task(base=AugurFacadeRepoCollectionTask, bind=True) -def insert_facade_contributors(self, repo_id): +def insert_facade_contributors(self, repo_git): engine = self.app.engine logger = logging.getLogger(insert_facade_contributors.__name__) + repo_id = None with GithubTaskManifest(logger) as manifest: + #contributor_sequence.append(facade_start_contrib_analysis_task.si()) + query = s.sql.text("""SELECT repo_id FROM repo + WHERE repo_git=:value""").bindparams(value=repo_git) + + repo = manifest.augur_db.execute_sql(query).fetchone() + logger.info(f"repo: {repo}") + repo_id = repo[0] # Get all of the commit data's emails and names from the commit table that do not appear # in the contributors table or the contributors_aliases table.