From 89cf8c601999e6ce49ff59a78cf14ea942c4357a Mon Sep 17 00:00:00 2001
From: Andrew Brain
Date: Sat, 9 Mar 2024 11:43:56 -0600
Subject: [PATCH 1/6] Add gitlab contributors for mr data

Extract contributor records for GitLab merge request authors, merge
request assignees and merge request comment authors, set cntrb_id on the
corresponding dicts, and insert the de-duplicated contributors before the
rows that reference them.

process_mr_contributors guards against a missing contributor record the
same way process_gitlab_mr_comment_contributors does: cntrb_id is set to
None and nothing is queued for insertion.

The issue comment helper is renamed to
process_gitlab_issue_comment_contributors to distinguish it from the new
merge request variant.

---
Reviewer notes (not part of the commit message):
- Context-line indentation and blank lines in this series were
  reconstructed from the hunk line counts; regenerate the series from the
  branch with `git format-patch` before applying.
- The index hashes for merge_request_task.py are stale after the review
  edits in patches 1/6 and 5/6.

 augur/application/db/data_parse.py       |  6 +--
 augur/tasks/gitlab/issues_task.py        |  4 +-
 augur/tasks/gitlab/merge_request_task.py | 54 +++++++++++++++++++++++-
 3 files changed, 56 insertions(+), 8 deletions(-)

diff --git a/augur/application/db/data_parse.py b/augur/application/db/data_parse.py
index 1d18e26196..1291276f59 100644
--- a/augur/application/db/data_parse.py
+++ b/augur/application/db/data_parse.py
@@ -138,9 +138,8 @@ def extract_needed_merge_request_assignee_data(assignees: List[dict], repo_id: i
     for assignee in assignees:
 
         assignee_dict = {
-            'contrib_id': None,
+            'contrib_id': assignee["cntrb_id"],
             'repo_id': repo_id,
-            # TODO: Temporarily setting this to id which the id of the contributor, unitl we can get the contrib_id set and create a unique on the contrib_id and the pull_request_id
             'pr_assignee_src_id': assignee["id"],
             'tool_source': tool_source,
             'tool_version': tool_version,
@@ -807,8 +806,7 @@ def extract_needed_pr_data_from_gitlab_merge_request(pr, repo_id, tool_source, t
         'pr_src_state': pr['state'],
         'pr_src_locked': pr['discussion_locked'],
         'pr_src_title': pr['title'],
-        # TODO: Add contributor logic for gitlab
-        'pr_augur_contributor_id': None,
+        'pr_augur_contributor_id': pr["cntrb_id"],
         'pr_body': pr['description'],
         'pr_created_at': pr['created_at'],
         'pr_updated_at': pr['updated_at'],
diff --git a/augur/tasks/gitlab/issues_task.py b/augur/tasks/gitlab/issues_task.py
index 6159a6bb0a..a48f243dca 100644
--- a/augur/tasks/gitlab/issues_task.py
+++ b/augur/tasks/gitlab/issues_task.py
@@ -308,7 +308,7 @@ def process_gitlab_issue_messages(data, task_name, repo_id, logger, augur_db):
 
         for message in messages:
 
-            message, contributor = process_gitlab_comment_contributors(message, tool_source, tool_version, data_source)
+            message, contributor = process_gitlab_issue_comment_contributors(message, tool_source, tool_version, data_source)
 
             if contributor:
                 contributors.append(contributor)
@@ -352,7 +352,7 @@ def process_gitlab_issue_messages(data, task_name, repo_id, logger, augur_db):
     augur_db.insert_data(issue_message_ref_dicts, IssueMessageRef, issue_message_ref_natural_keys)
 
 
-def process_gitlab_comment_contributors(message, tool_source, tool_version, data_source):
+def process_gitlab_issue_comment_contributors(message, tool_source, tool_version, data_source):
 
     contributor = extract_needed_gitlab_contributor_data(message["author"], tool_source, tool_version, data_source)
     if contributor:
diff --git a/augur/tasks/gitlab/merge_request_task.py b/augur/tasks/gitlab/merge_request_task.py
index 29ee7a54bb..bfc0e6bf5f 100644
--- a/augur/tasks/gitlab/merge_request_task.py
+++ b/augur/tasks/gitlab/merge_request_task.py
@@ -4,10 +4,11 @@
 from augur.tasks.init.celery_app import AugurCoreRepoCollectionTask
 from augur.tasks.gitlab.gitlab_api_handler import GitlabApiHandler
 from augur.tasks.gitlab.gitlab_task_session import GitlabTaskManifest
-from augur.application.db.data_parse import extract_needed_pr_data_from_gitlab_merge_request, extract_needed_merge_request_assignee_data, extract_needed_mr_label_data, extract_needed_mr_reviewer_data, extract_needed_mr_commit_data, extract_needed_mr_file_data, extract_needed_mr_metadata, extract_needed_gitlab_mr_message_ref_data, extract_needed_gitlab_message_data
+from augur.application.db.data_parse import extract_needed_pr_data_from_gitlab_merge_request, extract_needed_merge_request_assignee_data, extract_needed_mr_label_data, extract_needed_mr_reviewer_data, extract_needed_mr_commit_data, extract_needed_mr_file_data, extract_needed_mr_metadata, extract_needed_gitlab_mr_message_ref_data, extract_needed_gitlab_message_data, extract_needed_gitlab_contributor_data
 from augur.tasks.github.util.util import get_owner_repo, add_key_value_pair_to_dicts
-from augur.application.db.models import PullRequest, PullRequestLabel, PullRequestMeta, PullRequestCommit, PullRequestFile, PullRequestMessageRef, Repo, Message
+from augur.application.db.models import PullRequest, PullRequestLabel, PullRequestMeta, PullRequestCommit, PullRequestFile, PullRequestMessageRef, Repo, Message, Contributor
 from augur.application.db.util import execute_session_query
+from augur.tasks.util.worker_util import remove_duplicate_dicts
 
 platform_id = 2
 
@@ -99,12 +100,17 @@ def process_merge_requests(data, task_name, repo_id, logger, augur_db):
     data_source = "Gitlab API"
 
     merge_requests = []
+    contributors = []
     mr_ids = []
     mr_mapping_data = {}
     for mr in data:
 
         mr_ids.append(mr["iid"])
 
+        mr, contributor_data = process_mr_contributors(mr, tool_source, tool_version, data_source)
+
+        contributors += contributor_data
+
         merge_requests.append(extract_needed_pr_data_from_gitlab_merge_request(mr, repo_id, tool_source, tool_version))
 
         assignees = extract_needed_merge_request_assignee_data(mr["assignees"], repo_id, tool_source, tool_version, data_source)
@@ -117,6 +123,11 @@ def process_merge_requests(data, task_name, repo_id, logger, augur_db):
             "labels": labels
         }
 
+    contributors = remove_duplicate_dicts(contributors)
+
+    logger.info(f"{task_name}: Inserting {len(contributors)} contributors")
+    augur_db.insert_data(contributors, Contributor, ["cntrb_id"])
+
     logger.info(f"{task_name}: Inserting mrs of length: {len(merge_requests)}")
     pr_natural_keys = ["repo_id", "pr_src_id"]
     pr_string_fields = ["pr_src_title", "pr_body"]
@@ -208,6 +219,7 @@ def process_gitlab_mr_messages(data, task_name, repo_id, logger, augur_db):
         mr_number_to_id_map[mr.pr_src_number] = mr.pull_request_id
 
     message_dicts = []
+    contributors = []
     message_ref_mapping_data = {}
     for id, messages in data.items():
 
@@ -221,6 +233,11 @@ def process_gitlab_mr_messages(data, task_name, repo_id, logger, augur_db):
 
         for message in messages:
 
+            message, contributor = process_gitlab_mr_comment_contributors(message, tool_source, tool_version, data_source)
+
+            if contributor:
+                contributors.append(contributor)
+
             mr_message_ref_data = extract_needed_gitlab_mr_message_ref_data(message, pull_request_id, repo_id, tool_source, tool_version, data_source)
 
             message_ref_mapping_data[message["id"]] = {
@@ -231,6 +248,10 @@ def process_gitlab_mr_messages(data, task_name, repo_id, logger, augur_db):
                 extract_needed_gitlab_message_data(message, platform_id, tool_source, tool_version, data_source)
             )
 
+    contributors = remove_duplicate_dicts(contributors)
+
+    logger.info(f"{task_name}: Inserting {len(contributors)} contributors")
+    augur_db.insert_data(contributors, Contributor, ["cntrb_id"])
 
     logger.info(f"{task_name}: Inserting {len(message_dicts)} messages")
     message_natural_keys = ["platform_msg_id"]
@@ -560,3 +581,32 @@ def retrieve_merge_request_data(ids, url, name, owner, repo, key_auth, logger, r
         index += 1
 
     return all_data
+
+
+def process_mr_contributors(mr, tool_source, tool_version, data_source):
+    """Set cntrb_id on the mr and on each assignee; return (mr, list of extracted contributor dicts)."""
+    contributors = []
+
+    issue_cntrb = extract_needed_gitlab_contributor_data(mr["author"], tool_source, tool_version, data_source)
+    mr["cntrb_id"] = issue_cntrb["cntrb_id"] if issue_cntrb else None
+    if issue_cntrb:
+        contributors.append(issue_cntrb)
+
+    for assignee in mr["assignees"]:
+
+        issue_assignee_cntrb = extract_needed_gitlab_contributor_data(assignee, tool_source, tool_version, data_source)
+        assignee["cntrb_id"] = issue_assignee_cntrb["cntrb_id"] if issue_assignee_cntrb else None
+        if issue_assignee_cntrb:
+            contributors.append(issue_assignee_cntrb)
+
+    return mr, contributors
+
+def process_gitlab_mr_comment_contributors(message, tool_source, tool_version, data_source):
+    """Set message["cntrb_id"] from the message author (None if no contributor); return (message, contributor)."""
+    contributor = extract_needed_gitlab_contributor_data(message["author"], tool_source, tool_version, data_source)
+    if contributor:
+        message["cntrb_id"] = contributor["cntrb_id"]
+    else:
+        message["cntrb_id"] = None
+
+    return message, contributor

From 94f419f74d2861d2efa41faf89f26c8a6ae21f80 Mon Sep 17 00:00:00 2001
From: Andrew Brain
Date: Sun, 10 Mar 2024 18:08:00 -0500
Subject: [PATCH 2/6] Update messages table unique key

Change the "message-insert-unique" constraint on augur_data.message from
(platform_msg_id) to (platform_msg_id, pltfrm_id), in both the SQLAlchemy
model and a new Alembic revision (27, revises 26).

---
 augur/application/db/models/augur_data.py     |  2 +-
 .../versions/27_update_messages_unique.py     | 33 +++++++++++++++++++
 2 files changed, 34 insertions(+), 1 deletion(-)
 create mode 100644 augur/application/schema/alembic/versions/27_update_messages_unique.py

diff --git a/augur/application/db/models/augur_data.py b/augur/application/db/models/augur_data.py
index d938391087..606236df21 100644
--- a/augur/application/db/models/augur_data.py
+++ b/augur/application/db/models/augur_data.py
@@ -1474,7 +1474,7 @@ class LstmAnomalyResult(Base):
 class Message(Base):
     __tablename__ = "message"
     __table_args__ = (
-        UniqueConstraint("platform_msg_id", name="message-insert-unique"),
+        UniqueConstraint("platform_msg_id", "pltfrm_id", name="message-insert-unique"),
         Index("msg-cntrb-id-idx", "cntrb_id"),
         Index("platformgrouper", "msg_id", "pltfrm_id"),
         Index("messagegrouper", "msg_id", "rgls_id", unique=True),
diff --git a/augur/application/schema/alembic/versions/27_update_messages_unique.py b/augur/application/schema/alembic/versions/27_update_messages_unique.py
new file mode 100644
index 0000000000..9c60349412
--- /dev/null
+++ b/augur/application/schema/alembic/versions/27_update_messages_unique.py
@@ -0,0 +1,33 @@
+""" Update messages unique
+
+Revision ID: 27
+Revises: 26
+Create Date: 2024-03-10
+
+"""
+from alembic import op
+import sqlalchemy as sa
+from sqlalchemy import text
+
+# revision identifiers, used by Alembic.
+revision = '27'
+down_revision = '26'
+branch_labels = None
+depends_on = None
+
+
+schema_name = 'augur_data'
+table_name = "message"
+constraint_name = "message-insert-unique"
+
+def upgrade():
+    """Replace the unique constraint on platform_msg_id with one on (platform_msg_id, pltfrm_id)."""
+    op.drop_constraint(constraint_name, table_name, schema=schema_name, type_='unique')
+
+    op.create_unique_constraint(constraint_name, table_name, ['platform_msg_id', 'pltfrm_id'], schema=schema_name)
+
+def downgrade():
+    """Restore the unique constraint on platform_msg_id alone (fails if an id now repeats across platforms)."""
+    op.drop_constraint(constraint_name, table_name, schema=schema_name, type_='unique')
+
+    op.create_unique_constraint(constraint_name, table_name, ['platform_msg_id'], schema=schema_name)

From cbe0344e35702e7c43f13f2ebe4551310651218f Mon Sep 17 00:00:00 2001
From: Andrew Brain
Date: Mon, 11 Mar 2024 17:33:16 -0500
Subject: [PATCH 3/6] Add more stuff for pr contributors

Use (platform_msg_id, pltfrm_id) as the natural key for message inserts
in the GitHub message and pull request review comment tasks and in the
GitLab issue and merge request message tasks, matching the updated
"message-insert-unique" constraint. Also make the GitLab merge request
message log lines say "mr message contributors" / "mr messages".

---
 augur/tasks/github/messages/tasks.py      | 2 +-
 augur/tasks/github/pull_requests/tasks.py | 2 +-
 augur/tasks/gitlab/issues_task.py         | 2 +-
 augur/tasks/gitlab/merge_request_task.py  | 6 +++---
 4 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/augur/tasks/github/messages/tasks.py b/augur/tasks/github/messages/tasks.py
index fc1776b2ed..54a4c41e0c 100644
--- a/augur/tasks/github/messages/tasks.py
+++ b/augur/tasks/github/messages/tasks.py
@@ -178,7 +178,7 @@ def process_messages(messages, task_name, repo_id, logger, augur_db):
     augur_db.insert_data(contributors, Contributor, ["cntrb_id"])
 
     logger.info(f"{task_name}: Inserting {len(message_dicts)} messages")
-    message_natural_keys = ["platform_msg_id"]
+    message_natural_keys = ["platform_msg_id", "pltfrm_id"]
     message_return_columns = ["msg_id", "platform_msg_id"]
     message_string_fields = ["msg_text"]
     message_return_data = augur_db.insert_data(message_dicts, Message, message_natural_keys,
diff --git a/augur/tasks/github/pull_requests/tasks.py b/augur/tasks/github/pull_requests/tasks.py
index 37f59e5210..69e40f6818 100644
--- a/augur/tasks/github/pull_requests/tasks.py
+++ b/augur/tasks/github/pull_requests/tasks.py
@@ -279,7 +279,7 @@ def collect_pull_request_review_comments(repo_git: str) -> None:
 
 
         logger.info(f"Inserting {len(pr_review_comment_dicts)} pr review comments")
-        message_natural_keys = ["platform_msg_id"]
+        message_natural_keys = ["platform_msg_id", "pltfrm_id"]
         message_return_columns = ["msg_id", "platform_msg_id"]
         message_return_data = augur_db.insert_data(pr_review_comment_dicts, Message, message_natural_keys, message_return_columns)
         if message_return_data is None:
diff --git a/augur/tasks/gitlab/issues_task.py b/augur/tasks/gitlab/issues_task.py
index a48f243dca..b96650c9a1 100644
--- a/augur/tasks/gitlab/issues_task.py
+++ b/augur/tasks/gitlab/issues_task.py
@@ -329,7 +329,7 @@ def process_gitlab_issue_messages(data, task_name, repo_id, logger, augur_db):
     augur_db.insert_data(contributors, Contributor, ["cntrb_id"])
 
     logger.info(f"{task_name}: Inserting {len(message_dicts)} messages")
-    message_natural_keys = ["platform_msg_id"]
+    message_natural_keys = ["platform_msg_id", "pltfrm_id"]
     message_return_columns = ["msg_id", "platform_msg_id"]
     message_string_fields = ["msg_text"]
     message_return_data = augur_db.insert_data(message_dicts, Message, message_natural_keys,
diff --git a/augur/tasks/gitlab/merge_request_task.py b/augur/tasks/gitlab/merge_request_task.py
index bfc0e6bf5f..c7b71a2645 100644
--- a/augur/tasks/gitlab/merge_request_task.py
+++ b/augur/tasks/gitlab/merge_request_task.py
@@ -250,11 +250,11 @@ def process_gitlab_mr_messages(data, task_name, repo_id, logger, augur_db):
 
     contributors = remove_duplicate_dicts(contributors)
 
-    logger.info(f"{task_name}: Inserting {len(contributors)} contributors")
+    logger.info(f"{task_name}: Inserting {len(contributors)} mr message contributors")
     augur_db.insert_data(contributors, Contributor, ["cntrb_id"])
 
-    logger.info(f"{task_name}: Inserting {len(message_dicts)} messages")
-    message_natural_keys = ["platform_msg_id"]
+    logger.info(f"{task_name}: Inserting {len(message_dicts)} mr messages")
+    message_natural_keys = ["platform_msg_id", "pltfrm_id"]
     message_return_columns = ["msg_id", "platform_msg_id"]
     message_string_fields = ["msg_text"]
     message_return_data = augur_db.insert_data(message_dicts, Message, message_natural_keys,

From 05571511d936f155df6a8e6e5ac4343a68fa46b0 Mon Sep 17 00:00:00 2001
From: Isaac Milarsky
Date: Thu, 14 Mar 2024 19:26:19 -0500
Subject: [PATCH 4/6] handle niche facade errors

Return early from analyze_commits_in_parallel when repo_id is None, and
log an error and return None from get_login_with_supplemental_data when
the user search response has an empty 'items' list instead of raising
IndexError.

Signed-off-by: Isaac Milarsky
---
 augur/tasks/git/facade_tasks.py                        | 2 +-
 .../contributor_interfaceable/contributor_interface.py | 7 ++++++-
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/augur/tasks/git/facade_tasks.py b/augur/tasks/git/facade_tasks.py
index ee3dc047ff..74f1a6ee4b 100644
--- a/augur/tasks/git/facade_tasks.py
+++ b/augur/tasks/git/facade_tasks.py
@@ -252,7 +252,7 @@ def analyze_commits_in_parallel(repo_git, multithreaded: bool)-> None:
 
         session.log_activity('Debug',f"Commits missing from repo {repo_id}: {len(missing_commits)}")
 
-        if not len(missing_commits):
+        if not len(missing_commits) or repo_id is None:
             #session.log_activity('Info','Type of missing_commits: %s' % type(missing_commits))
             return
 
diff --git a/augur/tasks/github/facade_github/contributor_interfaceable/contributor_interface.py b/augur/tasks/github/facade_github/contributor_interfaceable/contributor_interface.py
index 515ebe9ac2..4e6daadf05 100644
--- a/augur/tasks/github/facade_github/contributor_interfaceable/contributor_interface.py
+++ b/augur/tasks/github/facade_github/contributor_interfaceable/contributor_interface.py
@@ -367,7 +367,12 @@ def get_login_with_supplemental_data(logger,db,auth, commit_data):
         return None
 
     # Grab first result and make sure it has the highest match score
-    match = login_json['items'][0]
+    try:
+        match = login_json['items'][0]
+    except IndexError as e:
+        logger.error(f"Ran into error {e} when parsing users with search url: {url}\n return dict: {login_json}")
+        return None
+
     for item in login_json['items']:
         if item['score'] > match['score']:
             match = item

From 4170f960f4a150d6d2d355bbb221d42494af8b07 Mon Sep 17 00:00:00 2001
From: Andrew Brain
Date: Thu, 14 Mar 2024 20:20:31 -0500
Subject: [PATCH 5/6] Add mr assignees

Insert GitLab merge request assignees using
(pr_assignee_src_id, pull_request_id) as the natural key, replacing the
commented-out placeholder.

Import PullRequestAssignee in merge_request_task.py; the insert
references it and would otherwise raise NameError when the task runs.

---
 augur/tasks/gitlab/merge_request_task.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/augur/tasks/gitlab/merge_request_task.py b/augur/tasks/gitlab/merge_request_task.py
index c7b71a2645..d5212a52d4 100644
--- a/augur/tasks/gitlab/merge_request_task.py
+++ b/augur/tasks/gitlab/merge_request_task.py
@@ -6,7 +6,7 @@
 from augur.tasks.gitlab.gitlab_task_session import GitlabTaskManifest
 from augur.application.db.data_parse import extract_needed_pr_data_from_gitlab_merge_request, extract_needed_merge_request_assignee_data, extract_needed_mr_label_data, extract_needed_mr_reviewer_data, extract_needed_mr_commit_data, extract_needed_mr_file_data, extract_needed_mr_metadata, extract_needed_gitlab_mr_message_ref_data, extract_needed_gitlab_message_data, extract_needed_gitlab_contributor_data
 from augur.tasks.github.util.util import get_owner_repo, add_key_value_pair_to_dicts
-from augur.application.db.models import PullRequest, PullRequestLabel, PullRequestMeta, PullRequestCommit, PullRequestFile, PullRequestMessageRef, Repo, Message, Contributor
+from augur.application.db.models import PullRequest, PullRequestLabel, PullRequestMeta, PullRequestCommit, PullRequestFile, PullRequestMessageRef, Repo, Message, Contributor, PullRequestAssignee
 from augur.application.db.util import execute_session_query
 from augur.tasks.util.worker_util import remove_duplicate_dicts
 
@@ -153,9 +153,8 @@ def process_merge_requests(data, task_name, repo_id, logger, augur_db):
 
     logger.info(f"{task_name}: Inserting other pr data of lengths: Labels: {len(mr_label_dicts)} - Assignees: {len(mr_assignee_dicts)}")
 
-    # TODO: Setup unique key on asignees with a value of ('cntrb_id', 'pull_request_id') and add 'cntrb_id' to assingee data
-    # mr_assignee_natural_keys = ['pr_assignee_src_id', 'pull_request_id']
-    # augur_db.insert_data(mr_assignee_dicts, PullRequestAssignee, mr_assignee_natural_keys)
+    mr_assignee_natural_keys = ['pr_assignee_src_id', 'pull_request_id']
+    augur_db.insert_data(mr_assignee_dicts, PullRequestAssignee, mr_assignee_natural_keys)
 
     pr_label_natural_keys = ['pr_src_id', 'pull_request_id']
     pr_label_string_fields = ["pr_src_description"]

From e81e5ea8bfaa20136862988d4313a295e5b03d52 Mon Sep 17 00:00:00 2001
From: Andrew Brain
Date: Thu, 14 Mar 2024 20:25:47 -0500
Subject: [PATCH 6/6] Uncomment message tasks so they run

Import collect_merge_request_comments and collect_gitlab_issue_comments
and enable them in the task groups built by
primary_repo_collect_phase_gitlab.

---
 augur/tasks/start_tasks.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/augur/tasks/start_tasks.py b/augur/tasks/start_tasks.py
index b4116dd7da..2a458649c3 100644
--- a/augur/tasks/start_tasks.py
+++ b/augur/tasks/start_tasks.py
@@ -16,8 +16,8 @@
 from augur.tasks.github.pull_requests.commits_model.tasks import process_pull_request_commits
 from augur.tasks.git.dependency_tasks.tasks import process_ossf_dependency_metrics
 from augur.tasks.github.traffic.tasks import collect_github_repo_clones_data
-from augur.tasks.gitlab.merge_request_task import collect_gitlab_merge_requests, collect_merge_request_metadata, collect_merge_request_commits, collect_merge_request_files
-from augur.tasks.gitlab.issues_task import collect_gitlab_issues
+from augur.tasks.gitlab.merge_request_task import collect_gitlab_merge_requests, collect_merge_request_metadata, collect_merge_request_commits, collect_merge_request_files, collect_merge_request_comments
+from augur.tasks.gitlab.issues_task import collect_gitlab_issues, collect_gitlab_issue_comments
 from augur.tasks.gitlab.events_task import collect_gitlab_issue_events, collect_gitlab_merge_request_events
 from augur.tasks.git.facade_tasks import *
 from augur.tasks.db.refresh_materialized_views import *
@@ -91,7 +91,7 @@ def primary_repo_collect_phase_gitlab(repo_git):
 
     jobs = group(
         chain(collect_gitlab_merge_requests.si(repo_git), group(
-            #collect_merge_request_comments.s(repo_git),
+            collect_merge_request_comments.s(repo_git),
             #collect_merge_request_reviewers.s(repo_git),
             collect_merge_request_metadata.s(repo_git),
             collect_merge_request_commits.s(repo_git),
@@ -99,7 +99,7 @@ def primary_repo_collect_phase_gitlab(repo_git):
             collect_gitlab_merge_request_events.si(repo_git),
         )),
         chain(collect_gitlab_issues.si(repo_git), group(
-            #collect_gitlab_issue_comments.s(repo_git),
+            collect_gitlab_issue_comments.s(repo_git),
             collect_gitlab_issue_events.si(repo_git),
         )),
     )