Skip to content

Commit

Permalink
Merge pull request #2745 from chaoss/dev
Browse files Browse the repository at this point in the history
Dev to index patch
  • Loading branch information
sgoggins authored Mar 15, 2024
2 parents 44faf3d + 9b838d0 commit 7c80774
Show file tree
Hide file tree
Showing 10 changed files with 106 additions and 23 deletions.
6 changes: 2 additions & 4 deletions augur/application/db/data_parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,9 +138,8 @@ def extract_needed_merge_request_assignee_data(assignees: List[dict], repo_id: i
for assignee in assignees:

assignee_dict = {
'contrib_id': None,
'contrib_id': assignee["cntrb_id"],
'repo_id': repo_id,
# TODO: Temporarily setting this to id, which is the id of the contributor, until we can get the contrib_id set and create a unique constraint on the contrib_id and the pull_request_id
'pr_assignee_src_id': assignee["id"],
'tool_source': tool_source,
'tool_version': tool_version,
Expand Down Expand Up @@ -807,8 +806,7 @@ def extract_needed_pr_data_from_gitlab_merge_request(pr, repo_id, tool_source, t
'pr_src_state': pr['state'],
'pr_src_locked': pr['discussion_locked'],
'pr_src_title': pr['title'],
# TODO: Add contributor logic for gitlab
'pr_augur_contributor_id': None,
'pr_augur_contributor_id': pr["cntrb_id"],
'pr_body': pr['description'],
'pr_created_at': pr['created_at'],
'pr_updated_at': pr['updated_at'],
Expand Down
2 changes: 1 addition & 1 deletion augur/application/db/models/augur_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -1474,7 +1474,7 @@ class LstmAnomalyResult(Base):
class Message(Base):
__tablename__ = "message"
__table_args__ = (
UniqueConstraint("platform_msg_id", name="message-insert-unique"),
UniqueConstraint("platform_msg_id", "pltfrm_id", name="message-insert-unique"),
Index("msg-cntrb-id-idx", "cntrb_id"),
Index("platformgrouper", "msg_id", "pltfrm_id"),
Index("messagegrouper", "msg_id", "rgls_id", unique=True),
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
""" Update messages unique
Revision ID: 27
Revises: 26
Create Date: 2024-03-10
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy import text

# revision identifiers, used by Alembic.
# This revision ('27') is applied on top of revision '26'.
revision = '27'
down_revision = '26'
branch_labels = None
depends_on = None


# Target of this migration: the unique constraint that upgrade() and
# downgrade() drop and re-create on the message table in the augur_data schema.
schema_name = 'augur_data'
table_name = "message"
constraint_name = "message-insert-unique"

def upgrade():
    """Re-create the message unique constraint over (platform_msg_id, pltfrm_id)."""
    widened_columns = ['platform_msg_id', 'pltfrm_id']

    # The constraint name is reused, so the existing constraint has to be
    # dropped before the new definition can be created.
    op.drop_constraint(constraint_name, table_name, schema=schema_name, type_='unique')
    op.create_unique_constraint(constraint_name, table_name, widened_columns, schema=schema_name)

def downgrade():
    """Re-create the message unique constraint over platform_msg_id only."""
    original_columns = ['platform_msg_id']

    # Same name as the constraint created by upgrade(): drop it first, then
    # re-create it with the single-column definition.
    op.drop_constraint(constraint_name, table_name, schema=schema_name, type_='unique')
    op.create_unique_constraint(constraint_name, table_name, original_columns, schema=schema_name)
2 changes: 1 addition & 1 deletion augur/tasks/git/facade_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -230,7 +230,7 @@ def analyze_commits_in_parallel(repo_git, multithreaded: bool)-> None:
session.log_activity('Debug',f"Commits missing from repo {repo_id}: {len(missing_commits)}")


if not len(missing_commits):
if not len(missing_commits) or repo_id is None:
#session.log_activity('Info','Type of missing_commits: %s' % type(missing_commits))
return

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -357,7 +357,12 @@ def get_login_with_supplemental_data(logger,db,auth, commit_data):
return None

# Grab first result and make sure it has the highest match score
match = login_json['items'][0]
try:
match = login_json['items'][0]
except IndexError as e:
logger.error(f"Ran into error {e} when parsing users with search url: {url}\n return dict: {login_json}")
return None

for item in login_json['items']:
if item['score'] > match['score']:
match = item
Expand Down
2 changes: 1 addition & 1 deletion augur/tasks/github/messages/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,7 @@ def process_messages(messages, task_name, repo_id, logger, augur_db):
augur_db.insert_data(contributors, Contributor, ["cntrb_id"])

logger.info(f"{task_name}: Inserting {len(message_dicts)} messages")
message_natural_keys = ["platform_msg_id"]
message_natural_keys = ["platform_msg_id", "pltfrm_id"]
message_return_columns = ["msg_id", "platform_msg_id"]
message_string_fields = ["msg_text"]
message_return_data = augur_db.insert_data(message_dicts, Message, message_natural_keys,
Expand Down
2 changes: 1 addition & 1 deletion augur/tasks/github/pull_requests/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -279,7 +279,7 @@ def collect_pull_request_review_comments(repo_git: str) -> None:


logger.info(f"Inserting {len(pr_review_comment_dicts)} pr review comments")
message_natural_keys = ["platform_msg_id"]
message_natural_keys = ["platform_msg_id", "pltfrm_id"]
message_return_columns = ["msg_id", "platform_msg_id"]
message_return_data = augur_db.insert_data(pr_review_comment_dicts, Message, message_natural_keys, message_return_columns)
if message_return_data is None:
Expand Down
6 changes: 3 additions & 3 deletions augur/tasks/gitlab/issues_task.py
Original file line number Diff line number Diff line change
Expand Up @@ -308,7 +308,7 @@ def process_gitlab_issue_messages(data, task_name, repo_id, logger, augur_db):

for message in messages:

message, contributor = process_gitlab_comment_contributors(message, tool_source, tool_version, data_source)
message, contributor = process_gitlab_issue_comment_contributors(message, tool_source, tool_version, data_source)

if contributor:
contributors.append(contributor)
Expand All @@ -329,7 +329,7 @@ def process_gitlab_issue_messages(data, task_name, repo_id, logger, augur_db):
augur_db.insert_data(contributors, Contributor, ["cntrb_id"])

logger.info(f"{task_name}: Inserting {len(message_dicts)} messages")
message_natural_keys = ["platform_msg_id"]
message_natural_keys = ["platform_msg_id", "pltfrm_id"]
message_return_columns = ["msg_id", "platform_msg_id"]
message_string_fields = ["msg_text"]
message_return_data = augur_db.insert_data(message_dicts, Message, message_natural_keys,
Expand All @@ -352,7 +352,7 @@ def process_gitlab_issue_messages(data, task_name, repo_id, logger, augur_db):
augur_db.insert_data(issue_message_ref_dicts, IssueMessageRef, issue_message_ref_natural_keys)


def process_gitlab_comment_contributors(message, tool_source, tool_version, data_source):
def process_gitlab_issue_comment_contributors(message, tool_source, tool_version, data_source):

contributor = extract_needed_gitlab_contributor_data(message["author"], tool_source, tool_version, data_source)
if contributor:
Expand Down
61 changes: 54 additions & 7 deletions augur/tasks/gitlab/merge_request_task.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,11 @@
from augur.tasks.init.celery_app import AugurCoreRepoCollectionTask
from augur.tasks.gitlab.gitlab_api_handler import GitlabApiHandler
from augur.tasks.gitlab.gitlab_task_session import GitlabTaskManifest
from augur.application.db.data_parse import extract_needed_pr_data_from_gitlab_merge_request, extract_needed_merge_request_assignee_data, extract_needed_mr_label_data, extract_needed_mr_reviewer_data, extract_needed_mr_commit_data, extract_needed_mr_file_data, extract_needed_mr_metadata, extract_needed_gitlab_mr_message_ref_data, extract_needed_gitlab_message_data
from augur.application.db.data_parse import extract_needed_pr_data_from_gitlab_merge_request, extract_needed_merge_request_assignee_data, extract_needed_mr_label_data, extract_needed_mr_reviewer_data, extract_needed_mr_commit_data, extract_needed_mr_file_data, extract_needed_mr_metadata, extract_needed_gitlab_mr_message_ref_data, extract_needed_gitlab_message_data, extract_needed_gitlab_contributor_data
from augur.tasks.github.util.util import get_owner_repo, add_key_value_pair_to_dicts
from augur.application.db.models import PullRequest, PullRequestLabel, PullRequestMeta, PullRequestCommit, PullRequestFile, PullRequestMessageRef, Repo, Message
from augur.application.db.models import PullRequest, PullRequestLabel, PullRequestMeta, PullRequestCommit, PullRequestFile, PullRequestMessageRef, Repo, Message, Contributor
from augur.application.db.util import execute_session_query
from augur.tasks.util.worker_util import remove_duplicate_dicts

platform_id = 2

Expand Down Expand Up @@ -99,12 +100,17 @@ def process_merge_requests(data, task_name, repo_id, logger, augur_db):
data_source = "Gitlab API"

merge_requests = []
contributors = []
mr_ids = []
mr_mapping_data = {}
for mr in data:

mr_ids.append(mr["iid"])

mr, contributor_data = process_mr_contributors(mr, tool_source, tool_version, data_source)

contributors += contributor_data

merge_requests.append(extract_needed_pr_data_from_gitlab_merge_request(mr, repo_id, tool_source, tool_version))

assignees = extract_needed_merge_request_assignee_data(mr["assignees"], repo_id, tool_source, tool_version, data_source)
Expand All @@ -117,6 +123,11 @@ def process_merge_requests(data, task_name, repo_id, logger, augur_db):
"labels": labels
}

contributors = remove_duplicate_dicts(contributors)

logger.info(f"{task_name}: Inserting {len(contributors)} contributors")
augur_db.insert_data(contributors, Contributor, ["cntrb_id"])

logger.info(f"{task_name}: Inserting mrs of length: {len(merge_requests)}")
pr_natural_keys = ["repo_id", "pr_src_id"]
pr_string_fields = ["pr_src_title", "pr_body"]
Expand All @@ -142,9 +153,8 @@ def process_merge_requests(data, task_name, repo_id, logger, augur_db):

logger.info(f"{task_name}: Inserting other pr data of lengths: Labels: {len(mr_label_dicts)} - Assignees: {len(mr_assignee_dicts)}")

# TODO: Set up a unique key on assignees with a value of ('cntrb_id', 'pull_request_id') and add 'cntrb_id' to assignee data
# mr_assignee_natural_keys = ['pr_assignee_src_id', 'pull_request_id']
# augur_db.insert_data(mr_assignee_dicts, PullRequestAssignee, mr_assignee_natural_keys)
mr_assignee_natural_keys = ['pr_assignee_src_id', 'pull_request_id']
augur_db.insert_data(mr_assignee_dicts, PullRequestAssignee, mr_assignee_natural_keys)

pr_label_natural_keys = ['pr_src_id', 'pull_request_id']
pr_label_string_fields = ["pr_src_description"]
Expand Down Expand Up @@ -208,6 +218,7 @@ def process_gitlab_mr_messages(data, task_name, repo_id, logger, augur_db):
mr_number_to_id_map[mr.pr_src_number] = mr.pull_request_id

message_dicts = []
contributors = []
message_ref_mapping_data = {}
for id, messages in data.items():

Expand All @@ -221,6 +232,11 @@ def process_gitlab_mr_messages(data, task_name, repo_id, logger, augur_db):

for message in messages:

message, contributor = process_gitlab_mr_comment_contributors(message, tool_source, tool_version, data_source)

if contributor:
contributors.append(contributor)

mr_message_ref_data = extract_needed_gitlab_mr_message_ref_data(message, pull_request_id, repo_id, tool_source, tool_version, data_source)

message_ref_mapping_data[message["id"]] = {
Expand All @@ -231,9 +247,13 @@ def process_gitlab_mr_messages(data, task_name, repo_id, logger, augur_db):
extract_needed_gitlab_message_data(message, platform_id, tool_source, tool_version, data_source)
)

contributors = remove_duplicate_dicts(contributors)

logger.info(f"{task_name}: Inserting {len(contributors)} mr message contributors")
augur_db.insert_data(contributors, Contributor, ["cntrb_id"])

logger.info(f"{task_name}: Inserting {len(message_dicts)} messages")
message_natural_keys = ["platform_msg_id"]
logger.info(f"{task_name}: Inserting {len(message_dicts)} mr messages")
message_natural_keys = ["platform_msg_id", "pltfrm_id"]
message_return_columns = ["msg_id", "platform_msg_id"]
message_string_fields = ["msg_text"]
message_return_data = augur_db.insert_data(message_dicts, Message, message_natural_keys,
Expand Down Expand Up @@ -560,3 +580,30 @@ def retrieve_merge_request_data(ids, url, name, owner, repo, key_auth, logger, r
index += 1

return all_data


def process_mr_contributors(mr, tool_source, tool_version, data_source):
    """Attach contributor ids to a merge request and collect its contributors.

    A contributor record is built with ``extract_needed_gitlab_contributor_data``
    for the merge request author and for every assignee. The resulting
    ``cntrb_id`` is written back onto ``mr`` and onto each assignee dict, so
    both are mutated in place.

    Args:
        mr: Merge request dict; must provide the "author" and "assignees" keys.
        tool_source: Passed through to the contributor extractor.
        tool_version: Passed through to the contributor extractor.
        data_source: Passed through to the contributor extractor.

    Returns:
        A ``(mr, contributors)`` tuple: the same ``mr`` object and the list of
        contributor records that could be built (author first, then assignees).
    """
    contributors = []

    author_cntrb = extract_needed_gitlab_contributor_data(mr["author"], tool_source, tool_version, data_source)
    # The extractor's result is checked for truthiness by the comment
    # contributor helper in this module, so a falsy result is possible here as
    # well. Record None instead of subscripting it, which would abort the
    # whole batch with a TypeError.
    if author_cntrb:
        mr["cntrb_id"] = author_cntrb["cntrb_id"]
        contributors.append(author_cntrb)
    else:
        mr["cntrb_id"] = None

    for assignee in mr["assignees"]:
        assignee_cntrb = extract_needed_gitlab_contributor_data(assignee, tool_source, tool_version, data_source)
        if assignee_cntrb:
            assignee["cntrb_id"] = assignee_cntrb["cntrb_id"]
            contributors.append(assignee_cntrb)
        else:
            assignee["cntrb_id"] = None

    return mr, contributors

def process_gitlab_mr_comment_contributors(message, tool_source, tool_version, data_source):
    """Tag a merge request comment with its author's contributor id.

    Builds a contributor record from ``message["author"]`` and stores its
    ``cntrb_id`` on ``message`` (mutated in place); ``None`` is stored when the
    extractor yields a falsy result.

    Returns:
        A ``(message, contributor)`` tuple, where ``contributor`` is whatever
        the extractor returned.
    """
    author_record = extract_needed_gitlab_contributor_data(
        message["author"], tool_source, tool_version, data_source
    )

    # A falsy extractor result means there is no contributor to link.
    message["cntrb_id"] = author_record["cntrb_id"] if author_record else None

    return message, author_record
8 changes: 4 additions & 4 deletions augur/tasks/start_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@
from augur.tasks.github.pull_requests.commits_model.tasks import process_pull_request_commits
from augur.tasks.git.dependency_tasks.tasks import process_ossf_dependency_metrics
from augur.tasks.github.traffic.tasks import collect_github_repo_clones_data
from augur.tasks.gitlab.merge_request_task import collect_gitlab_merge_requests, collect_merge_request_metadata, collect_merge_request_commits, collect_merge_request_files
from augur.tasks.gitlab.issues_task import collect_gitlab_issues
from augur.tasks.gitlab.merge_request_task import collect_gitlab_merge_requests, collect_merge_request_metadata, collect_merge_request_commits, collect_merge_request_files, collect_merge_request_comments
from augur.tasks.gitlab.issues_task import collect_gitlab_issues, collect_gitlab_issue_comments
from augur.tasks.gitlab.events_task import collect_gitlab_issue_events, collect_gitlab_merge_request_events
from augur.tasks.git.facade_tasks import *
from augur.tasks.db.refresh_materialized_views import *
Expand Down Expand Up @@ -91,15 +91,15 @@ def primary_repo_collect_phase_gitlab(repo_git):

jobs = group(
chain(collect_gitlab_merge_requests.si(repo_git), group(
#collect_merge_request_comments.s(repo_git),
collect_merge_request_comments.s(repo_git),
#collect_merge_request_reviewers.s(repo_git),
collect_merge_request_metadata.s(repo_git),
collect_merge_request_commits.s(repo_git),
collect_merge_request_files.s(repo_git),
collect_gitlab_merge_request_events.si(repo_git),
)),
chain(collect_gitlab_issues.si(repo_git), group(
#collect_gitlab_issue_comments.s(repo_git),
collect_gitlab_issue_comments.s(repo_git),
collect_gitlab_issue_events.si(repo_git),
)),
)
Expand Down

0 comments on commit 7c80774

Please sign in to comment.