From 78efe66cb6edccd6ac55bdfcaa57366bea142845 Mon Sep 17 00:00:00 2001 From: Isaac Milarsky Date: Mon, 21 Aug 2023 12:26:17 -0500 Subject: [PATCH 01/12] Add scc install for value worker to makefiles Signed-off-by: Isaac Milarsky --- augur/tasks/git/dependency_tasks/core.py | 2 +- scripts/install/workers.sh | 17 +++++++++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/augur/tasks/git/dependency_tasks/core.py b/augur/tasks/git/dependency_tasks/core.py index 4fdb3955ed..317311edbf 100644 --- a/augur/tasks/git/dependency_tasks/core.py +++ b/augur/tasks/git/dependency_tasks/core.py @@ -14,7 +14,7 @@ from augur.tasks.git.dependency_tasks.dependency_util import dependency_calculator as dep_calc def generate_deps_data(session, repo_id, path): - """Runs scc on repo and stores data in database + """Run dependency logic on repo and stores data in database :param repo_id: Repository ID :param path: Absolute path of the Repostiory """ diff --git a/scripts/install/workers.sh b/scripts/install/workers.sh index 6a7c95f48f..4f6dd7a62b 100755 --- a/scripts/install/workers.sh +++ b/scripts/install/workers.sh @@ -80,3 +80,20 @@ else echo "scorecard build done" cd $CURRENT_DIR fi + +#Do the same thing for scc for value worker +if [ -d "$HOME/scc" ]; then + echo " Scc already exists, skipping cloning ..." + echo " Updating Scc ... " + rm -rf $HOME/scc +fi + +echo "Cloning Sloc Cloc and Code (SCC) to generate value data ..." +git clone https://github.com/boyter/scc $HOME/scc +cd $HOME/scc +CURRENT_DIR=$PWD; +cd $CURRENT_DIR +cd $HOME/scc; +go build; +echo "scc build done" +cd $CURRENT_DIR \ No newline at end of file From 91ab0a6bcb490425cd7e0b1defc4438bd4c73a5e Mon Sep 17 00:00:00 2001 From: Isaac Milarsky Date: Mon, 21 Aug 2023 13:22:07 -0500 Subject: [PATCH 02/12] start implementing new value worker functionality Signed-off-by: Isaac Milarsky --- augur/tasks/git/dependency_tasks/core.py | 1 + augur/tasks/git/scc_value_tasks/__init__.py | 0 augur/tasks/git/scc_value_tasks/core.py | 22 ++++++++++++++++++++ augur/tasks/git/scc_value_tasks/tasks.py | 23 +++++++++++++++++++++ 4 files changed, 46 insertions(+) create mode 100644 augur/tasks/git/scc_value_tasks/__init__.py create mode 100644 augur/tasks/git/scc_value_tasks/core.py create mode 100644 augur/tasks/git/scc_value_tasks/tasks.py diff --git a/augur/tasks/git/dependency_tasks/core.py b/augur/tasks/git/dependency_tasks/core.py index 317311edbf..0c2d462d23 100644 --- a/augur/tasks/git/dependency_tasks/core.py +++ b/augur/tasks/git/dependency_tasks/core.py @@ -12,6 +12,7 @@ from augur.tasks.github.util.github_api_key_handler import GithubApiKeyHandler from augur.application.db.util import execute_session_query from augur.tasks.git.dependency_tasks.dependency_util import dependency_calculator as dep_calc +from augut.tasks.git.utilitymethods import get_absolute_repo_path def generate_deps_data(session, repo_id, path): """Run dependency logic on repo and stores data in database diff --git a/augur/tasks/git/scc_value_tasks/__init__.py b/augur/tasks/git/scc_value_tasks/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/augur/tasks/git/scc_value_tasks/core.py b/augur/tasks/git/scc_value_tasks/core.py new file mode 100644 index 0000000000..d4fad6b945 --- /dev/null +++ b/augur/tasks/git/scc_value_tasks/core.py @@ -0,0 +1,22 @@ +from datetime import datetime +import logging +import requests +import json +import os +import subprocess +import re +import traceback +from augur.application.db.models import * +from augur.application.db.session import DatabaseSession +from augur.application.config import AugurConfig +from augur.tasks.github.util.github_api_key_handler import GithubApiKeyHandler +from augur.application.db.util import execute_session_query + +def value_model(session,repo_id, path): + """Runs scc on repo and stores data in database + :param repo_id: Repository ID + :param path: URL path of the Repostiory + """ + + session.logger.info('Generating value data for repo') + session.logger.info(f"Repo ID: {repo_id}, Path: {path}") \ No newline at end of file diff --git a/augur/tasks/git/scc_value_tasks/tasks.py b/augur/tasks/git/scc_value_tasks/tasks.py new file mode 100644 index 0000000000..ad3fe81885 --- /dev/null +++ b/augur/tasks/git/scc_value_tasks/tasks.py @@ -0,0 +1,23 @@ +import logging +import traceback +from augur.application.db.session import DatabaseSession +from augur.tasks.git.scc_value_tasks.core import * +from augur.tasks.init.celery_app import celery_app as celery +from augur.tasks.init.celery_app import AugurFacadeRepoCollectionTask, AugurCoreRepoCollectionTask +from augur.application.db.util import execute_session_query + +@celery.task(base=AugurFacadeRepoCollectionTask) +def process_scc_value_metrics(repo_git): + + from augur.tasks.init.celery_app import engine + + logger = logging.getLogger(process_scc_value_metrics.__name__) + + with DatabaseSession(logger,engine) as session: + logger.info(f"repo_git: {repo_git}") + + query = session.query(Repo).filter(Repo.repo_git == repo_git) + + repo = execute_session_query(query, 'one') + + value_model(session,repo.repo_id, repo_git) \ No newline at end of file From 6d473c3621e2df346d6b8f1dd9affba0d6389f31 Mon Sep 17 00:00:00 2001 From: Isaac Milarsky Date: Mon, 21 Aug 2023 13:51:33 -0500 Subject: [PATCH 03/12] more progress with value task Signed-off-by: Isaac Milarsky --- augur/tasks/git/dependency_tasks/core.py | 1 - augur/tasks/git/scc_value_tasks/core.py | 7 +++++-- augur/tasks/git/scc_value_tasks/tasks.py | 9 +++++++-- 3 files changed, 12 insertions(+), 5 deletions(-) diff --git a/augur/tasks/git/dependency_tasks/core.py b/augur/tasks/git/dependency_tasks/core.py index 0c2d462d23..317311edbf 100644 --- a/augur/tasks/git/dependency_tasks/core.py +++ b/augur/tasks/git/dependency_tasks/core.py @@ -12,7 +12,6 @@ from augur.tasks.github.util.github_api_key_handler import GithubApiKeyHandler from augur.application.db.util import execute_session_query from augur.tasks.git.dependency_tasks.dependency_util import dependency_calculator as dep_calc -from augut.tasks.git.utilitymethods import get_absolute_repo_path def generate_deps_data(session, repo_id, path): """Run dependency logic on repo and stores data in database diff --git a/augur/tasks/git/scc_value_tasks/core.py b/augur/tasks/git/scc_value_tasks/core.py index d4fad6b945..f769a7233e 100644 --- a/augur/tasks/git/scc_value_tasks/core.py +++ b/augur/tasks/git/scc_value_tasks/core.py @@ -15,8 +15,11 @@ def value_model(session,repo_id, path): """Runs scc on repo and stores data in database :param repo_id: Repository ID - :param path: URL path of the Repostiory + :param path: absolute file path of the Repostiory """ session.logger.info('Generating value data for repo') - session.logger.info(f"Repo ID: {repo_id}, Path: {path}") \ No newline at end of file + session.logger.info(f"Repo ID: {repo_id}, Path: {path}") + session.logger.info('Running scc...') + + \ No newline at end of file diff --git a/augur/tasks/git/scc_value_tasks/tasks.py b/augur/tasks/git/scc_value_tasks/tasks.py index ad3fe81885..0039886024 100644 --- a/augur/tasks/git/scc_value_tasks/tasks.py +++ b/augur/tasks/git/scc_value_tasks/tasks.py @@ -5,6 +5,9 @@ from augur.tasks.init.celery_app import celery_app as celery from augur.tasks.init.celery_app import AugurFacadeRepoCollectionTask, AugurCoreRepoCollectionTask from augur.application.db.util import execute_session_query +from augur.application.config import AugurConfig +from augur.tasks.git.util.facade_worker.facade_worker.utilitymethods import get_absolute_repo_path + @celery.task(base=AugurFacadeRepoCollectionTask) def process_scc_value_metrics(repo_git): @@ -17,7 +20,9 @@ def process_scc_value_metrics(repo_git): logger.info(f"repo_git: {repo_git}") query = session.query(Repo).filter(Repo.repo_git == repo_git) - repo = execute_session_query(query, 'one') - value_model(session,repo.repo_id, repo_git) \ No newline at end of file + config = AugurConfig(session.logger, session) + absolute_repo_path = get_absolute_repo_path(config.get_section("Facade")['repo_directory'],repo.repo_id,repo.repo_path,repo.repo_name) + + value_model(session,repo.repo_id, absolute_repo_path) \ No newline at end of file From 38ee211f07394a1d56b1c5cfa9eb01feeff0c01d Mon Sep 17 00:00:00 2001 From: Isaac Milarsky Date: Mon, 21 Aug 2023 13:58:44 -0500 Subject: [PATCH 04/12] parse output of scc into json Signed-off-by: Isaac Milarsky --- augur/tasks/git/scc_value_tasks/core.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/augur/tasks/git/scc_value_tasks/core.py b/augur/tasks/git/scc_value_tasks/core.py index f769a7233e..a3bc8e5baa 100644 --- a/augur/tasks/git/scc_value_tasks/core.py +++ b/augur/tasks/git/scc_value_tasks/core.py @@ -22,4 +22,19 @@ def value_model(session,repo_id, path): session.logger.info(f"Repo ID: {repo_id}, Path: {path}") session.logger.info('Running scc...') + path_to_scc = os.environ['HOME'] + '/scc' + + p = subprocess.run(['./scc', '-f','json', path], cwd=path_to_scc, capture_output=True, text=True, timeout=None) + session.logger.info('scc has completed... ') + output = p.stdout + + try: + required_data = json.loads(output) + except json.decoder.JSONDecodeError as e: + session.logger.error(f"Could not parse required output! \n output: {output} \n Error: {e}") + return + + session.logger.info('adding scc data to database... ') + session.logger.debug(f"output: {required_output}") + \ No newline at end of file From 6fe820cce06c5cf6b62fbe649ede1e725d24e35f Mon Sep 17 00:00:00 2001 From: Isaac Milarsky Date: Mon, 21 Aug 2023 14:10:45 -0500 Subject: [PATCH 05/12] add method to parse json from subprocess call because we now do like the exact same thing in two places Signed-off-by: Isaac Milarsky --- augur/tasks/git/dependency_tasks/core.py | 13 +++---------- augur/tasks/git/scc_value_tasks/core.py | 11 ++--------- augur/tasks/util/worker_util.py | 20 ++++++++++++++++++++ 3 files changed, 25 insertions(+), 19 deletions(-) diff --git a/augur/tasks/git/dependency_tasks/core.py b/augur/tasks/git/dependency_tasks/core.py index 317311edbf..b00f8b863b 100644 --- a/augur/tasks/git/dependency_tasks/core.py +++ b/augur/tasks/git/dependency_tasks/core.py @@ -12,6 +12,7 @@ from augur.tasks.github.util.github_api_key_handler import GithubApiKeyHandler from augur.application.db.util import execute_session_query from augur.tasks.git.dependency_tasks.dependency_util import dependency_calculator as dep_calc +from augur.tasks.util.worker_util import parse_json_from_subprocess_call def generate_deps_data(session, repo_id, path): """Run dependency logic on repo and stores data in database @@ -86,16 +87,8 @@ def generate_scorecard(session,repo_id,path): key_handler = GithubApiKeyHandler(session) os.environ['GITHUB_AUTH_TOKEN'] = key_handler.get_random_key() - p= subprocess.run(['./scorecard', command, '--format=json'], cwd= path_to_scorecard ,capture_output=True, text=True, timeout=None) - session.logger.info('subprocess completed successfully... ') - output = p.stdout - - try: - required_output = json.loads(output) - except json.decoder.JSONDecodeError as e: - session.logger.error(f"Could not parse required output! \n output: {output} \n Error: {e}") - return - + required_output = parse_json_from_subprocess_call(session.logger,['./scorecard', command, '--format=json'],cwd=path_to_scorecard) + session.logger.info('adding to database...') session.logger.debug(f"output: {required_output}") diff --git a/augur/tasks/git/scc_value_tasks/core.py b/augur/tasks/git/scc_value_tasks/core.py index a3bc8e5baa..af94a41b13 100644 --- a/augur/tasks/git/scc_value_tasks/core.py +++ b/augur/tasks/git/scc_value_tasks/core.py @@ -11,6 +11,7 @@ from augur.application.config import AugurConfig from augur.tasks.github.util.github_api_key_handler import GithubApiKeyHandler from augur.application.db.util import execute_session_query +from augur.tasks.util.worker_util import parse_json_from_subprocess_call def value_model(session,repo_id, path): """Runs scc on repo and stores data in database @@ -24,15 +25,7 @@ def value_model(session,repo_id, path): path_to_scc = os.environ['HOME'] + '/scc' - p = subprocess.run(['./scc', '-f','json', path], cwd=path_to_scc, capture_output=True, text=True, timeout=None) - session.logger.info('scc has completed... ') - output = p.stdout - - try: - required_data = json.loads(output) - except json.decoder.JSONDecodeError as e: - session.logger.error(f"Could not parse required output! \n output: {output} \n Error: {e}") - return + required_output = parse_json_from_subprocess_call(session.logger,['./scc', '-f','json', path], cwd=path_to_scc) session.logger.info('adding scc data to database... ') session.logger.debug(f"output: {required_output}") diff --git a/augur/tasks/util/worker_util.py b/augur/tasks/util/worker_util.py index b12646cd31..6380ed22b0 100644 --- a/augur/tasks/util/worker_util.py +++ b/augur/tasks/util/worker_util.py @@ -9,6 +9,8 @@ from typing import Optional, List, Any, Tuple from datetime import datetime, timedelta +import json +import subprocess def create_grouped_task_load(*args,processes=8,dataList=[],task=None): @@ -122,6 +124,24 @@ def calculate_date_weight_from_timestamps(added,last_collection,domain_start_day #Else increase its weight return -1 * factor +def parse_json_from_subprocess_call(logger, subprocess_arr, cwd=None): + logger.info(f"running subprocess {subprocess_arr[0]}") + if cwd: + p = subprocess.run(subprocess_arr,cwd=cwd,capture_output=True, text=True, timeout=None) + else: + p = subprocess.run(subprocess_arr,capture_output=True, text=True, timeout=None) + + logger.info('subprocess completed... ') + + output = p.stdout + + try: + required_output = json.loads(output) + except json.decoder.JSONDecodeError as e: + session.logger.error(f"Could not parse required output! \n output: {output} \n Error: {e}") + raise e + + return required_output # def create_server(app, worker=None): From 980a8a96a7a94e2996547c15808356b57a59833b Mon Sep 17 00:00:00 2001 From: Isaac Milarsky Date: Mon, 21 Aug 2023 14:17:38 -0500 Subject: [PATCH 06/12] Finish first draft of implementation before testing Signed-off-by: Isaac Milarsky --- augur/tasks/git/scc_value_tasks/core.py | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/augur/tasks/git/scc_value_tasks/core.py b/augur/tasks/git/scc_value_tasks/core.py index af94a41b13..91caacd5ad 100644 --- a/augur/tasks/git/scc_value_tasks/core.py +++ b/augur/tasks/git/scc_value_tasks/core.py @@ -30,4 +30,27 @@ def value_model(session,repo_id, path): session.logger.info('adding scc data to database... ') session.logger.debug(f"output: {required_output}") - \ No newline at end of file + to_insert = [] + for record in required_output: + for file in record['Files']: + repo_labor = { + 'repo_id': repo_id, + 'rl_analysis_date': datetime.now().strftime('%Y-%m-%dT%H:%M:%SZ'), + 'programming_language': file['Language'], + 'file_path': file['Location'], + 'file_name': file['Filename'], + 'total_lines': file['Lines'], + 'code_lines': file['Code'], + 'comment_lines': file['Comment'], + 'blank_lines': file['Blank'], + 'code_complexity': file['Complexity'], + 'tool_source': 'value_model', + 'data_source': 'Git', + 'data_collection_date': datetime.now().strftime('%Y-%m-%dT%H:%M:%SZ') + } + + to_insert.append(repo_labor) + + session.insert_data(to_insert, RepoLabor, ["repo_id", "rl_analysis_date", "file_path", "file_name" ]) + + session.logger.info(f"Done generating scc data for repo {repo_id} from path {path}") From c74933c93945816a1fec72cfe3740088d7b6b4d3 Mon Sep 17 00:00:00 2001 From: Isaac Milarsky Date: Fri, 25 Aug 2023 10:06:18 -0500 Subject: [PATCH 07/12] add scc value task to facade tasks Signed-off-by: Isaac Milarsky --- augur/tasks/git/facade_tasks.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/augur/tasks/git/facade_tasks.py b/augur/tasks/git/facade_tasks.py index d407011b06..3f08fde974 100644 --- a/augur/tasks/git/facade_tasks.py +++ b/augur/tasks/git/facade_tasks.py @@ -46,6 +46,7 @@ from augur.tasks.git.dependency_tasks.tasks import process_dependency_metrics from augur.tasks.git.dependency_libyear_tasks.tasks import process_libyear_dependency_metrics +from augur.tasks.git.scc_value_tasks.tasks import process_scc_value_metrics from augur.tasks.github.util.github_paginator import GithubPaginator, hit_api from augur.tasks.github.util.gh_graphql_entities import PullRequest @@ -526,7 +527,8 @@ def facade_phase(repo_git): group( chain(*facade_core_collection), process_dependency_metrics.si(repo_git), - process_libyear_dependency_metrics.si(repo_git) + process_libyear_dependency_metrics.si(repo_git), + process_scc_value_metrics.si(repo_git) ) ) From 497aa9dacf1449fda777dfa52bab183036b14182 Mon Sep 17 00:00:00 2001 From: Isaac Milarsky Date: Fri, 25 Aug 2023 12:06:26 -0500 Subject: [PATCH 08/12] missing celery task declare Signed-off-by: Isaac Milarsky --- augur/tasks/init/celery_app.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/augur/tasks/init/celery_app.py b/augur/tasks/init/celery_app.py index ac6e18fc64..706541d1c7 100644 --- a/augur/tasks/init/celery_app.py +++ b/augur/tasks/init/celery_app.py @@ -52,7 +52,8 @@ class CollectionState(Enum): git_tasks = ['augur.tasks.git.facade_tasks', 'augur.tasks.git.dependency_tasks.tasks', - 'augur.tasks.git.dependency_libyear_tasks.tasks'] + 'augur.tasks.git.dependency_libyear_tasks.tasks', + 'augur.tasks.git.scc_value_tasks.tasks'] data_analysis_tasks = ['augur.tasks.data_analysis.message_insights.tasks', 'augur.tasks.data_analysis.clustering_worker.tasks', @@ -139,6 +140,7 @@ def on_failure(self,exc,task_id,args,kwargs,einfo): 'augur.tasks.github.pull_requests.tasks.collect_pull_request_review_comments': {'queue': 'secondary'}, 'augur.tasks.git.dependency_tasks.tasks.process_ossf_dependency_metrics': {'queue': 'secondary'}, 'augur.tasks.git.dependency_tasks.tasks.process_dependency_metrics': {'queue': 'facade'}, + 'augur.tasks.git.scc_value_tasks.tasks.process_scc_value_metrics' : {'queue': 'facade'}, 'augur.tasks.git.dependency_libyear_tasks.tasks.process_libyear_dependency_metrics': {'queue': 'facade'}, 'augur.tasks.frontend.*': {'queue': 'frontend'}, 'augur.tasks.data_analysis.contributor_breadth_worker.*': {'queue': 'secondary'}, From a2fc994c4e1a799ebf73f5f381897a3069a241f3 Mon Sep 17 00:00:00 2001 From: Isaac Milarsky Date: Fri, 25 Aug 2023 12:36:27 -0500 Subject: [PATCH 09/12] make sure scc actually gives us file data Signed-off-by: Isaac Milarsky --- augur/tasks/git/scc_value_tasks/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/augur/tasks/git/scc_value_tasks/core.py b/augur/tasks/git/scc_value_tasks/core.py index 91caacd5ad..1e159523e8 100644 --- a/augur/tasks/git/scc_value_tasks/core.py +++ b/augur/tasks/git/scc_value_tasks/core.py @@ -25,7 +25,7 @@ def value_model(session,repo_id, path): path_to_scc = os.environ['HOME'] + '/scc' - required_output = parse_json_from_subprocess_call(session.logger,['./scc', '-f','json', path], cwd=path_to_scc) + required_output = parse_json_from_subprocess_call(session.logger,['./scc', '-f','json','--by-file', path], cwd=path_to_scc) session.logger.info('adding scc data to database... ') session.logger.debug(f"output: {required_output}") From e483a2c0e594c5028c4c98c6423d78f048390e4c Mon Sep 17 00:00:00 2001 From: Isaac Milarsky Date: Fri, 25 Aug 2023 13:13:22 -0500 Subject: [PATCH 10/12] make it so that the unique constraint on repo_labor isn't initially deferred so that it can be used in postgres ON CONFLICT logic Signed-off-by: Isaac Milarsky --- .../versions/22_alter_repo_labor_unique.py | 49 +++++++++++++++++++ 1 file changed, 49 insertions(+) create mode 100644 augur/application/schema/alembic/versions/22_alter_repo_labor_unique.py diff --git a/augur/application/schema/alembic/versions/22_alter_repo_labor_unique.py b/augur/application/schema/alembic/versions/22_alter_repo_labor_unique.py new file mode 100644 index 0000000000..151c964fc7 --- /dev/null +++ b/augur/application/schema/alembic/versions/22_alter_repo_labor_unique.py @@ -0,0 +1,49 @@ +"""Add ml tasks + +Revision ID: 22 +Revises: 21 +Create Date: 2023-08-25 18:17:22.651191 + +""" +from alembic import op +import sqlalchemy as sa +from sqlalchemy.dialects import postgresql +from sqlalchemy.sql import text +import re + +# revision identifiers, used by Alembic. +revision = '22' +down_revision = '21' +branch_labels = None +depends_on = None + + +def upgrade(): + # ### commands auto generated by Alembic - please adjust! ### + + conn = op.get_bind() + + #Remove constraint being initially deferred. + conn.execute(text(f""" + ALTER TABLE "augur_data"."repo_labor" + DROP CONSTRAINT IF EXISTS "rl-unique", + ADD CONSTRAINT "rl-unique" UNIQUE ("repo_id", "rl_analysis_date", "file_path", "file_name"); + """)) + """ + + """ + # ### end Alembic commands ### + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! ### + conn = op.get_bind() + + #Make unique initially deferred + conn.execute(text(f""" + ALTER TABLE "augur_data"."repo_labor" + DROP CONSTRAINT IF EXISTS "rl-unique", + ADD CONSTRAINT "rl-unique" UNIQUE ("repo_id", "rl_analysis_date", "file_path", "file_name") DEFERRABLE INITIALLY DEFERRED; + """)) + + # ### end Alembic commands ### From fd88add12e87f16a3b0e3a41a2aca7f86d83f037 Mon Sep 17 00:00:00 2001 From: Isaac Milarsky Date: Fri, 25 Aug 2023 13:23:35 -0500 Subject: [PATCH 11/12] alter name of alembic revision Signed-off-by: Isaac Milarsky --- .../schema/alembic/versions/22_alter_repo_labor_unique.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/augur/application/schema/alembic/versions/22_alter_repo_labor_unique.py b/augur/application/schema/alembic/versions/22_alter_repo_labor_unique.py index 151c964fc7..8a981073b5 100644 --- a/augur/application/schema/alembic/versions/22_alter_repo_labor_unique.py +++ b/augur/application/schema/alembic/versions/22_alter_repo_labor_unique.py @@ -1,4 +1,4 @@ -"""Add ml tasks +"""Alter repo labor unique Revision ID: 22 Revises: 21 From b2774627ddbacff8e0c84c2514cb066c8404671b Mon Sep 17 00:00:00 2001 From: Isaac Milarsky Date: Fri, 25 Aug 2023 13:48:32 -0500 Subject: [PATCH 12/12] add repo_url field to repo_labor insert Signed-off-by: Isaac Milarsky --- augur/tasks/git/scc_value_tasks/core.py | 3 ++- augur/tasks/git/scc_value_tasks/tasks.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/augur/tasks/git/scc_value_tasks/core.py b/augur/tasks/git/scc_value_tasks/core.py index 1e159523e8..5fd7afb7b8 100644 --- a/augur/tasks/git/scc_value_tasks/core.py +++ b/augur/tasks/git/scc_value_tasks/core.py @@ -13,7 +13,7 @@ from augur.application.db.util import execute_session_query from augur.tasks.util.worker_util import parse_json_from_subprocess_call -def value_model(session,repo_id, path): +def value_model(session,repo_git,repo_id, path): """Runs scc on repo and stores data in database :param repo_id: Repository ID :param path: absolute file path of the Repostiory @@ -44,6 +44,7 @@ def value_model(session,repo_id, path): 'comment_lines': file['Comment'], 'blank_lines': file['Blank'], 'code_complexity': file['Complexity'], + 'repo_url': repo_git, 'tool_source': 'value_model', 'data_source': 'Git', 'data_collection_date': datetime.now().strftime('%Y-%m-%dT%H:%M:%SZ') diff --git a/augur/tasks/git/scc_value_tasks/tasks.py b/augur/tasks/git/scc_value_tasks/tasks.py index 0039886024..a2e4d11fc8 100644 --- a/augur/tasks/git/scc_value_tasks/tasks.py +++ b/augur/tasks/git/scc_value_tasks/tasks.py @@ -25,4 +25,4 @@ def process_scc_value_metrics(repo_git): config = AugurConfig(session.logger, session) absolute_repo_path = get_absolute_repo_path(config.get_section("Facade")['repo_directory'],repo.repo_id,repo.repo_path,repo.repo_name) - value_model(session,repo.repo_id, absolute_repo_path) \ No newline at end of file + value_model(session,repo_git,repo.repo_id, absolute_repo_path) \ No newline at end of file