Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Value worker migrate to task #2497

Merged
merged 13 commits into from
Aug 25, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
"""Alter repo labor unique

Revision ID: 22
Revises: 21
Create Date: 2023-08-25 18:17:22.651191

"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
from sqlalchemy.sql import text
import re

# revision identifiers, used by Alembic.
# `revision` is this migration's ID and `down_revision` is the migration it
# revises (matching "Revision ID" / "Revises" in the module docstring).
revision = '22'
down_revision = '21'
# No branch labels or cross-migration dependencies are declared.
branch_labels = None
depends_on = None


def upgrade():
    """Recreate ``rl-unique`` on ``augur_data.repo_labor`` as a plain UNIQUE constraint.

    The constraint is dropped (if present) and re-added over
    (repo_id, rl_analysis_date, file_path, file_name) without any
    DEFERRABLE clause, i.e. it is no longer initially deferred.

    NOTE(review): presumably needed so the constraint can serve as an
    ``ON CONFLICT`` target, which PostgreSQL does not allow for deferrable
    constraints -- confirm against the code that inserts into repo_labor.
    """
    # ### commands auto generated by Alembic - please adjust! ###

    # Remove constraint being initially deferred.
    # op.execute() is used instead of op.get_bind().execute() so the statement
    # is also emitted correctly when Alembic runs in offline (--sql) mode,
    # where no live connection is available.
    op.execute(text("""
        ALTER TABLE "augur_data"."repo_labor"
            DROP CONSTRAINT IF EXISTS "rl-unique",
            ADD CONSTRAINT "rl-unique" UNIQUE ("repo_id", "rl_analysis_date", "file_path", "file_name");
    """))
    # ### end Alembic commands ###


def downgrade():
    """Restore ``rl-unique`` on ``augur_data.repo_labor`` as DEFERRABLE INITIALLY DEFERRED.

    Inverse of ``upgrade``: the constraint is dropped (if present) and
    re-added over the same four columns, this time with the
    ``DEFERRABLE INITIALLY DEFERRED`` clause.
    """
    # ### commands auto generated by Alembic - please adjust! ###

    # Make unique initially deferred
    # op.execute() is used instead of op.get_bind().execute() so the statement
    # is also emitted correctly when Alembic runs in offline (--sql) mode,
    # where no live connection is available.
    op.execute(text("""
        ALTER TABLE "augur_data"."repo_labor"
            DROP CONSTRAINT IF EXISTS "rl-unique",
            ADD CONSTRAINT "rl-unique" UNIQUE ("repo_id", "rl_analysis_date", "file_path", "file_name") DEFERRABLE INITIALLY DEFERRED;
    """))

    # ### end Alembic commands ###
15 changes: 4 additions & 11 deletions augur/tasks/git/dependency_tasks/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,10 @@
from augur.tasks.github.util.github_api_key_handler import GithubApiKeyHandler
from augur.application.db.util import execute_session_query
from augur.tasks.git.dependency_tasks.dependency_util import dependency_calculator as dep_calc
from augur.tasks.util.worker_util import parse_json_from_subprocess_call

def generate_deps_data(session, repo_id, path):
"""Runs deps modules on repo and stores data in database
"""Run dependency logic on repo and stores data in database
:param repo_id: Repository ID
:param path: Absolute path of the Repostiory
"""
Expand Down Expand Up @@ -80,16 +81,8 @@ def generate_scorecard(session,repo_id,path):
key_handler = GithubApiKeyHandler(session)
os.environ['GITHUB_AUTH_TOKEN'] = key_handler.get_random_key()

p= subprocess.run(['./scorecard', command, '--format=json'], cwd= path_to_scorecard ,capture_output=True, text=True, timeout=None)
session.logger.info('subprocess completed successfully... ')
output = p.stdout

try:
required_output = json.loads(output)
except json.decoder.JSONDecodeError as e:
session.logger.error(f"Could not parse required output! \n output: {output} \n Error: {e}")
return

required_output = parse_json_from_subprocess_call(session.logger,['./scorecard', command, '--format=json'],cwd=path_to_scorecard)

session.logger.info('adding to database...')
session.logger.debug(f"output: {required_output}")

Expand Down
4 changes: 3 additions & 1 deletion augur/tasks/git/facade_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@

from augur.tasks.git.dependency_tasks.tasks import process_dependency_metrics
from augur.tasks.git.dependency_libyear_tasks.tasks import process_libyear_dependency_metrics
from augur.tasks.git.scc_value_tasks.tasks import process_scc_value_metrics

from augur.tasks.github.util.github_paginator import GithubPaginator, hit_api
from augur.tasks.github.util.gh_graphql_entities import PullRequest
Expand Down Expand Up @@ -526,7 +527,8 @@ def facade_phase(repo_git):
group(
chain(*facade_core_collection),
process_dependency_metrics.si(repo_git),
process_libyear_dependency_metrics.si(repo_git)
process_libyear_dependency_metrics.si(repo_git),
process_scc_value_metrics.si(repo_git)
)
)

Expand Down
Empty file.
57 changes: 57 additions & 0 deletions augur/tasks/git/scc_value_tasks/core.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
from datetime import datetime
import logging
import requests
import json
import os
import subprocess
import re
import traceback
from augur.application.db.models import *
from augur.application.db.session import DatabaseSession
from augur.application.config import AugurConfig
from augur.tasks.github.util.github_api_key_handler import GithubApiKeyHandler
from augur.application.db.util import execute_session_query
from augur.tasks.util.worker_util import parse_json_from_subprocess_call

def value_model(session,repo_git,repo_id, path):
    """Run scc on a repository checkout and store per-file results in the database.

    :param session: Database session; its ``logger`` is used for logging and
        its ``insert_data`` method for writing the rows.
    :param repo_git: Git URL of the repository, stored as ``repo_url``.
    :param repo_id: Repository ID
    :param path: Absolute file path of the repository
    :raises KeyError: if the ``HOME`` environment variable is not set.
    :raises json.JSONDecodeError: propagated from
        ``parse_json_from_subprocess_call`` when scc's output is not valid JSON.
    """

    session.logger.info('Generating value data for repo')
    session.logger.info(f"Repo ID: {repo_id}, Path: {path}")
    session.logger.info('Running scc...')

    # The scc binary is expected to have been built in $HOME/scc.
    path_to_scc = os.path.join(os.environ['HOME'], 'scc')

    required_output = parse_json_from_subprocess_call(session.logger,['./scc', '-f','json','--by-file', path], cwd=path_to_scc)

    session.logger.info('adding scc data to database... ')
    session.logger.debug(f"output: {required_output}")

    # Take the timestamp once so every row produced by this run carries the
    # same analysis/collection date; recomputing it per file lets rows from a
    # single run straddle a second boundary.
    # NOTE(review): datetime.now() is naive local time but the format appends
    # a literal 'Z' -- confirm whether UTC is intended.
    collection_time = datetime.now().strftime('%Y-%m-%dT%H:%M:%SZ')

    # One row per file; scc groups its per-file entries under 'Files' in each
    # top-level record.
    to_insert = [
        {
            'repo_id': repo_id,
            'rl_analysis_date': collection_time,
            'programming_language': file['Language'],
            'file_path': file['Location'],
            'file_name': file['Filename'],
            'total_lines': file['Lines'],
            'code_lines': file['Code'],
            'comment_lines': file['Comment'],
            'blank_lines': file['Blank'],
            'code_complexity': file['Complexity'],
            'repo_url': repo_git,
            'tool_source': 'value_model',
            'data_source': 'Git',
            'data_collection_date': collection_time,
        }
        for record in required_output
        for file in record['Files']
    ]

    # The column list matches the "rl-unique" natural key on repo_labor.
    session.insert_data(to_insert, RepoLabor, ["repo_id", "rl_analysis_date", "file_path", "file_name" ])

    session.logger.info(f"Done generating scc data for repo {repo_id} from path {path}")
28 changes: 28 additions & 0 deletions augur/tasks/git/scc_value_tasks/tasks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
import logging
import traceback
from augur.application.db.session import DatabaseSession
from augur.tasks.git.scc_value_tasks.core import *
from augur.tasks.init.celery_app import celery_app as celery
from augur.tasks.init.celery_app import AugurFacadeRepoCollectionTask, AugurCoreRepoCollectionTask
from augur.application.db.util import execute_session_query
from augur.application.config import AugurConfig
from augur.tasks.git.util.facade_worker.facade_worker.utilitymethods import get_absolute_repo_path


@celery.task(base=AugurFacadeRepoCollectionTask)
def process_scc_value_metrics(repo_git):
    """Celery task: collect scc value metrics for the repository at ``repo_git``.

    Looks up the ``Repo`` row whose ``repo_git`` matches, resolves the
    repository's absolute path from the "Facade" config section, and hands
    off to ``value_model``.

    :param repo_git: Git URL identifying the repository.
    """
    # Imported at call time rather than at module import.
    # NOTE(review): presumably to avoid a circular import with celery_app -- confirm.
    from augur.tasks.init.celery_app import engine

    task_logger = logging.getLogger(process_scc_value_metrics.__name__)

    with DatabaseSession(task_logger, engine) as session:
        task_logger.info(f"repo_git: {repo_git}")

        # Exactly one repo is expected to match the URL ('one').
        repo_query = session.query(Repo).filter(Repo.repo_git == repo_git)
        repo = execute_session_query(repo_query, 'one')

        facade_settings = AugurConfig(session.logger, session).get_section("Facade")
        absolute_repo_path = get_absolute_repo_path(
            facade_settings['repo_directory'],
            repo.repo_id,
            repo.repo_path,
            repo.repo_name,
        )

        value_model(session, repo_git, repo.repo_id, absolute_repo_path)
4 changes: 3 additions & 1 deletion augur/tasks/init/celery_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,8 @@ class CollectionState(Enum):

git_tasks = ['augur.tasks.git.facade_tasks',
'augur.tasks.git.dependency_tasks.tasks',
'augur.tasks.git.dependency_libyear_tasks.tasks']
'augur.tasks.git.dependency_libyear_tasks.tasks',
'augur.tasks.git.scc_value_tasks.tasks']

data_analysis_tasks = ['augur.tasks.data_analysis.message_insights.tasks',
'augur.tasks.data_analysis.clustering_worker.tasks',
Expand Down Expand Up @@ -139,6 +140,7 @@ def on_failure(self,exc,task_id,args,kwargs,einfo):
'augur.tasks.github.pull_requests.tasks.collect_pull_request_review_comments': {'queue': 'secondary'},
'augur.tasks.git.dependency_tasks.tasks.process_ossf_dependency_metrics': {'queue': 'secondary'},
'augur.tasks.git.dependency_tasks.tasks.process_dependency_metrics': {'queue': 'facade'},
'augur.tasks.git.scc_value_tasks.tasks.process_scc_value_metrics' : {'queue': 'facade'},
'augur.tasks.git.dependency_libyear_tasks.tasks.process_libyear_dependency_metrics': {'queue': 'facade'},
'augur.tasks.frontend.*': {'queue': 'frontend'},
'augur.tasks.data_analysis.contributor_breadth_worker.*': {'queue': 'secondary'},
Expand Down
20 changes: 20 additions & 0 deletions augur/tasks/util/worker_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@

from typing import Optional, List, Any, Tuple
from datetime import datetime, timedelta
import json
import subprocess

def create_grouped_task_load(*args,processes=8,dataList=[],task=None):

Expand Down Expand Up @@ -122,6 +124,24 @@ def calculate_date_weight_from_timestamps(added,last_collection,domain_start_day
#Else increase its weight
return -1 * factor

def parse_json_from_subprocess_call(logger, subprocess_arr, cwd=None):
    """Run a command and return its standard output parsed as JSON.

    :param logger: Logger used for progress and error messages.
    :param subprocess_arr: Command and arguments, as passed to ``subprocess.run``.
    :param cwd: Optional working directory for the command; a falsy value
        runs the command in the current working directory.
    :return: The object decoded from the command's stdout.
    :raises json.JSONDecodeError: if stdout is not valid JSON (the raw output
        is logged first).
    """
    logger.info(f"running subprocess {subprocess_arr[0]}")

    # subprocess.run treats cwd=None as "inherit", so one call covers both the
    # with-cwd and without-cwd cases; `or None` keeps "" behaving like None.
    p = subprocess.run(subprocess_arr, cwd=cwd or None, capture_output=True, text=True, timeout=None)

    logger.info('subprocess completed... ')

    output = p.stdout

    try:
        required_output = json.loads(output)
    except json.decoder.JSONDecodeError as e:
        # Log through the logger we were given; there is no `session` in this
        # scope, so referencing it would mask the real error with a NameError.
        logger.error(f"Could not parse required output! \n output: {output} \n Error: {e}")
        raise

    return required_output


# def create_server(app, worker=None):
Expand Down
17 changes: 17 additions & 0 deletions scripts/install/workers.sh
Original file line number Diff line number Diff line change
Expand Up @@ -80,3 +80,20 @@ else
echo "scorecard build done"
cd $CURRENT_DIR
fi

#Do the same thing for scc for value worker
# Remember the starting directory BEFORE changing into the scc checkout so the
# final cd actually returns to it.
CURRENT_DIR=$PWD

if [ -d "$HOME/scc" ]; then
  echo " Scc already exists, removing it to fetch a fresh copy ..."
  echo " Updating Scc ... "
  rm -rf "$HOME/scc"
fi

echo "Cloning Sloc Cloc and Code (SCC) to generate value data ..."
git clone https://github.com/boyter/scc "$HOME/scc"
cd "$HOME/scc"
go build
echo "scc build done"
cd "$CURRENT_DIR"