Skip to content

Commit

Permalink
Merge pull request #2497 from chaoss/value-worker-migrate-to-task
Browse files Browse the repository at this point in the history
Value worker migrate to task
  • Loading branch information
sgoggins authored Aug 25, 2023
2 parents 0424308 + 0c4ff66 commit fb47594
Show file tree
Hide file tree
Showing 9 changed files with 181 additions and 13 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
"""Alter repo labor unique
Revision ID: 22
Revises: 21
Create Date: 2023-08-25 18:17:22.651191
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
from sqlalchemy.sql import text
import re

# revision identifiers, used by Alembic.
revision = '22'
down_revision = '21'
branch_labels = None
depends_on = None


def upgrade():
    """Recreate the "rl-unique" constraint on augur_data.repo_labor as a plain UNIQUE constraint.

    The constraint is dropped (if present) and re-added without the
    DEFERRABLE INITIALLY DEFERRED clause that downgrade() restores.
    """
    # ### commands auto generated by Alembic - please adjust! ###

    conn = op.get_bind()

    # Remove constraint being initially deferred.
    # Plain string (no f-prefix): the statement has no placeholders to interpolate.
    conn.execute(text("""
ALTER TABLE "augur_data"."repo_labor"
DROP CONSTRAINT IF EXISTS "rl-unique",
ADD CONSTRAINT "rl-unique" UNIQUE ("repo_id", "rl_analysis_date", "file_path", "file_name");
"""))
    # ### end Alembic commands ###


def downgrade():
    """Recreate the "rl-unique" constraint on augur_data.repo_labor as DEFERRABLE INITIALLY DEFERRED.

    The constraint is dropped (if present) and re-added over the same four
    columns with the deferrable clause.
    """
    # ### commands auto generated by Alembic - please adjust! ###
    conn = op.get_bind()

    # Make unique initially deferred.
    # Plain string (no f-prefix): the statement has no placeholders to interpolate.
    conn.execute(text("""
ALTER TABLE "augur_data"."repo_labor"
DROP CONSTRAINT IF EXISTS "rl-unique",
ADD CONSTRAINT "rl-unique" UNIQUE ("repo_id", "rl_analysis_date", "file_path", "file_name") DEFERRABLE INITIALLY DEFERRED;
"""))

    # ### end Alembic commands ###
15 changes: 4 additions & 11 deletions augur/tasks/git/dependency_tasks/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,10 @@
from augur.tasks.github.util.github_api_key_handler import GithubApiKeyHandler
from augur.application.db.util import execute_session_query
from augur.tasks.git.dependency_tasks.dependency_util import dependency_calculator as dep_calc
from augur.tasks.util.worker_util import parse_json_from_subprocess_call

def generate_deps_data(session, repo_id, path):
"""Runs deps modules on repo and stores data in database
"""Run dependency logic on repo and stores data in database
:param repo_id: Repository ID
:param path: Absolute path of the Repository
"""
Expand Down Expand Up @@ -80,16 +81,8 @@ def generate_scorecard(session,repo_id,path):
key_handler = GithubApiKeyHandler(session)
os.environ['GITHUB_AUTH_TOKEN'] = key_handler.get_random_key()

p= subprocess.run(['./scorecard', command, '--format=json'], cwd= path_to_scorecard ,capture_output=True, text=True, timeout=None)
session.logger.info('subprocess completed successfully... ')
output = p.stdout

try:
required_output = json.loads(output)
except json.decoder.JSONDecodeError as e:
session.logger.error(f"Could not parse required output! \n output: {output} \n Error: {e}")
return

required_output = parse_json_from_subprocess_call(session.logger,['./scorecard', command, '--format=json'],cwd=path_to_scorecard)

session.logger.info('adding to database...')
session.logger.debug(f"output: {required_output}")

Expand Down
4 changes: 3 additions & 1 deletion augur/tasks/git/facade_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@

from augur.tasks.git.dependency_tasks.tasks import process_dependency_metrics
from augur.tasks.git.dependency_libyear_tasks.tasks import process_libyear_dependency_metrics
from augur.tasks.git.scc_value_tasks.tasks import process_scc_value_metrics

from augur.tasks.github.util.github_paginator import GithubPaginator, hit_api
from augur.tasks.github.util.gh_graphql_entities import PullRequest
Expand Down Expand Up @@ -526,7 +527,8 @@ def facade_phase(repo_git):
group(
chain(*facade_core_collection),
process_dependency_metrics.si(repo_git),
process_libyear_dependency_metrics.si(repo_git)
process_libyear_dependency_metrics.si(repo_git),
process_scc_value_metrics.si(repo_git)
)
)

Expand Down
Empty file.
57 changes: 57 additions & 0 deletions augur/tasks/git/scc_value_tasks/core.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
from datetime import datetime
import logging
import requests
import json
import os
import subprocess
import re
import traceback
from augur.application.db.models import *
from augur.application.db.session import DatabaseSession
from augur.application.config import AugurConfig
from augur.tasks.github.util.github_api_key_handler import GithubApiKeyHandler
from augur.application.db.util import execute_session_query
from augur.tasks.util.worker_util import parse_json_from_subprocess_call

def value_model(session, repo_git, repo_id, path):
    """Run scc against a repository checkout and store one repo_labor row per file.

    :param session: database session; its ``logger`` is used for all logging
        and its ``insert_data`` method performs the insert
    :param repo_git: git URL of the repository, stored as ``repo_url``
    :param repo_id: Repository ID
    :param path: absolute file path of the Repository
    """
    log = session.logger

    log.info('Generating value data for repo')
    log.info(f"Repo ID: {repo_id}, Path: {path}")
    log.info('Running scc...')

    # The scc binary is expected in an "scc" directory under $HOME.
    scc_dir = f"{os.environ['HOME']}/scc"
    scc_command = ['./scc', '-f', 'json', '--by-file', path]

    scc_report = parse_json_from_subprocess_call(log, scc_command, cwd=scc_dir)

    log.info('adding scc data to database... ')
    log.debug(f"output: {scc_report}")

    stamp_format = '%Y-%m-%dT%H:%M:%SZ'

    # Each top-level entry of the report carries a 'Files' list; flatten all
    # of them into one list of rows. Timestamps are taken per row.
    rows = [
        {
            'repo_id': repo_id,
            'rl_analysis_date': datetime.now().strftime(stamp_format),
            'programming_language': entry['Language'],
            'file_path': entry['Location'],
            'file_name': entry['Filename'],
            'total_lines': entry['Lines'],
            'code_lines': entry['Code'],
            'comment_lines': entry['Comment'],
            'blank_lines': entry['Blank'],
            'code_complexity': entry['Complexity'],
            'repo_url': repo_git,
            'tool_source': 'value_model',
            'data_source': 'Git',
            'data_collection_date': datetime.now().strftime(stamp_format),
        }
        for language_group in scc_report
        for entry in language_group['Files']
    ]

    # The natural-key columns match the "rl-unique" constraint on repo_labor.
    natural_keys = ["repo_id", "rl_analysis_date", "file_path", "file_name"]
    session.insert_data(rows, RepoLabor, natural_keys)

    log.info(f"Done generating scc data for repo {repo_id} from path {path}")
28 changes: 28 additions & 0 deletions augur/tasks/git/scc_value_tasks/tasks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
import logging
import traceback
from augur.application.db.session import DatabaseSession
from augur.tasks.git.scc_value_tasks.core import *
from augur.tasks.init.celery_app import celery_app as celery
from augur.tasks.init.celery_app import AugurFacadeRepoCollectionTask, AugurCoreRepoCollectionTask
from augur.application.db.util import execute_session_query
from augur.application.config import AugurConfig
from augur.tasks.git.util.facade_worker.facade_worker.utilitymethods import get_absolute_repo_path


@celery.task(base=AugurFacadeRepoCollectionTask)
def process_scc_value_metrics(repo_git):
    """Celery task that generates scc value data for a single repository.

    Looks up the repo row matching ``repo_git``, resolves its absolute path
    from the "Facade" config section's ``repo_directory``, and passes both
    to ``value_model``.

    :param repo_git: git URL identifying the repository
    """
    # The engine is imported at call time rather than at module import time.
    from augur.tasks.init.celery_app import engine

    task_logger = logging.getLogger(process_scc_value_metrics.__name__)

    with DatabaseSession(task_logger, engine) as session:
        task_logger.info(f"repo_git: {repo_git}")

        # 'one' asks for exactly one matching repo row.
        repo_query = session.query(Repo).filter(Repo.repo_git == repo_git)
        repo = execute_session_query(repo_query, 'one')

        facade_settings = AugurConfig(session.logger, session).get_section("Facade")
        absolute_repo_path = get_absolute_repo_path(
            facade_settings['repo_directory'],
            repo.repo_id,
            repo.repo_path,
            repo.repo_name,
        )

        value_model(session, repo_git, repo.repo_id, absolute_repo_path)
4 changes: 3 additions & 1 deletion augur/tasks/init/celery_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,8 @@ class CollectionState(Enum):

git_tasks = ['augur.tasks.git.facade_tasks',
'augur.tasks.git.dependency_tasks.tasks',
'augur.tasks.git.dependency_libyear_tasks.tasks']
'augur.tasks.git.dependency_libyear_tasks.tasks',
'augur.tasks.git.scc_value_tasks.tasks']

data_analysis_tasks = ['augur.tasks.data_analysis.message_insights.tasks',
'augur.tasks.data_analysis.clustering_worker.tasks',
Expand Down Expand Up @@ -139,6 +140,7 @@ def on_failure(self,exc,task_id,args,kwargs,einfo):
'augur.tasks.github.pull_requests.tasks.collect_pull_request_review_comments': {'queue': 'secondary'},
'augur.tasks.git.dependency_tasks.tasks.process_ossf_dependency_metrics': {'queue': 'secondary'},
'augur.tasks.git.dependency_tasks.tasks.process_dependency_metrics': {'queue': 'facade'},
'augur.tasks.git.scc_value_tasks.tasks.process_scc_value_metrics' : {'queue': 'facade'},
'augur.tasks.git.dependency_libyear_tasks.tasks.process_libyear_dependency_metrics': {'queue': 'facade'},
'augur.tasks.frontend.*': {'queue': 'frontend'},
'augur.tasks.data_analysis.contributor_breadth_worker.*': {'queue': 'secondary'},
Expand Down
20 changes: 20 additions & 0 deletions augur/tasks/util/worker_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@

from typing import Optional, List, Any, Tuple
from datetime import datetime, timedelta
import json
import subprocess

def create_grouped_task_load(*args,processes=8,dataList=[],task=None):

Expand Down Expand Up @@ -122,6 +124,24 @@ def calculate_date_weight_from_timestamps(added,last_collection,domain_start_day
#Else increase its weight
return -1 * factor

def parse_json_from_subprocess_call(logger, subprocess_arr, cwd=None):
    """Run a command and return its standard output parsed as JSON.

    :param logger: logger used for progress messages and parse failures
    :param subprocess_arr: command and arguments, passed to ``subprocess.run``
    :param cwd: optional working directory for the command; any falsy value
        means "use the current working directory"
    :return: the decoded JSON value
    :raises json.decoder.JSONDecodeError: if the command's stdout is not valid JSON
    """
    logger.info(f"running subprocess {subprocess_arr[0]}")

    # subprocess.run treats cwd=None as "inherit", so one call covers both
    # cases; `or None` keeps falsy values such as "" behaving as before.
    p = subprocess.run(
        subprocess_arr,
        cwd=cwd or None,
        capture_output=True,
        text=True,
        timeout=None,
    )

    logger.info('subprocess completed... ')

    output = p.stdout

    try:
        required_output = json.loads(output)
    except json.decoder.JSONDecodeError as e:
        # Use the logger passed in: there is no `session` in this scope, so
        # the previous `session.logger` raised NameError and hid the real error.
        logger.error(f"Could not parse required output! \n output: {output} \n Error: {e}")
        raise

    return required_output


# def create_server(app, worker=None):
Expand Down
17 changes: 17 additions & 0 deletions scripts/install/workers.sh
Original file line number Diff line number Diff line change
Expand Up @@ -80,3 +80,20 @@ else
echo "scorecard build done"
cd $CURRENT_DIR
fi

# Do the same thing for scc (Sloc Cloc and Code), which the value worker runs.
SCC_DIR="$HOME/scc"

# An existing checkout is removed and re-cloned so the build tracks upstream.
if [ -d "$SCC_DIR" ]; then
echo " Scc already exists, removing it before re-cloning ..."
echo " Updating Scc ... "
rm -rf "$SCC_DIR"
fi

echo "Cloning Sloc Cloc and Code (SCC) to generate value data ..."
git clone https://github.com/boyter/scc "$SCC_DIR"

# Record the starting directory BEFORE entering the checkout; capturing it
# afterwards made the final cd a no-op that left the script inside $HOME/scc.
CURRENT_DIR=$PWD
cd "$SCC_DIR"
go build
echo "scc build done"
cd "$CURRENT_DIR"

0 comments on commit fb47594

Please sign in to comment.