Skip to content

Commit

Permalink
#321 updates for new SemMedDB flow
Browse files Browse the repository at this point in the history
  • Loading branch information
ecwood committed Jul 25, 2023
1 parent b03b2ad commit d76afc0
Show file tree
Hide file tree
Showing 5 changed files with 15 additions and 35 deletions.
11 changes: 11 additions & 0 deletions Snakefile-extraction
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,17 @@ rule UMLS:
shell:
"bash -x " + config['CODE_DIR'] + "/extract-umls.sh " + config['BUILD_DIR'] + " {output} > {log} 2>&1"

# Rule SemMedDB: runs extract-semmeddb.sh (located in config['CODE_DIR'])
# under `bash -x`, passing the two declared output paths to it as positional
# arguments.
rule SemMedDB:
input:
# Same placeholder input as the neighbouring UniProtKB rule.
config['VALIDATION_PLACEHOLDER']
output:
# Named outputs so the shell command can refer to each path individually.
tuplelist = config['SEMMED_TUPLELIST_FILE'],
exclusion_list = config['SEMMED_EXCLUSION_FILE']
log:
# Log path is <BUILD_DIR>/extract-semmeddb<TEST_SUFFIX>.log
config['BUILD_DIR'] + "/extract-semmeddb" + config['TEST_SUFFIX'] + ".log"
shell:
# Argument order matters: the tuple-list path is passed first and the
# exclusion-list path second. Both stdout and stderr (including the `bash -x`
# trace) are redirected into the log file.
"bash -x " + config['CODE_DIR'] + "/extract-semmeddb.sh {output.tuplelist} {output.exclusion_list} > {log} 2>&1"

rule UniProtKB:
input:
config['VALIDATION_PLACEHOLDER']
Expand Down
11 changes: 0 additions & 11 deletions Snakefile-semmeddb-extraction

This file was deleted.

11 changes: 3 additions & 8 deletions build-kg2-snakemake.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,12 @@
set -o nounset -o pipefail -o errexit

if [[ "${1:-}" == "--help" || "${1:-}" == "-h" ]]; then
echo Usage: "$0 [test|alltest|all|-n|nodes|graphic|-R_*|-F] [-n|nodes|graphic|-R_*|-F] "
echo Usage: "$0 [test|all|-n|nodes|graphic|-R_*|-F] [-n|nodes|graphic|-R_*|-F] "
echo "[-n|nodes|graphic|-R_*|-F|ci] [nodes|ci|-n] [ci]"
exit 2
fi

# Usage: build-kg2-snakemake.sh [test|alltest|all|-n|nodes|graphic|-R_*|-F] [-n|nodes|graphic|-R_*|-F]
# Usage: build-kg2-snakemake.sh [test|all|-n|nodes|graphic|-R_*|-F] [-n|nodes|graphic|-R_*|-F]
# [-n|nodes|graphic|-R_*|-F|ci] [nodes|ci|-n] [ci]

config_dir=`dirname "$0"`
Expand All @@ -29,7 +29,7 @@ then
ci_flag="ci"
fi

if [[ "${build_flag}" == "test" || "${build_flag}" == "alltest" ]]
if [[ "${build_flag}" == "test" ]]
then
# The test argument for bash scripts (ex. extract-semmeddb.sh test)
test_flag="test"
Expand Down Expand Up @@ -128,11 +128,6 @@ echo 'include: "Snakefile-conversion"' >> ${snakefile}

echo 'include: "Snakefile-post-etl"' >> ${snakefile}

if [[ "${build_flag}" == "all" || "${build_flag}" == "alltest" ]]
then
echo 'include: "Snakefile-semmeddb-extraction"' >> ${snakefile}
fi

if [[ "${build_flag}" == "all" ]]
then
echo 'include: "Snakefile-extraction"' >> ${snakefile}
Expand Down
9 changes: 0 additions & 9 deletions extract-semmeddb.sh
Original file line number Diff line number Diff line change
Expand Up @@ -76,16 +76,7 @@ domain_range_exclusion_file=${2:-"${BUILD_DIR}/${domain_range_exclusion_filename

${curl_get} ${domain_range_exclusion_link} -o ${domain_range_exclusion_file}

if [[ "${build_flag}" == "test" || "${build_flag}" == 'alltest' ]]
then
test_arg=" --test"
else
test_arg=""
fi


${VENV_DIR}/bin/python3 ${CODE_DIR}/semmeddb_mysql_to_tuple_list_json.py \
${test_arg} \
${mysql_conf} \
${mysql_dbname} \
${semmed_ver} \
Expand Down
8 changes: 1 addition & 7 deletions semmeddb_mysql_to_tuple_list_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@
def make_arg_parser():
arg_parser = argparse.ArgumentParser(description='semmeddb_mysql_to_tuple_list_json.py: extracts all the predicate triples from SemMedDB, ' +
'as a list of tuples')
arg_parser.add_argument('--test', dest='test', action="store_true", default=False)
arg_parser.add_argument('mysqlConfigFile', type=str)
arg_parser.add_argument('mysqlDBName', type=str)
arg_parser.add_argument('versionNumber', type=str)
Expand All @@ -44,11 +43,10 @@ def make_arg_parser():
version_date = args.versionDate
output_file_name = args.outputFile
version_number = version_number.strip('VER')
test_mode = args.test
connection = pymysql.connect(read_default_file=mysql_config_file, db=mysql_db_name)
preds_dict = dict()

output_info = kg2_util.create_single_jsonlines(test_mode)
output_info = kg2_util.create_single_jsonlines(False)
output = output_info[0]

# https://stackoverflow.com/questions/7208773/mysql-row-30153-was-cut-by-group-concat-error
Expand All @@ -60,10 +58,6 @@ def make_arg_parser():
"FROM ((PREDICATION NATURAL JOIN CITATIONS) NATURAL JOIN SENTENCE) NATURAL JOIN PREDICATION_AUX "
"GROUP BY SUBJECT_CUI, PREDICATE, OBJECT_CUI")


if test_mode:
sql_statement += " LIMIT 10000"

with connection.cursor() as cursor:
cursor.execute(max_len_sql_statement)
cursor.fetchall()
Expand Down

0 comments on commit d76afc0

Please sign in to comment.