Skip to content

Commit

Permalink
Debugged PubMed extraction through search
Browse files Browse the repository at this point in the history
  • Loading branch information
Jathurshan0330 committed Oct 29, 2024
1 parent cc0a957 commit e6679d1
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -87,10 +87,15 @@ def search_and_extract_pubmed(data_path,NCBI_api_key,email = ''):
reference_list = existing_reference_dict['References']
#get PMID from existing references
existing_pmids = [reference_list[i]['PMID'] for i in range(len(reference_list))]
existing_reference_types = [reference_list[i]['Reference type'].lower() for i in range(len(reference_list))]
if 'result' in existing_reference_types or 'search_result' in existing_reference_types:
continue
trial_ref_exists_in_data = True

if trial_ref_exists_in_data:
pmids = pmids - existing_pmids
# filter out existing pmids
pmids = [pmid for pmid in pmids if pmid not in existing_pmids]

if len(pmids) == 0:
continue
else:
Expand Down
4 changes: 2 additions & 2 deletions pipeline.sh
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@ SAVE_PATH=/srv/local/data/CTO
echo "Getting LLM predictions on Pubmed data"
cd llm_prediction_on_pubmed

echo "Extracting and Updating Pubmed data"
python extract_pubmed_abstracts.py --data_path $DATA_PATH --save_path $SAVE_PATH #--dev
# echo "Extracting and Updating Pubmed data"
# python extract_pubmed_abstracts.py --data_path $DATA_PATH --save_path $SAVE_PATH #--dev
echo "Search Pubmed and extract abstracts"
python extract_pubmed_abstracts_through_search.py --data_path $DATA_PATH --save_path $SAVE_PATH #--dev
# echo "Retrieving top 2 relevant abstracts"
Expand Down

0 comments on commit e6679d1

Please sign in to comment.