Skip to content

Commit

Permalink
completed
Browse files Browse the repository at this point in the history
  • Loading branch information
Jathurshan0330 committed Nov 4, 2024
1 parent 28d474b commit febabdc
Show file tree
Hide file tree
Showing 4 changed files with 45 additions and 6 deletions.
27 changes: 27 additions & 0 deletions arrange_labels.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
import argparse
import os



def arrange_labels(save_path):
    """Gather the outcome-label CSVs into ``<save_path>/outcome_labels``.

    Two files are looked up relative to ``save_path``:

    * ``llm_predictions_on_pubmed/pubmed_gpt_outcomes.csv``
    * ``clinical_trial_linkage/trial_linkages/outcome_labels/``
      ``Merged_all_trial_linkage_outcome_df__FDA_updated.csv``

    Each one that exists is copied, under its own file name, into the
    ``outcome_labels`` folder, which is created when missing.

    Args:
        save_path: Root folder that holds the pipeline outputs.

    Returns:
        List of destination paths that were actually written.
    """
    # Local imports keep this block self-contained; the file header only
    # imports argparse and os.
    import shutil
    import sys

    label_save_path = os.path.join(save_path, 'outcome_labels')
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(label_save_path, exist_ok=True)

    label_sources = (
        # GPT labels
        os.path.join(save_path, 'llm_predictions_on_pubmed',
                     'pubmed_gpt_outcomes.csv'),
        # Trial linkage labels (no trailing space in the file name)
        os.path.join(save_path, 'clinical_trial_linkage', 'trial_linkages',
                     'outcome_labels',
                     'Merged_all_trial_linkage_outcome_df__FDA_updated.csv'),
    )

    copied = []
    for source in label_sources:
        destination = os.path.join(label_save_path, os.path.basename(source))
        if not os.path.isfile(source):
            # Stay best-effort like the former `cp` call, but say what was
            # skipped instead of relying on an ignored exit status.
            print(f'Warning: label file not found, skipping: {source}',
                  file=sys.stderr)
            continue
        # shutil.copy needs no shell, so paths containing spaces or shell
        # metacharacters are handled correctly.
        shutil.copy(source, destination)
        copied.append(destination)
    return copied


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Arrange labels for CTOD dataset')
    # Required: without it os.path.join(None, ...) fails with an obscure TypeError.
    parser.add_argument('--save_path', type=str, required=True,
                        help='Path to the folder to save the arranged labels')

    args = parser.parse_args()

    arrange_labels(args.save_path)




3 changes: 2 additions & 1 deletion clinical_trial_linkage/match_fda_approvals.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,8 @@ def match_FDA_approvals_main(save_path,merged_all_pd_path,cross_encoder,dev=Fals
if merged_all_pd.iloc[i]['outcome'] != 'Success':
merged_all_pd.at[i, 'outcome'] = 'Success'

merge_all_save_path = merged_all_pd_path.split('.csv')[0] + '_FDA_updated.csv'
# merge_all_save_path = merged_all_pd_path.split('.csv')[0] + '_FDA_updated.csv'
merge_all_save_path = os.path.join(save_path, 'outcome_labels','Merged_all_trial_linkage_outcome_df__FDA_updated.csv' )
merged_all_pd.to_csv(merge_all_save_path, index=False)

print('Finished updating merged_all_pd with FDA approvals')
Expand Down
8 changes: 8 additions & 0 deletions llm_prediction_on_pubmed/clean_and_extract_final_outcomes.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,14 @@ def main(gpt_decisions_path,top_2_pubmed_path):
# add to gpt_trial_outcomes using concat
gpt_trial_outcomes = pd.concat([gpt_trial_outcomes, pd.DataFrame({'nct_id': [trial], 'outcome': [trial_outcome]})])

elif os.path.exists(os.path.join(gpt_decisions_path,f'{trial}_gpt_response.txt')):
with open(os.path.join(gpt_decisions_path,f'{trial}_gpt_response.txt'), 'r') as f:
trial_outcome = f.read()
f.close()
trial_outcome = trial_outcome.split(""""outcome":""")[-1].split(',')[0].split('"')[1].split('"')[0]

# add to gpt_trial_outcomes using concat
gpt_trial_outcomes = pd.concat([gpt_trial_outcomes, pd.DataFrame({'nct_id': [trial], 'outcome': [trial_outcome]})])

# get common nct_ids in top2_pubmed_pd and gpt_trial_outcomes
common_nct_ids = list(set(top2_pubmed_pd['nct_id'].values).intersection(set(gpt_trial_outcomes['nct_id'].values)))
Expand Down
13 changes: 8 additions & 5 deletions pipeline.sh
Original file line number Diff line number Diff line change
Expand Up @@ -16,17 +16,17 @@ cd llm_prediction_on_pubmed
# python extract_pubmed_abstracts.py --data_path $DATA_PATH --save_path $SAVE_PATH #--dev
# echo "Search Pubmed and extract abstracts"
# python extract_pubmed_abstracts_through_search.py --data_path $DATA_PATH --save_path $SAVE_PATH #--dev
echo "Retrieving top 2 relevant abstracts"
python retrieve_top2_abstracts.py --data_path $DATA_PATH --save_path $SAVE_PATH #--dev
# echo "Retrieving top 2 relevant abstracts"
# python retrieve_top2_abstracts.py --data_path $DATA_PATH --save_path $SAVE_PATH #--dev
# echo "Obtaining LLM predictions"
# python get_llm_predictions.py --save_path $SAVE_PATH --azure #--dev
# python clean_and_extract_final_outcomes.py --save_path $SAVE_PATH


# # # # Getting Clinical Trial Linkage
# echo "Getting Clinical Trial Linkage"
# cd ..
# cd clinical_trial_linkage
cd ..
cd clinical_trial_linkage

# echo "Downloading FDA orange book and drug code dictionary"
# python download_data.py --save_path $SAVE_PATH # centralize the links in the .sh
Expand Down Expand Up @@ -55,13 +55,16 @@ python retrieve_top2_abstracts.py --data_path $DATA_PATH --save_path $SAVE_PATH
# python match_fda_approvals.py --save_path $SAVE_PATH #--dev



# News


# Stock prices


# Labeling

# echo "Copy all labeling results to the labeling folder"
cd ..
python arrange_labels.py --save_path $SAVE_PATH

# limit it to drugs

0 comments on commit febabdc

Please sign in to comment.