Skip to content

Commit

Permalink
Temporarily turn off the dedup.
Browse files Browse the repository at this point in the history
  • Loading branch information
hyunhwan-bcm committed Aug 1, 2024
1 parent ee37b39 commit d87fa98
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 7 deletions.
2 changes: 1 addition & 1 deletion bin/extraModel/generate_bivar_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ def process_sample(data_folder, sample_id, default_pred, labeling=False):

# Remove all the duplicated variant pairs.
gene_var_pairs_df = pd.DataFrame(gene_var_pairs)
gene_var_pairs_df = gene_var_pairs_df.drop_duplicates(['varId1', 'varId2'])
# gene_var_pairs_df = gene_var_pairs_df.drop_duplicates(['varId1', 'varId2'])

# Use only subset columns of features
subset_feature_names = "diffuse_Phrank_STRING,hgmdSymptomScore,omimSymMatchFlag,hgmdSymMatchFlag,clinVarSymMatchFlag,omimGeneFound,omimVarFound,hgmdGeneFound,hgmdVarFound,clinVarVarFound,clinVarGeneFound,clinvarNumP,clinvarNumLP,clinvarNumLB,clinvarNumB,dgvVarFound,decipherVarFound,curationScoreHGMD,curationScoreOMIM,curationScoreClinVar,conservationScoreDGV,omimSymptomSimScore,hgmdSymptomSimScore,GERPpp_RS,gnomadAF,gnomadAFg,LRT_score,LRT_Omega,phyloP100way_vertebrate,gnomadGeneZscore,gnomadGenePLI,gnomadGeneOELof,gnomadGeneOELofUpper,IMPACT,CADD_phred,CADD_PHRED,DANN_score,REVEL_score,fathmm_MKL_coding_score,conservationScoreGnomad,conservationScoreOELof,Polyphen2_HDIV_score,Polyphen2_HVAR_score,SIFT_score,zyg,FATHMM_score,M_CAP_score,MutationAssessor_score,ESP6500_AA_AF,ESP6500_EA_AF,hom,hgmd_rs,spliceAImax,nc_ClinVar_Exp,nc_HGMD_Exp,nc_isPLP,nc_isBLB,c_isPLP,c_isBLB,nc_CLNREVSTAT,c_CLNREVSTAT,nc_RANKSCORE,c_RANKSCORE,CLASS,phrank,isB/LB,isP/LP,cons_transcript_ablation,cons_splice_acceptor_variant,cons_splice_donor_variant,cons_stop_gained,cons_frameshift_variant,cons_stop_lost,cons_start_lost,cons_transcript_amplification,cons_inframe_insertion,cons_inframe_deletion,cons_missense_variant,cons_protein_altering_variant,cons_splice_region_variant,cons_splice_donor_5th_base_variant,cons_splice_donor_region_variant,c_ClinVar_Exp_Del_to_Missense,c_ClinVar_Exp_Different_pChange,c_ClinVar_Exp_Same_pChange,c_HGMD_Exp_Del_to_Missense,c_HGMD_Exp_Different_pChange,c_HGMD_Exp_Same_pChange,c_HGMD_Exp_Stop_Loss,c_HGMD_Exp_Start_Loss,IMPACT.from.Tier,TierAD,TierAR,TierAR.adj,No.Var.HM,No.Var.H,No.Var.M,No.Var.L,AD.matched,AR.matched,recessive,dominant,simple_repeat".split(',')
Expand Down
9 changes: 3 additions & 6 deletions bin/extraModel_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,7 @@ def assign_ranking(df):
return pred_df


# def AIM(data_folder, sample_id):
def AIM(data_folder, sample_id, n_thread):
def AIM(data_folder, sample_id):
feature_fn = f"{sample_id}.csv"

if not os.path.exists(feature_fn):
Expand Down Expand Up @@ -90,8 +89,7 @@ def AIM(data_folder, sample_id, n_thread):
data_folder=out_folder,
sample_id=sample_id,
default_pred=default_pred,
#labeling=False,
n_thread = n_thread
labeling=False,
)

recessive_feature_file = f"{out_folder}/recessive_matrix/{sample_id}.csv"
Expand Down Expand Up @@ -124,5 +122,4 @@ def AIM(data_folder, sample_id, n_thread):


# for sample_id in tqdm(sample_folders):
#AIM(out_folder, sample_id)
AIM(out_folder, sample_id, n_thread = 10)
AIM(out_folder, sample_id)

0 comments on commit d87fa98

Please sign in to comment.