# Patch for utilities/rescue/run_randomforest_on_reference.R (single hunk, old line 73).
#
# What the hunk changes:
#   * NA_columns gains a 16th entry, "ratio_TSS".
#   * replacement.na gains a matching 16th entry, 1.
#   Both vectors previously had 15 entries, so they stay the same length after
#   the change. The visible loop indexes NA_columns by position i; the
#   replacement is presumably taken from replacement.na at the same position,
#   but that code lies outside this hunk -- TODO confirm.
#
# NOTE(review): replacement.na is built with c() from numbers, logicals and the
# string "non_coding". c() coerces all elements to a common type, so every
# entry is stored as character ("0", "FALSE", "-11000", and the new "1").
# Whether the loop body converts these back before assigning is not visible
# here -- verify that the new ratio_TSS column ends up numeric, not character.
#
# NOTE(review): the hunk ends inside the for loop; the loop body and closing
# brace are unchanged context outside this view.
diff --git a/utilities/rescue/run_randomforest_on_reference.R b/utilities/rescue/run_randomforest_on_reference.R index 8736709..907b392 100755 --- a/utilities/rescue/run_randomforest_on_reference.R +++ b/utilities/rescue/run_randomforest_on_reference.R @@ -73,10 +73,11 @@ message("\n\tPerforming ML filter data cleaning on classification...\n") NA_columns <- c("within_CAGE_peak", 'n_indels', "n_indels_junc", "FL", "predicted_NMD", "min_sample_cov", "min_cov", "ratio_exp", "bite", "diff_to_gene_TSS", "diff_to_gene_TTS" , "dist_to_polyA_site", - "dist_to_CAGE_peak", 'within_polyA_site', "polyA_dist") + "dist_to_CAGE_peak", 'within_polyA_site', "polyA_dist", + "ratio_TSS") replacement.na <- c(0, 0, 0, 0, "non_coding",0, 0,0, FALSE, - -11000, -11000, -11000, -11000, FALSE, -11000) + -11000, -11000, -11000, -11000, FALSE, -11000, 1) for (i in 1:length(NA_columns)) { sel.column <- which(colnames (classification) == NA_columns [i])