diff --git a/metaquantome/classes/SampleGroups.py b/metaquantome/classes/SampleGroups.py index abdbb61..3db8dd2 100644 --- a/metaquantome/classes/SampleGroups.py +++ b/metaquantome/classes/SampleGroups.py @@ -32,7 +32,8 @@ def __init__(self, sinfo): # name of experimental groups # sort alphabetically, so it's deterministic - self.grp_names = sorted(list(sample_names.keys())) + #self.grp_names = sorted(list(sample_names.keys())) + self.grp_names = list(sample_names.keys()) # when calculating means, column names for means # same order as grp names diff --git a/metaquantome/cli.py b/metaquantome/cli.py index 4ed4afa..deb3edc 100755 --- a/metaquantome/cli.py +++ b/metaquantome/cli.py @@ -37,7 +37,7 @@ def cli(): min_pep_nsamp=args.min_pep_nsamp, outfile=args.outfile) elif args.command == "stat": stat(infile=args.file, sinfo=args.samps, paired=args.paired, parametric=args.parametric, ontology=args.ontology, - mode=args.mode, outfile=args.outfile) + mode=args.mode, outfile=args.outfile, control_group=args.control_group) elif args.command == "viz": run_viz(plottype=args.plottype, img=args.img, @@ -51,6 +51,7 @@ def cli(): textannot=args.textannot, calculate_sep=args.calculate_sep, fc_name=args.fc_name, + fc_corr_p=args.fc_corr_p, flip_fc=args.flip_fc, gosplit=args.gosplit, sinfo=args.samps, @@ -62,7 +63,9 @@ def cli(): target_onto=args.target_onto, width=args.width, height=args.height, - tabfile=args.tabfile) + tabfile=args.tabfile, + feature_cluster_size=args.feature_cluster_size, + sample_cluster_size=args.sample_cluster_size) else: ValueError('incorrect mode. 
please provide one of "db", "expand", "filter", "stat", or "viz".') sys.exit(0) @@ -214,7 +217,9 @@ def parse_args_cli(): 'then a Wilcoxon test is performed.') parser_stat.add_argument('--paired', action='store_true', help='Perform paired tests.') - + parser_stat.add_argument('--control_group', required=True, + help='Sample group name of control samples (will be used as denominator for fold change).') + # ---- METAQUANTOME VIZ ---- # parser_viz.add_argument('--plottype', '-p', required=True, choices=['bar', 'volcano', 'heatmap', 'pca', 'ft_dist', 'stacked_bar'], help="Select the type of plot to generate.") @@ -226,11 +231,13 @@ def parse_args_cli(): help="Height of the image in inches. Defaults vary by plot type.") parser_viz.add_argument('--infile', '-i', required=True, help="Input file from stat or filter.") - parser_viz.add_argument('--strip', + parser_viz.add_argument('--strip', default=None, help="Text to remove from column names for plotting.") parser_viz.add_argument('--tabfile', default=None, help="Optional. File to write plot table to.") - + parser_viz.add_argument('--fc_corr_p', default=None, + help="Name of the corrected p-value column in the stat dataframe. Used while generating volcano plot and while using filter_to_sig in heatmap") + bar = parser_viz.add_argument_group('Arguments for barplots - including total taxonomy peptide intensity ("bar"), function-taxonomy ' + 'interaction distributions ("ft_dist"), and stacked taxonomy bar plots ("stacked_bar")') bar.add_argument('--meancol', @@ -274,6 +281,10 @@ def parse_args_cli(): help="Flag. Only plot significant terms? Necessitates use of results from `test`.") heat.add_argument('--alpha', default='0.05', help="If filter_to_sig, the q-value significance level.") + heat.add_argument('--feature_cluster_size', default='2', + help="Number of clusters 'k' to cut the feature dendrogram tree. 
Default = 2") + heat.add_argument('--sample_cluster_size', default='2', + help="Number of clusters 'k' to cut the sample dendrogram tree. Default = 2") pca = parser_viz.add_argument_group('Principal Components Analysis') pca.add_argument("--calculate_sep", action="store_true", diff --git a/metaquantome/data/test/cli_mult_test_out.tab b/metaquantome/data/test/cli_mult_test_out.tab index 0412da3..f596f4d 100644 --- a/metaquantome/data/test/cli_mult_test_out.tab +++ b/metaquantome/data/test/cli_mult_test_out.tab @@ -1,4 +1,4 @@ -id description s1_mean s2_mean int1 int2 int3 int4 int5 int6 log2fc_s1_over_s2 p corrected_p +id description s1_mean s2_mean int1 int2 int3 int4 int5 int6 log2fc_s1_over_s2 p_s1_over_s2 corrected_p_s1_over_s2 C Energy production and conversion 3.9696263509564815 3.841302253980942 3.584962500721156 4.3219280948873635 3.906890595608519 3.584962500721156 4.392317422778762 3.3219280948873617 0.1283240969755397 0.6832561051460733 0.6832561051460733 D Cell cycle control, cell division, chromosome partitioning 10.013089999440444 3.5443205162238103 9.965784284662089 10.228818690495881 9.813781191217037 3.584962500721156 3.700439718141092 3.3219280948873617 6.468769483216634 2.70286875006428e-06 8.10860625019284e-06 N Cell motility 4.544320516223809 11.468284625191268 4.3219280948873635 4.906890595608519 4.3219280948873635 11.773139206719689 10.965784284662087 11.550746785383243 -6.923964108967459 3.349508567404191e-05 5.024262851106286e-05 diff --git a/metaquantome/data/test/ec_ttest_tested.tab b/metaquantome/data/test/ec_ttest_tested.tab index 33da9d2..9c6c9ba 100644 --- a/metaquantome/data/test/ec_ttest_tested.tab +++ b/metaquantome/data/test/ec_ttest_tested.tab @@ -1,4 +1,4 @@ -id description s1_mean s2_mean int1 int2 int3 int4 int5 int6 int1_n_peptide int2_n_peptide int3_n_peptide int4_n_peptide int5_n_peptide int6_n_peptide int1_n_samp_children int2_n_samp_children int3_n_samp_children int4_n_samp_children int5_n_samp_children 
int6_n_samp_children log2fc_s1_over_s2 p corrected_p +id description s1_mean s2_mean int1 int2 int3 int4 int5 int6 int1_n_peptide int2_n_peptide int3_n_peptide int4_n_peptide int5_n_peptide int6_n_peptide int1_n_samp_children int2_n_samp_children int3_n_samp_children int4_n_samp_children int5_n_samp_children int6_n_samp_children log2fc_s1_over_s2 p_s1_over_s2 corrected_p_s1_over_s2 1.-.-.- Oxidoreductases. 10.013089999440444 3.5443205162238103 9.965784284662089 10.228818690495881 9.813781191217037 3.584962500721156 3.700439718141092 3.3219280948873617 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 6.468769483216634 2.70286875006428e-06 9.46004062522498e-06 1.2.-.- Acting on the aldehyde or oxo group of donors. 10.013089999440444 3.5443205162238103 9.965784284662089 10.228818690495881 9.813781191217037 3.584962500721156 3.700439718141092 3.3219280948873617 1.0 1.0 1.0 1.0 1.0 1.0 NA NA NA NA NA NA 6.468769483216634 2.70286875006428e-06 9.46004062522498e-06 3.-.-.- Hydrolases. 5.28540221886225 11.475564566327488 5.0 5.6438561897747235 5.1292830169449655 11.778077129535358 10.980853606379736 11.555547771647065 2.0 2.0 2.0 2.0 2.0 2.0 1.0 1.0 1.0 1.0 1.0 1.0 -6.190162347465239 4.742887028677606e-05 5.533368200123874e-05 diff --git a/metaquantome/data/test/go_tested.tab b/metaquantome/data/test/go_tested.tab index 469478e..abc4712 100644 --- a/metaquantome/data/test/go_tested.tab +++ b/metaquantome/data/test/go_tested.tab @@ -1,4 +1,4 @@ -id name namespace s1_mean s2_mean int1 int2 int3 int4 int5 int6 int1_n_peptide int2_n_peptide int3_n_peptide int4_n_peptide int5_n_peptide int6_n_peptide int1_n_samp_children int2_n_samp_children int3_n_samp_children int4_n_samp_children int5_n_samp_children int6_n_samp_children log2fc_s1_over_s2 p corrected_p +id name namespace s1_mean s2_mean int1 int2 int3 int4 int5 int6 int1_n_peptide int2_n_peptide int3_n_peptide int4_n_peptide int5_n_peptide int6_n_peptide int1_n_samp_children int2_n_samp_children int3_n_samp_children 
int4_n_samp_children int5_n_samp_children int6_n_samp_children log2fc_s1_over_s2 p_s1_over_s2 corrected_p_s1_over_s2 GO:0000003 reproduction biological_process 10.013089999440444 3.5443205162238103 9.965784284662089 10.228818690495881 9.813781191217037 3.584962500721156 3.700439718141092 3.3219280948873617 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 6.468769483216634 2.70286875006428e-06 6.7571718751607e-06 GO:0008150 biological_process biological_process 10.066537719931583 11.4814630999144 10.011227255423254 10.287712379549447 9.868822554774999 11.782998208920414 10.9901039638575 11.560332834212444 3.0 3.0 3.0 3.0 3.0 3.0 3.0 3.0 3.0 3.0 3.0 3.0 -1.414925379982817 0.013580739592325305 0.01697592449040663 GO:0008152 metabolic process biological_process 3.9696263509564815 3.841302253980942 3.584962500721156 4.3219280948873635 3.906890595608519 3.584962500721156 4.392317422778762 3.3219280948873617 1.0 1.0 1.0 1.0 1.0 1.0 NA NA NA NA NA NA 0.1283240969755397 0.6832561051460733 0.6832561051460733 diff --git a/metaquantome/modules/run_viz.py b/metaquantome/modules/run_viz.py index f26ab15..0cba867 100644 --- a/metaquantome/modules/run_viz.py +++ b/metaquantome/modules/run_viz.py @@ -8,11 +8,11 @@ def run_viz(plottype, img, infile, strip=None, mode=None, meancol=None, nterms='5', target_rank=None, barcol=6, # barplot, stacked_bar - textannot=None, fc_name=None, flip_fc=False, gosplit=False, # volcano + textannot=None, fc_name=None, fc_corr_p=None, flip_fc=False, gosplit=False, # volcano sinfo=None, filter_to_sig=False, alpha='0.05', # heatmap calculate_sep=False, # pca whichway=None, name=None, id=None, target_onto=None, # ft_dist - width='5', height='5', tabfile=None): + width='5', height='5', tabfile=None, feature_cluster_size=2, sample_cluster_size=2): """ Wrapper script for the command-line R visualizations The documentation for each of the arguments is in cli.py @@ -24,12 +24,12 @@ def run_viz(plottype, img, infile, strip=None, if plottype == "bar": cmd += 
[mode, meancol, nterms, width, height, target_rank, target_onto, barcol, tabfile] elif plottype == "volcano": - cmd += [str(textannot), fc_name, flip_fc, gosplit, width, height, tabfile] + cmd += [str(textannot), fc_name, fc_corr_p, flip_fc, gosplit, width, height, tabfile] elif plottype == "heatmap": samp_grps = SampleGroups(sinfo) all_intcols_str = ','.join(samp_grps.all_intcols) json_dump = json.dumps(samp_grps.sample_names) - cmd += [all_intcols_str, json_dump, filter_to_sig, alpha, width, height, strip] + cmd += [all_intcols_str, json_dump, filter_to_sig, alpha, width, height, strip, feature_cluster_size, sample_cluster_size, fc_corr_p] elif plottype == "pca": samp_grps = SampleGroups(sinfo) all_intcols_str = ','.join(samp_grps.all_intcols) diff --git a/metaquantome/modules/stat.py b/metaquantome/modules/stat.py index 6676986..447c57c 100644 --- a/metaquantome/modules/stat.py +++ b/metaquantome/modules/stat.py @@ -7,7 +7,7 @@ from metaquantome.util.stat_io import read_expanded_table, write_stat -def stat(infile, sinfo, paired, parametric, ontology, mode, outfile): +def stat(infile, sinfo, paired, parametric, ontology, mode, outfile, control_group): """ Module function that tests differential expression between 2 experimental conditions @@ -27,13 +27,44 @@ def stat(infile, sinfo, paired, parametric, ontology, mode, outfile): # read in df = read_expanded_table(infile, samp_grps) - if samp_grps.ngrps != 2: - ValueError('testing is only available for 2 experimental conditions.') + #if samp_grps.ngrps != 2: + # raise ValueError('testing is only available for 2 experimental conditions.') + + ################### + # Addition by Praveen to enable pairwise stat while inputting multiple sample groups + ################### + ctrl_grp = control_group + if ctrl_grp not in samp_grps.grp_names: + raise ValueError('Control sample group incorrect/missing.') + # print(samp_grps.grp_names) + + df_test = df.copy() + + # copy the list so that remove() does not mutate samp_grps.grp_names + case_grps = list(samp_grps.grp_names) + 
case_grps.remove(ctrl_grp) + fc_new_cols = [] + for each_case in case_grps: + tmp_samp_json = '{"'+str(each_case.strip())+'":'+str(samp_grps.sample_names[each_case]).replace("'","\"")+', "'+ str(ctrl_grp.strip()) +'":'+str(samp_grps.sample_names[str(ctrl_grp.strip())]).replace("'","\"")+'}' + tmp_samp_grps = SampleGroups(tmp_samp_json) + + # run test for each pair of groups + df_tmp = test_norm_intensity(df, tmp_samp_grps, paired, parametric) + + df_test[tmp_samp_grps.fc_name] = df_tmp[tmp_samp_grps.fc_name].tolist() + df_test[P_COLNAME+"_"+tmp_samp_grps.fc_name.replace("log2fc_","")] = df_tmp[P_COLNAME+"_"+tmp_samp_grps.fc_name.replace("log2fc_","")].tolist() + df_test[P_CORR_COLNAME+"_"+tmp_samp_grps.fc_name.replace("log2fc_","")] = df_tmp[P_CORR_COLNAME+"_"+tmp_samp_grps.fc_name.replace("log2fc_","")].tolist() + + fc_new_cols = fc_new_cols + [tmp_samp_grps.fc_name, P_COLNAME+"_"+tmp_samp_grps.fc_name.replace("log2fc_",""), P_CORR_COLNAME+"_"+tmp_samp_grps.fc_name.replace("log2fc_","")] + ################### + # run test - df_test = test_norm_intensity(df, samp_grps, paired, parametric) + #df_test = test_norm_intensity(df, samp_grps, paired, parametric) + # write out if outfile: - write_stat(df_test, samp_grps=samp_grps, ontology=ontology, mode=mode, outfile=outfile) + # write df_test (columns of every pairwise comparison), not df_tmp (only the last pair tested) + write_stat(df_test, samp_grps=samp_grps, ontology=ontology, mode=mode, outfile=outfile, fc_new_cols=fc_new_cols) # return return df_test @@ -80,10 +111,12 @@ def test_norm_intensity(df, samp_grps, paired, parametric): df_means = log2_fold_change(df, samp_grps) # p values, uncorrected for multiple comparisons - df_means[P_COLNAME] = test_results + #df_means[P_COLNAME] = test_results + df_means[P_COLNAME+"_"+samp_grps.fc_name.replace("log2fc_","")] = test_results # fdr correction - df_means[P_CORR_COLNAME] = mc.fdrcorrection0(test_results, method='indep')[1] + #df_means[P_CORR_COLNAME] = 
mc.fdrcorrection0(test_results, method='indep')[1] + df_means[P_CORR_COLNAME+"_"+samp_grps.fc_name.replace("log2fc_","")] = mc.fdrcorrection0(test_results, method='indep')[1] # reset the index to be 'id' - this is mostly for testing, and doesn't affect the output file df_means.set_index('id', drop=False, inplace=True) diff --git a/metaquantome/modules/viz.R b/metaquantome/modules/viz.R index 909bdcd..f57e15c 100644 --- a/metaquantome/modules/viz.R +++ b/metaquantome/modules/viz.R @@ -224,12 +224,17 @@ hclust.ward <- function(x) { library(scico) heatmap_colors <- scico(30, palette = 'vik') -mq_heatmap <- function(img, df, all_intcols, colSideColors, filter_to_sig, alpha, width, height, strip){ +mq_heatmap <- function(img, df, all_intcols, colSideColors, filter_to_sig, alpha, width, height, strip, feature_cluster_size, sample_cluster_size, fc_corr_p, infilename){ # df is the output from either expand, stat, or filter # samp_columns is a vector of all columns with the term intensities # colSide colors is a vector of colors for the groups. Must be in the same order as samp_columns # filter to sig if (filter_to_sig){ + if (fc_corr_p == "None"){ + stop("corrected p-value column not defined. did you run metaquantome stat?", + call. = FALSE) + } + df$corrected_p <- df[, fc_corr_p] pvals <- df$corrected_p if (is.null(pvals)) { stop("the dataset does not have a column named 'corrected_p'. 
did you run metaquantome stat?", @@ -259,7 +264,13 @@ mq_heatmap <- function(img, df, all_intcols, colSideColors, filter_to_sig, alpha # build dendrograms feature.dend <- as.dendrogram(hclust.ward(cor.dist(datmat.scale))) sample.dend <- as.dendrogram(hclust.ward(cor.dist(t(datmat.scale)))) - + + # Output cluster file for features and samples + feature_cluster = cutree(hclust.ward(cor.dist(datmat.scale)),k=feature_cluster_size); + write.table(feature_cluster, file = paste("feature_cluster_",infilename,'.txt', sep=""), sep = "\t", col.names=FALSE, quote=FALSE) + sample_cluster = cutree(hclust.ward(cor.dist(t(datmat.scale))),k=sample_cluster_size); + write.table(sample_cluster, file = paste("sample_cluster_",infilename,'.txt', sep=""), sep = "\t", col.names=FALSE, quote=FALSE) + # write plot to img path png(filename=img, width=width, height=height, res=300, units="in") par(mar = rep(5, 4)) @@ -289,11 +300,14 @@ heatmap_cli <- function(args){ # 7. alpha - significance level # 8. image width (default 5) # 9. 
image height (default 5) + img <- args[2] infile <- args[3] df <- read_result(infile) - + + infilename = basename(infile) + # split all_intcols from SampleGroups(), for samp_columns vector all_intcols <- get_all_intcols(args[4]) @@ -304,7 +318,11 @@ heatmap_cli <- function(args){ width <- as.numeric(args[8]) height <- as.numeric(args[9]) strip <- args[10] - mq_heatmap(img, df, all_intcols, colSideColors, filter_to_sig, alpha, width, height, strip) + feature_cluster_size = args[11] + sample_cluster_size = args[12] + fc_corr_p <- args[13] + + mq_heatmap(img, df, all_intcols, colSideColors, filter_to_sig, alpha, width, height, strip, feature_cluster_size, sample_cluster_size, fc_corr_p, infilename) } ####### ==================== ####### @@ -336,15 +354,20 @@ sep_n <- function(clust){ avg_dist / sum(within_variance) } -mq_prcomp <- function(img, df, all_intcols, json_dump, colors, calculate_sep, width, height, strip){ +mq_prcomp <- function(img, df, all_intcols, json_dump, colors, calculate_sep, width, height, strip, infilename){ # function(img, df, all_intcols, colSideColors, filter_to_sig, alpha, width, height) # df is the result from filter # samp_columns is a vector of all intensity column names # cols is a vector of group colors mat <- impute(data.matrix(df[, all_intcols])) pr <- prcomp(mat, scale=TRUE, center=TRUE) - - + + + # write PC1 and PC2 rotation data to file + pc_data = pr$rotation[,1:2] + pc_data = data.frame("Samples"=rownames(pc_data), "PC1"=pc_data[,1], "PC2"=pc_data[,2]) + write.table(pc_data, file = paste("PC_data_",infilename,'.txt', sep=""), sep = "\t", quote=FALSE, row.names=FALSE) + # calculate separation # make list of indices if (calculate_sep){ @@ -408,6 +431,9 @@ prcomp_cli <- function(args){ img <- args[2] infile <- args[3] df <- read_result(infile) + + infilename = basename(infile) + # split all_intcols from SampleGroups(), for samp_columns vector all_intcols <- get_all_intcols(args[4]) # get color mapping @@ -419,14 +445,14 @@ prcomp_cli 
<- function(args){ strip <- args[9] mq_prcomp(img=img, df=df, all_intcols=all_intcols, json_dump=json_dump, colors=colors, calculate_sep=calculate_sep, width=width, height=height, - strip=strip) + strip=strip, infilename) } ####### ==================== ####### # VOLCANO # ####### ==================== ####### -mq_volcano <- function(df, img, fc_name, flip_fc, width, height, textannot, gosplit, tabfile){ +mq_volcano <- function(df, img, fc_name, fc_corr_p, flip_fc, width, height, textannot, gosplit, tabfile){ # df is the dataframe after stat # fc_name is the name of the column with the fold change data # textcol is the name of the column with the text describing the term @@ -434,7 +460,9 @@ mq_volcano <- function(df, img, fc_name, flip_fc, width, height, textannot, gosp if (flip_fc){ df$fc <- (-1)*df$fc } - df$neglog10p <- -log10(df[, "corrected_p"]) + df$corrected_p <- df[, fc_corr_p] + #df$neglog10p <- -log10(df[, "corrected_p"]) + df$neglog10p <- -log10(df$corrected_p) df$de <- abs(df$fc) > 1 & df$corrected_p < 0.05 xmax <- max(df$fc) * 1.2 xmin <- min(df$fc) * 1.2 @@ -481,24 +509,27 @@ volcano_cli <- function(args){ # 3. input tabular file # 4. name of text annotation column # 5. name of fold change column - # 6. whether to flip fc - # 7. whether to split GO by ontology/namespace - # 8. image width (default 5) - # 9. image height (default 5) + # 6. name of corrected p-value column + # 7. whether to flip fc + # 8. whether to split GO by ontology/namespace + # 9. image width (default 5) + # 10. 
image height (default 5) + img <- args[2] infile <- args[3] df <- read_result(infile) textannot <- args[4] fc_name <- args[5] - flip_fc <- (args[6] == "True") - gosplit <- (args[7] == "True") - width <- as.numeric(args[8]) - height <- as.numeric(args[9]) - tabfile <- args[10] + fc_corr_p <- args[6] + flip_fc <- (args[7] == "True") + gosplit <- (args[8] == "True") + width <- as.numeric(args[9]) + height <- as.numeric(args[10]) + tabfile <- args[11] if (tabfile == "None") tabfile <- NULL plt <- mq_volcano(df, img=img, textannot=textannot, fc_name=fc_name, - flip_fc=flip_fc, gosplit=gosplit, width=width, height=height, - tabfile=tabfile) + fc_corr_p=fc_corr_p, flip_fc=flip_fc, gosplit=gosplit, width=width, + height=height, tabfile=tabfile) } ####### ==================== ####### diff --git a/metaquantome/util/stat_io.py b/metaquantome/util/stat_io.py index d48eeea..a792b98 100644 --- a/metaquantome/util/stat_io.py +++ b/metaquantome/util/stat_io.py @@ -20,7 +20,7 @@ def read_expanded_table(file, samp_grps): return df -def write_stat(df, outfile, samp_grps, ontology, mode): +def write_stat(df, outfile, samp_grps, ontology, mode, fc_new_cols): """ write the output of stat @@ -31,6 +31,8 @@ def write_stat(df, outfile, samp_grps, ontology, mode): :param mode: f, t, or ft :return: None """ + #cols = expand_io.define_outfile_cols_expand(samp_grps, ontology, mode) +\ + # [samp_grps.fc_name, P_COLNAME, P_CORR_COLNAME] cols = expand_io.define_outfile_cols_expand(samp_grps, ontology, mode) +\ - [samp_grps.fc_name, P_COLNAME, P_CORR_COLNAME] + fc_new_cols expand_io.write_out_general(df, outfile=outfile, cols=cols) diff --git a/setup.py b/setup.py index 04c6216..bc9ae8a 100644 --- a/setup.py +++ b/setup.py @@ -1,6 +1,7 @@ from setuptools import setup, find_packages -VERSION = '1.1.0' + +VERSION = '2.0.0' URL = 'https://github.com/galaxyproteomics/metaquantome' AUTHOR = 'Caleb Easterly' AUTHOR_EMAIL = 'caleb.easterly@gmail.com' @@ -39,4 +40,4 @@ "Bug Tracker": 
"https://github.com/galaxyproteomics/metaquantome/issues", "Source Code": "https://github.com/galaxyproteomics/metaquantome", } -) \ No newline at end of file +) diff --git a/tests/travis/testCLI.py b/tests/travis/testCLI.py index f512901..3e105de 100644 --- a/tests/travis/testCLI.py +++ b/tests/travis/testCLI.py @@ -34,15 +34,15 @@ def testMultipleInt(self): self.assertEqual(exp_status, 0) test_out = testfile('cli_mult_test_out.tab') - test_command = "python3 metaquantome/cli.py stat -m f --outfile " + test_out + ' --file ' + exp_out + test_command = "python3 metaquantome/cli.py stat -m f --outfile " + test_out + ' --file ' + exp_out + ' --control_group s2 ' test_command += ''' --ontology cog ''' + " --samps '" + TTEST_SINFO + "'" + ' --parametric True ' test_status = subprocess.call(test_command, shell=True) self.assertEqual(test_status, 0) test_df = pd.read_csv(test_out, sep="\t", index_col='id') # make sure false is > 0.05 and trues are less than 0.05 - self.assertTrue(test_df['corrected_p']['C'] > 0.05) - self.assertTrue(test_df['corrected_p'][['N','D']].le(0.05).all()) + self.assertTrue(test_df['corrected_p_s1_over_s2']['C'] > 0.05) + self.assertTrue(test_df['corrected_p_s1_over_s2'][['N','D']].le(0.05).all()) def testViz(self): infile = testfile('taxonomy_write_simple.tab') @@ -100,7 +100,8 @@ def testHeatmapViz(self): '--infile', infile, '--img', imgfile, "--samps '", TTEST_SINFO, "'", - '--filter_to_sig', + '--filter_to_sig', + '--fc_corr_p', 'corrected_p_s1_over_s2', '--alpha 0.5' ]) test_status = subprocess.call(cmd, shell=True) diff --git a/tests/travis/testStat.py b/tests/travis/testStat.py index 8c5537a..8aa87f4 100644 --- a/tests/travis/testStat.py +++ b/tests/travis/testStat.py @@ -62,11 +62,11 @@ def testDA(self): df_expd = expand.expand('f', sinfo=TTEST_SINFO, int_file=int, pep_colname_int='peptide', pep_colname_func='peptide', pep_colname_tax='peptide', data_dir=TEST_DIR, outfile=expanded, func_file=func, func_colname='go', ontology='go') - 
df_tst = stat.stat(expanded, sinfo=TTEST_SINFO, paired=False, parametric=True, ontology='go', mode='f', + df_tst = stat.stat(expanded, sinfo=TTEST_SINFO, paired=False, parametric=True, ontology='go', mode='f', control_group='s2', outfile=test_write) # make sure false is > 0.05 and trues are less than 0.05 - self.assertTrue(df_tst['p']['GO:0008152'] > 0.05) - self.assertTrue(df_tst['p'][['GO:0022610','GO:0000003','GO:0032505']].le(0.05).all()) + self.assertTrue(df_tst['p_s1_over_s2']['GO:0008152'] > 0.05) + self.assertTrue(df_tst['p_s1_over_s2'][['GO:0022610','GO:0000003','GO:0032505']].le(0.05).all()) def testCogTTest(self): func = testfile('multiple_func.tab') @@ -75,11 +75,11 @@ def testCogTTest(self): cog_df = expand.expand('f', sinfo=TTEST_SINFO, int_file=int, pep_colname_int='peptide', pep_colname_func='peptide', pep_colname_tax='peptide', outfile=expandfile, func_file=func, func_colname='cog', ontology='cog') - cog_tst = stat.stat(expandfile, sinfo=TTEST_SINFO, paired=False, parametric=True, ontology='cog', mode='f', + cog_tst = stat.stat(expandfile, sinfo=TTEST_SINFO, paired=False, parametric=True, ontology='cog', mode='f', control_group='s2', outfile=None) # make sure false is > 0.05 and trues are less than 0.05 - self.assertTrue(cog_tst['p']['C'] > 0.05) - self.assertTrue(cog_tst['p'][['N', 'D']].le(0.05).all()) + self.assertTrue(cog_tst['p_s1_over_s2']['C'] > 0.05) + self.assertTrue(cog_tst['p_s1_over_s2'][['N', 'D']].le(0.05).all()) def testDiffAbundEc(self): func = testfile('multiple_func.tab') @@ -89,11 +89,11 @@ def testDiffAbundEc(self): expand.expand('f', sinfo=TTEST_SINFO, int_file=int, pep_colname_int='peptide', pep_colname_func='peptide', pep_colname_tax='peptide', data_dir=TEST_DIR, outfile=expandfile, func_file=func, func_colname='ec', ontology='ec') - ec_tst = stat.stat(expandfile, sinfo=TTEST_SINFO, paired=False, parametric=True, ontology='ec', mode='f', + ec_tst = stat.stat(expandfile, sinfo=TTEST_SINFO, paired=False, parametric=True, 
ontology='ec', mode='f', control_group='s2', outfile=tested_file) # make sure false is > 0.05 and trues are less than 0.05 - self.assertTrue(ec_tst['p']['3.4.11.-'] > 0.05) - self.assertTrue(ec_tst['p'][['3.4.21.70', '1.2.-.-']].le(0.05).all()) + self.assertTrue(ec_tst['p_s1_over_s2']['3.4.11.-'] > 0.05) + self.assertTrue(ec_tst['p_s1_over_s2'][['3.4.21.70', '1.2.-.-']].le(0.05).all()) class TestTaxonomyAnalysisTest(unittest.TestCase): @@ -103,11 +103,11 @@ def testTaxTTests(self): expanded = testfile('expand_taxttest.tab') tax_df = expand.expand('t', sinfo=TTEST_SINFO, int_file=int, pep_colname_int='peptide', pep_colname_func='peptide', pep_colname_tax='peptide', data_dir=TEST_DIR, outfile=expanded, tax_file=tax, tax_colname='lca') - tax_tst = stat.stat(expanded, sinfo=TTEST_SINFO, paired=False, parametric=False, ontology=None, mode=None, + tax_tst = stat.stat(expanded, sinfo=TTEST_SINFO, paired=False, parametric=False, ontology=None, mode=None, control_group='s2', outfile=None) # make sure false is > 0.05 and trues are less than 0.05 - self.assertTrue(tax_tst['p'][210] > 0.05) - self.assertTrue(tax_tst['p'][[1496,1870884]].le(0.05).all()) + self.assertTrue(tax_tst['p_s1_over_s2'][210] > 0.05) + self.assertTrue(tax_tst['p_s1_over_s2'][[1496,1870884]].le(0.05).all()) # also, make sure firmicutes phylum is sum of c difficile and clostridiaceae self.assertEqual(tax_tst['int1'][1239], np.log2(1020))