diff --git a/QoRTs-reference.pdf b/QoRTs-reference.pdf deleted file mode 100644 index a745246..0000000 Binary files a/QoRTs-reference.pdf and /dev/null differ diff --git a/QoRTs-vignette.pdf b/QoRTs-vignette.pdf index a4e0d6c..148228b 100644 Binary files a/QoRTs-vignette.pdf and b/QoRTs-vignette.pdf differ diff --git a/QoRTs.jar b/QoRTs.jar index addd434..4dce9af 100644 Binary files a/QoRTs.jar and b/QoRTs.jar differ diff --git a/QoRTs_1.2.11.tar.gz b/QoRTs_1.2.11.tar.gz deleted file mode 100644 index fb94cee..0000000 Binary files a/QoRTs_1.2.11.tar.gz and /dev/null differ diff --git a/QoRTs_1.2.19.tar.gz b/QoRTs_1.2.19.tar.gz new file mode 100644 index 0000000..d5c3ec7 Binary files /dev/null and b/QoRTs_1.2.19.tar.gz differ diff --git a/README.md b/README.md index 642abde..3bab4aa 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,5 @@ -# QoRTs v1.2.11 -(Compiled Tue Feb 14 17:15:28 EST 2017) +# QoRTs v1.2.19 +(Compiled Fri Feb 17 13:51:51 EST 2017) The [QoRTs software package](http://hartleys.github.io/QoRTs/) is a fast, efficient, and portable multifunction toolkit designed to assist in diff --git a/example-walkthrough.pdf b/example-walkthrough.pdf index b8ca0bd..c558072 100644 Binary files a/example-walkthrough.pdf and b/example-walkthrough.pdf differ diff --git a/src/HartleyUtils/doc/lib/Thumbs.db b/src/HartleyUtils/doc/lib/Thumbs.db new file mode 100644 index 0000000..70ba6ab Binary files /dev/null and b/src/HartleyUtils/doc/lib/Thumbs.db differ diff --git a/src/HartleyUtils/src/main/scala/fileConversionUtils/generatePlotsWithR.scala b/src/HartleyUtils/src/main/scala/fileConversionUtils/generatePlotsWithR.scala index 991e584..349cfac 100644 --- a/src/HartleyUtils/src/main/scala/fileConversionUtils/generatePlotsWithR.scala +++ b/src/HartleyUtils/src/main/scala/fileConversionUtils/generatePlotsWithR.scala @@ -51,6 +51,12 @@ class genSimplePlots extends CommandLineRunUtil { argDesc = "The ID of the replicate. 
This will be only used for the plot labels.", defaultValue = Some("Untitled") ) :: + new BinaryArgument[String]( name = "prefix", + arg = List("--prefix"), + valueName = "prefixString", + argDesc = "The prefix for output QC files", + defaultValue = Some("") + ) :: new FinalArgument[String]( name = "qcdataDir", valueName = "qcDataDir", @@ -64,6 +70,7 @@ class genSimplePlots extends CommandLineRunUtil { if(out){ generateSimplePlots( parser.get[String]("qcDir"), + parser.get[String]("prefix"), parser.get[String]("uniqueID"), ! parser.get[Boolean]("noPng"), parser.get[Boolean]("makePdf"), @@ -73,7 +80,7 @@ class genSimplePlots extends CommandLineRunUtil { } } - def generateSimplePlots(qcdir : String, uniqueID : String, makePng : Boolean, makePdf : Boolean, makeSeparatePngs : Boolean) { + def generateSimplePlots(qcdir : String, qcprefix : String, uniqueID : String, makePng : Boolean, makePdf : Boolean, makeSeparatePngs : Boolean) { //val qcdir = outfileprefix.substring(0,outfileprefix.length - 2) if(makeSeparatePngs){ @@ -85,7 +92,7 @@ class genSimplePlots extends CommandLineRunUtil { } } - val rscriptString = makeRscriptString(qcdir, uniqueID, makePng,makePdf, makeSeparatePngs); + val rscriptString = makeRscriptString(qcdir, qcprefix, uniqueID, makePng,makePdf, makeSeparatePngs); val writer = openWriter(qcdir + "/QC.makeMultiplot.R"); writer.write(rscriptString); writer.close(); @@ -121,13 +128,14 @@ class genSimplePlots extends CommandLineRunUtil { logwriter.close(); } - def makeRscriptString(qcdir : String, uniqueID : String, makePng : Boolean, makePdf : Boolean, makeSeparatePngs : Boolean) : String = { + def makeRscriptString(qcdir : String, qcprefix : String, uniqueID : String, makePng : Boolean, makePdf : Boolean, makeSeparatePngs : Boolean) : String = { "# This is an automatically-generated R script designed to make a simple multiplot and/or pdf report for a sample.\n"+ "message(\"STARTING...\");\n"+ "library(QoRTs);\n"+ "unique.ID <- c(\"" + uniqueID + "\");\n"+ 
"qc.data.dir <- c(\"" + qcdir + "/\");\n"+ - "decoder.raw <- data.frame(unique.ID = as.character(unique.ID), qc.data.dir = as.character(qc.data.dir));\n"+ + "qc.data.prefix <- c(\""+ qcprefix + "\");\n"+ + "decoder.raw <- data.frame(unique.ID = as.character(unique.ID), qc.data.dir = as.character(qc.data.dir), qc.data.prefix=as.character(qc.data.prefix),stringsAsFactors=FALSE);\n"+ "decoder <- completeAndCheckDecoder(decoder = decoder.raw)\n"+ "message(decoder);\n"+ "message(lapply(names(decoder), function(n){ class(decoder[[n]]) }));\n"+ diff --git a/src/HartleyUtils/src/main/scala/qcUtils/runAllQC.scala b/src/HartleyUtils/src/main/scala/qcUtils/runAllQC.scala index a1ffad0..660400a 100644 --- a/src/HartleyUtils/src/main/scala/qcUtils/runAllQC.scala +++ b/src/HartleyUtils/src/main/scala/qcUtils/runAllQC.scala @@ -490,12 +490,13 @@ object runAllQC { defaultValue = Some(10000) ) :: - new BinaryOptionArgument[List[String]]( + new BinaryArgument[String]( name = "outfilePrefix", arg = List("--outfilePrefix"), valueName = "sampID", - argDesc = "Prefix to be prepended to all output files. If this is set, all output files will use the format: \"outfiledir/prefix.QC.qcfilename.txt.gz\""+ - "" + argDesc = "Prefix to be prepended to all output files. 
If this is set, all output files will use the format: \"outfiledir/QC.qcfilename.txt.gz\""+ + "", + defaultValue = Some("") ) :: //DEPRECIATED or BETA OPTIONS: @@ -664,7 +665,7 @@ object runAllQC { parser.get[Boolean]("keepOnTarget"), parser.get[Option[Double]]("randomSubsample"), parser.get[Option[Long]]("randomSeed"), - parser.get[Option[String]]("outfilePrefix"), + parser.get[String]("outfilePrefix"), parser.get[Int]("genomeBufferSize") ); } @@ -714,7 +715,7 @@ object runAllQC { keepOnTarget : Boolean, randomSubsample : Option[Double], randomSeed : Option[Long], - outfilePrefix : Option[String], + outfilePrefix : String, genomeBufferSize : Int){ randomSeed match { @@ -729,7 +730,7 @@ object runAllQC { reportln("Creating Directory: "+ outdir,"note"); outDirFile.mkdir(); } - val outfile = outdir + "/" + (if(outfilePrefix.isEmpty){""} else {outfilePrefix.get + "."}) + "QC"; + val outfile = outdir + "/" + outfilePrefix + "QC"; val logfile = outfile +"."+ internalUtils.stdUtils.getRandomString(12) + ".log"; internalUtils.Reporter.init_completeLogFile(logfile); @@ -797,7 +798,7 @@ object runAllQC { Set[String]("referenceMatch"); }) - reportln("Default functions: " + wrapSimpleLineWithIndent_staggered(defaultFunctonList.toList.sorted.mkString(", "), 68, " ", ""),"debug"); + //reportln("Default functions: " + wrapSimpleLineWithIndent_staggered(defaultFunctonList.toList.sorted.mkString(", "), 68, " ", ""),"debug"); val runFunc_initial = (if(runFunctions.isEmpty){ (defaultFunctonList ++ addFunctions.toSet) -- dropFunctions.toSet; @@ -848,7 +849,7 @@ object runAllQC { } //wrapSimpleLineWithIndent_staggered(line : String, width : Int, indent : String, firstLineIndent : String) - reportln("Running functions: " + wrapSimpleLineWithIndent_staggered(runFunc.toList.sorted.mkString(", "), 68, " ", ""),"progress"); + reportln("Running functions: " + wrapSimpleLineWithIndent_staggered(runFunc.toList.sorted(internalUtils.stdUtils.AlphabetOrdering).mkString(", "), 68, " ", 
""),"progress"); //reportln("infile: " + infile , "note"); //reportln("outfile: " + outfile , "note"); @@ -952,7 +953,8 @@ object runAllQC { targetRegionBed=targetRegionBed, stopAfterNReads=stopAfterNReads, genomeFA = genomeFA,dropOnTarget=dropOnTarget,keepOnTarget=keepOnTarget,randomSubsample=randomSubsample, - genomeBufferSize = genomeBufferSize) + genomeBufferSize = genomeBufferSize, + outfilePrefix = outfilePrefix) } } @@ -995,7 +997,8 @@ object runAllQC { dropOnTarget : Boolean, keepOnTarget : Boolean, randomSubsample : Option[Double], - genomeBufferSize : Int + genomeBufferSize : Int, + outfilePrefix : String ){ @@ -1479,6 +1482,7 @@ object runAllQC { report("Generating plots...","progress") fileConversionUtils.generatePlotsWithR.generateSimplePlots( qcdir = outdir, + qcprefix = outfilePrefix, uniqueID = trackTitlePrefix, makePng = generateMultiPlot, makePdf = generatePdfReport, diff --git a/src/HartleyUtils/src/main/scala/runner/runner.scala b/src/HartleyUtils/src/main/scala/runner/runner.scala index ab4969f..15f1e37 100644 --- a/src/HartleyUtils/src/main/scala/runner/runner.scala +++ b/src/HartleyUtils/src/main/scala/runner/runner.scala @@ -9,9 +9,9 @@ import internalUtils.commandLineUI._; object runner { - final val QORTS_VERSION = "1.2.11"; // REPLACE_THIS_QORTS_VERSION_VARIABLE_WITH_VERSION_NUMBER (note this exact text is used in a search-and-replace. Do not change it.) - final val QORTS_COMPILE_DATE = "Tue Feb 14 17:15:28 EST 2017"; // REPLACE_THIS_QORTS_DATE_VARIABLE_WITH_DATE (note this exact text is used in a search-and-replace. Do not change it.) - final val QORTS_COMPILE_TIME : Long = 1487110528; // REPLACE_THIS_QORTS_DATE_VARIABLE_WITH_TIME (note this exact text is used in a search-and-replace. Do not change it.) + final val QORTS_VERSION = "1.2.19"; // REPLACE_THIS_QORTS_VERSION_VARIABLE_WITH_VERSION_NUMBER (note this exact text is used in a search-and-replace. Do not change it.) 
+ final val QORTS_COMPILE_DATE = "Fri Feb 17 13:51:51 EST 2017"; // REPLACE_THIS_QORTS_DATE_VARIABLE_WITH_DATE (note this exact text is used in a search-and-replace. Do not change it.) + final val QORTS_COMPILE_TIME : Long = 1487357511; // REPLACE_THIS_QORTS_DATE_VARIABLE_WITH_TIME (note this exact text is used in a search-and-replace. Do not change it.) final val QORTS_MAJOR_VERSION = QORTS_VERSION.split("\\.")(0); final val QORTS_MINOR_VERSION = QORTS_VERSION.split("\\.")(1); diff --git a/src/QoRTs/DESCRIPTION b/src/QoRTs/DESCRIPTION index 22e24ed..45949fb 100644 --- a/src/QoRTs/DESCRIPTION +++ b/src/QoRTs/DESCRIPTION @@ -1,6 +1,6 @@ Package: QoRTs -Version: 1.2.11 -Date: 2017-02-14 +Version: 1.2.19 +Date: 2017-02-17 Title: Quality of RNA-seq Tool Authors@R: c(person("Stephen Hartley, PhD", "Developer", role = c("aut", "cre"), email = "QoRTs-contact@list.nih.gov")) diff --git a/src/QoRTs/NEWS b/src/QoRTs/NEWS index b7ca943..b997ce1 100644 --- a/src/QoRTs/NEWS +++ b/src/QoRTs/NEWS @@ -1,12 +1,22 @@ +## v1.2.15: + + * Added full support for custom output file prefixes (per request by c-guzman, see https://github.com/hartleys/QoRTs/issues/27). + Output file prefixes can now be set using the --outfilePrefix parameter in the QoRTs QC step, and can be loaded into the QoRTs R + companion package by including a column titled "qc.data.prefix" in the decoder. + * Migrated QoRTs build/testing pipeline to a different server. + ## v1.2.0: -HUGE UPDATE: +HUGE update. Numerous new features have been added and are still undergoing beta testing. These new features may be subject to change in the next stable release. + +Among the changes: * Added support for whole-exome or whole-genome datasets in addition to RNA-Seq. (Maybe rename the tool?) * Added numerous metrics which may be relevant to variant calling. + * Intermediate file documentation: The raw QC metric files produced by QoRTs are now better documented. 
You can cause QoRTs QC to generate a documentation file using the parameter "--addFunctions writeDocs". Added an array of new metrics: - * "Overlap Mismatch": various metrics relating to the rate at which overlapped paired-end reads are mismatching. Also calculates mismatch rates at specific qual scores. - * "Reference Mismatch": various metrics relating to the rate at which reads have point-mismatches with the reference genome. Requires that a genome fasta file (via the --genomeFA parameter for the QoRTs QC step). + * "Overlap Mismatch": various metrics relating to the rate at which overlapped paired-end reads are found to mismatch one another. This can be used as a proxy for the sequencing error rate, since the two paired-end reads sequence the same physical cDNA fragment. Mismatch rates are calculated by base-swap type, by quality score, and by position in the reads. + * "Reference Mismatch": various metrics relating to the rate at which reads have point-mismatches with the reference genome. Requires that a genome fasta file be supplied (via the --genomeFA parameter for the QoRTs QC step). Mismatch rates are calculated by base-swap type, by quality score, and by position in the reads. * "On-Target Rate": For Exome data only. Uses a target bed file to calculate rate of on-target reads. Can also be used to filter reads to only on-target reads. Requires a target BED file (set via the --targetRegionBed parameter) * "Read Length Rates": Rates of observed read lengths. Useful if data is hard-trimmed prior to alignment. * Performance Plot: Plot shows the runtime performance of the QoRTs QC run. 
diff --git a/src/QoRTs/R/internal.plotting.func.R b/src/QoRTs/R/internal.plotting.func.R index 1cc1bf0..7f4f46e 100644 --- a/src/QoRTs/R/internal.plotting.func.R +++ b/src/QoRTs/R/internal.plotting.func.R @@ -100,7 +100,7 @@ makePlot.outlierSlices <- function( for(i in 1:length(has.sample.plot)){ curr.sample <- has.sample.plot[[i]]; - cat("."); + message(".",appendLF=FALSE); colorBy <- res@decoder$lane.ID; names(colorBy) <- res@decoder$unique.ID; diff --git a/src/QoRTs/R/minor.utils.R b/src/QoRTs/R/minor.utils.R index bc9dbdb..c6d4ca5 100644 --- a/src/QoRTs/R/minor.utils.R +++ b/src/QoRTs/R/minor.utils.R @@ -528,7 +528,7 @@ reportTimeAndDiff <- function(ts = NULL, prefix = ""){ } runTimedFunction <- function(expr, title= "",debugMode = TRUE){ - if(debugMode) cat(paste0(title,"...")); + if(debugMode) message(paste0(title,"..."),appendLF=FALSE); if(debugMode) ts <- timestamp(); expr; if(debugMode) message(paste0("done. ",getTimeAndDiff(ts))) diff --git a/src/QoRTs/R/read.files.R b/src/QoRTs/R/read.files.R index e390d80..4673d0f 100644 --- a/src/QoRTs/R/read.files.R +++ b/src/QoRTs/R/read.files.R @@ -9,11 +9,12 @@ DEFAULTDEBUGMODE <- TRUE; #Input can come in two forms: #(1) Single Decoder # This requires a single "decoder" file, which MUST have the following column names: -# unique.ID lane.ID group.ID sample.ID qc.data.dir -# Other than these 4 required columns, it can have as many additional columns as desired. Column names must be unique. -# OPTIONAL FIELDS: +# unique.ID +# It can also have the following optional fields: +# lane.ID group.ID sample.ID qc.data.dir qc.data.prefix # input.read.pair.count: the # of input reads. this must be included for mapping rate to be calculated. # multi.mapped.read.pair.count: the # of reads that were multi mapped by the aligner. this must be included for multi-mapping rate to be calculated. +# it can have as many additional columns as desired. Column names must be unique. 
# RESERVED FIELDS: Do not name any field this: # cycle.CT # lanebam.ID (a synonym for unique.ID) @@ -145,11 +146,18 @@ expandAndCheckDecoder <- function(decoder) { decoder$qc.data.dir = decoder$unique.ID; } + if(! ("qc.data.prefix" %in% names(decoder))){ + message("column 'qc.data.prefix' not found in the decoder, assuming qc.data.prefix = \"\""); + decoder$qc.data.prefix = ""; + } + decoder$qc.data.dir <- as.character(decoder$qc.data.dir); decoder$group.ID <- as.character(decoder$group.ID); decoder$lane.ID <- as.character(decoder$lane.ID); decoder$sample.ID <- as.character(decoder$sample.ID); decoder$unique.ID <- as.character(decoder$unique.ID); + decoder$qc.data.prefix <- as.character(decoder$qc.data.prefix); + #Now do checks for validity: @@ -278,12 +286,13 @@ read.in.results.data.with.decoder <- function(decoder, infile.dir = "", stop("Fatal error: QoRTs run data not found! Use autodetectMissingSamples = TRUE to automatically skip these runs"); } } - - compFiles <- paste0(infile.dir,decoder$qc.data.dir,"/QC.QORTS_COMPLETED_OK"); + compFiles <- paste0(infile.dir,decoder$qc.data.dir,"/",decoder$qc.data.prefix,"QC.QORTS_COMPLETED_OK"); + #compFiles <- paste0(infile.dir,decoder$qc.data.dir,"/QC.QORTS_COMPLETED_OK"); compFileExists <- file.exists(compFiles) if(any(! compFileExists)){ - message("WARNING: QoRTs run may be incomplete! File not found: ",paste0(infile.dir,decoder$qc.data.dir[!compFileExists],"/QC.QORTS_COMPLETED_OK"),"!"); + message("WARNING: QoRTs run may be incomplete! File not found: ",paste0(infile.dir,decoder$qc.data.dir[!compFileExists],"/",decoder$qc.data.prefix[!compFileExists],"QC.QORTS_COMPLETED_OK"),"!"); + #message("WARNING: QoRTs run may be incomplete! 
File not found: ",paste0(infile.dir,decoder$qc.data.dir[!compFileExists],"/QC.QORTS_COMPLETED_OK"),"!"); if(autodetectMissingSamples){ message(" Skipping missing samples!"); decoder <- decoder[compFileExists,,drop=FALSE]; @@ -292,6 +301,18 @@ read.in.results.data.with.decoder <- function(decoder, infile.dir = "", } } + sumFiles <- paste0(infile.dir,decoder$qc.data.dir,"/",decoder$qc.data.prefix,"QC.summary.txt"); + sumFileExists <- file.exists(sumFiles) + if(any(! sumFileExists)){ + message("WARNING: QoRTs run may be incomplete! File not found: ",paste0(infile.dir,decoder$qc.data.dir[!sumFileExists],"/",decoder$qc.data.prefix[!sumFileExists],"QC.summary.txt"),"!"); + if(autodetectMissingSamples){ + message(" Skipping missing samples!"); + decoder <- decoder[sumFileExists,,drop=FALSE]; + } else { + stop("Fatal error: QoRTs run data not found! Use autodetectMissingSamples = TRUE to automatically skip these runs"); + } + } + res <- new("QoRTs_QC_Results"); res@lanebam.list <- decoder$unique.ID; res@sample.list <- unique(decoder$sample.ID); @@ -304,7 +325,7 @@ read.in.results.data.with.decoder <- function(decoder, infile.dir = "", names(lanebam.list) <- decoder$unique.ID qc.data.dir.list <- as.list(decoder$qc.data.dir); - names(qc.data.dir.list) <- decoder$unique.ID + names(qc.data.dir.list) <- decoder$unique.ID; if(debugMode) message("infile.dir = ",infile.dir); #if(debugMode) message("qc.data.dir.list = ",paste0(qc.data.dir.list,collapse=",")); @@ -313,7 +334,7 @@ read.in.results.data.with.decoder <- function(decoder, infile.dir = "", read.scalaqc.file.helper <- function(scalaqc_file, sep=""){ if(debugMode) message(paste0("scalaqc_file = ",scalaqc_file), appendLF=FALSE); if(debugMode) ts <- timestamp(); - out <- read.in.scalaQC.files(infile.dir,lanebam.list, qc.data.dir.list,scalaqc_file,sep=sep); + out <- read.in.scalaQC.files(infile.dir,lanebam.list, qc.data.dir.list, decoder$qc.data.prefix,scalaqc_file,sep=sep); if(debugMode) reportTimeAndDiff(ts,prefix=" "); out; 
} @@ -351,7 +372,7 @@ read.in.results.data.with.decoder <- function(decoder, infile.dir = "", read.scalaqc.file.helper <- function(scalaqc_file, sep=""){ if(debugMode) message(paste0("(File ",which(USE.LIST == scalaqc_file)," of ",length(USE.LIST),"): ",scalaqc_file), appendLF=FALSE); if(debugMode) ts <- timestamp(); - out <- read.in.scalaQC.files(infile.dir,lanebam.list, qc.data.dir.list,scalaqc_file,sep=sep); + out <- read.in.scalaQC.files(infile.dir,lanebam.list, qc.data.dir.list,decoder$qc.data.prefix,scalaqc_file,sep=sep); if(debugMode) reportTimeAndDiff(ts,prefix=" "); out; } @@ -419,7 +440,8 @@ QC_INTERNAL_SCALAQC_FILE_LIST <- list( gc.byPair = "QC.gc.byPair.txt.gz", FQ.NVC.R1 = "QC.FQ.NVC.R1.txt.gz", FQ.NVC.R2 = "QC.FQ.NVC.R2.txt.gz", FQ.quals.r1 = "QC.FQ.quals.r1.txt.gz", - FQ.quals.r2 = "QC.FQ.quals.r2.txt.gz" + FQ.quals.r2 = "QC.FQ.quals.r2.txt.gz", + FQ.readLenDist = "QC.FQ.readLenDist.txt.gz" #, #spliceJunctionCounts.knownSplices = "scalaQC.spliceJunctionCounts.knownSplices.txt.gz" @@ -451,7 +473,9 @@ QC_INTERNAL_SCALAQC_FILE_LIST_SINGLE_END <- list( mismatchSizeRates="QC.mismatchSizeRates.txt.gz", FQ.gc.byRead = "QC.FQ.gc.byRead.txt.gz", FQ.NVC.R1 = "QC.FQ.NVC.R1.txt.gz", - FQ.quals.r1 = "QC.FQ.quals.r1.txt.gz" + FQ.quals.r1 = "QC.FQ.quals.r1.txt.gz", + FQ.readLenDist = "QC.FQ.readLenDist.txt.gz" + #, #spliceJunctionCounts.knownSplices = "scalaQC.spliceJunctionCounts.knownSplices.txt.gz" #spliceJunctionCounts.novelSplices = "scalaQC.spliceJunctionCounts.novelSplices.txt.gz" @@ -487,9 +511,9 @@ find.compression.variant.helper <- function(f){ } } -read.in.scalaQC.files <- function(infile.prefix, lanebam.list, qc.data.dir.list, infile.suffix, sep = ""){ +read.in.scalaQC.files <- function(infile.prefix, lanebam.list, qc.data.dir.list, qc.data.prefix, infile.suffix, sep = ""){ #message(paste0("reading ",infile.suffix," files"),appendLF=FALSE); - infiles <- find.compression.variant(paste0(infile.prefix,unlist(qc.data.dir.list),"/", infile.suffix)); + 
infiles <- find.compression.variant(paste0(infile.prefix,unlist(qc.data.dir.list),"/",qc.data.prefix, infile.suffix)); if(! is.na(infiles[1])){ #print("!") for(i in 1:length(infiles)){