From 7bcb7b4b302a29264b2a97d833efa5b5eb0015b5 Mon Sep 17 00:00:00 2001 From: Miguel Covarrubias Date: Mon, 23 Jan 2023 17:27:21 -0500 Subject: [PATCH 1/3] Add shell escapes for reference file paths containing metachars [VS-705] --- .../reference_disk/reference_disk_test.inputs | 3 ++- .../reference_disk/reference_disk_test.wdl | 11 ++++++++++- .../pipelines/common/action/ActionCommands.scala | 5 +++++ .../PipelinesApiAsyncBackendJobExecutionActor.scala | 4 +++- .../PipelinesApiAsyncBackendJobExecutionActor.scala | 3 ++- 5 files changed, 22 insertions(+), 4 deletions(-) diff --git a/centaur/src/main/resources/standardTestCases/reference_disk/reference_disk_test.inputs b/centaur/src/main/resources/standardTestCases/reference_disk/reference_disk_test.inputs index f23615ee7f0..4caf94c0f77 100644 --- a/centaur/src/main/resources/standardTestCases/reference_disk/reference_disk_test.inputs +++ b/centaur/src/main/resources/standardTestCases/reference_disk/reference_disk_test.inputs @@ -1,4 +1,5 @@ { "wf_reference_disk_test.broad_reference_file_input": "gs://gcp-public-data--broad-references/hg19/v0/Homo_sapiens_assembly19.fasta.fai", - "wf_reference_disk_test.nirvana_reference_file_input": "gs://broad-public-datasets/gvs/vat-annotations/Nirvana/3.18.1/SupplementaryAnnotation/GRCh38/phyloP_hg38.npd.idx" + "wf_reference_disk_test.nirvana_reference_file_input": "gs://broad-public-datasets/gvs/vat-annotations/Nirvana/3.18.1/SupplementaryAnnotation/GRCh38/phyloP_hg38.npd.idx", + "wf_reference_disk_test.nirvana_reference_file_metachar_input": "gs://broad-public-datasets/gvs/vat-annotations/Nirvana/3.18.1/SupplementaryAnnotation/GRCh38/1000_Genomes_Project_(SV)_Phase_3_v5a.nsi" } diff --git a/centaur/src/main/resources/standardTestCases/reference_disk/reference_disk_test.wdl b/centaur/src/main/resources/standardTestCases/reference_disk/reference_disk_test.wdl index 4d469cbe552..fe01ae34d06 100644 --- a/centaur/src/main/resources/standardTestCases/reference_disk/reference_disk_test.wdl +++ b/centaur/src/main/resources/standardTestCases/reference_disk/reference_disk_test.wdl @@ -4,17 +4,23 @@ task check_if_localized_as_symlink { input { File broad_reference_file_input File nirvana_reference_file_input + File nirvana_reference_file_metachar_input } String broad_input_symlink = "broad_input_symlink.txt" String nirvana_input_symlink = "nirvana_input_symlink.txt" + String nirvana_metachar_input_symlink = "nirvana_metachar_input_symlink.txt" command { # Print true if input is a symlink, otherwise print false. if test -h ~{broad_reference_file_input}; then echo true; else echo false; fi > ~{broad_input_symlink} if test -h ~{nirvana_reference_file_input}; then echo true; else echo false; fi > ~{nirvana_input_symlink} + + # Quotes added here due to the metachar in the filename. + if test -h "~{nirvana_reference_file_metachar_input}"; then echo true; else echo false; fi > ~{nirvana_metachar_input_symlink} } output { Boolean is_broad_input_symlink = read_boolean("~{broad_input_symlink}") Boolean is_nirvana_input_symlink = read_boolean("~{nirvana_input_symlink}") + Boolean is_nirvana_metachar_input_symlink = read_boolean("~{nirvana_metachar_input_symlink}") } runtime { docker: "ubuntu:latest" @@ -26,14 +32,17 @@ workflow wf_reference_disk_test { input { File broad_reference_file_input File nirvana_reference_file_input + File nirvana_reference_file_metachar_input } call check_if_localized_as_symlink { input: broad_reference_file_input = broad_reference_file_input, - nirvana_reference_file_input = nirvana_reference_file_input + nirvana_reference_file_input = nirvana_reference_file_input, + nirvana_reference_file_metachar_input = nirvana_reference_file_metachar_input } output { Boolean is_broad_input_file_a_symlink = check_if_localized_as_symlink.is_broad_input_symlink Boolean is_nirvana_input_file_a_symlink = check_if_localized_as_symlink.is_nirvana_input_symlink + Boolean is_nirvana_metachar_input_file_a_symlink = check_if_localized_as_symlink.is_nirvana_metachar_input_symlink } } diff --git a/supportedBackends/google/pipelines/common/src/main/scala/cromwell/backend/google/pipelines/common/action/ActionCommands.scala b/supportedBackends/google/pipelines/common/src/main/scala/cromwell/backend/google/pipelines/common/action/ActionCommands.scala index 2357db6cf25..f32974855f8 100644 --- a/supportedBackends/google/pipelines/common/src/main/scala/cromwell/backend/google/pipelines/common/action/ActionCommands.scala +++ b/supportedBackends/google/pipelines/common/src/main/scala/cromwell/backend/google/pipelines/common/action/ActionCommands.scala @@ -33,6 +33,11 @@ object ActionCommands { def escape: String = StringEscapeUtils.escapeXSI(path.pathAsString) } + implicit class ShellString(val string: String) extends AnyVal { + // The command String runs in Bourne shell so shell metacharacters in filenames must be escaped + def escape: String = StringEscapeUtils.escapeXSI(string) + } + private def makeContentTypeFlag(contentType: Option[ContentType]) = contentType.map(ct => s"""-h "Content-Type: $ct"""").getOrElse("") def makeContainerDirectory(containerPath: Path) = s"mkdir -p ${containerPath.escape}" diff --git a/supportedBackends/google/pipelines/v2alpha1/src/main/scala/cromwell/backend/google/pipelines/v2alpha1/PipelinesApiAsyncBackendJobExecutionActor.scala b/supportedBackends/google/pipelines/v2alpha1/src/main/scala/cromwell/backend/google/pipelines/v2alpha1/PipelinesApiAsyncBackendJobExecutionActor.scala index 2da04c65e92..b48e954936b 100644 --- a/supportedBackends/google/pipelines/v2alpha1/src/main/scala/cromwell/backend/google/pipelines/v2alpha1/PipelinesApiAsyncBackendJobExecutionActor.scala +++ b/supportedBackends/google/pipelines/v2alpha1/src/main/scala/cromwell/backend/google/pipelines/v2alpha1/PipelinesApiAsyncBackendJobExecutionActor.scala @@ -177,13 +177,15 @@ class PipelinesApiAsyncBackendJobExecutionActor(standardParams: StandardAsyncExe private def generateGcsLocalizationScript(inputs: List[PipelinesApiInput], referenceInputsToMountedPathsOpt: Option[Map[PipelinesApiInput, String]]) (implicit gcsTransferConfiguration: GcsTransferConfiguration): String = { + // Generate a mapping of reference inputs to their mounted paths and a section of the localization script to // "faux localize" these reference inputs with symlinks to their locations on mounted reference disks. + import cromwell.backend.google.pipelines.common.action.ActionCommands._ val referenceFilesLocalizationScript = { val symlinkCreationCommandsOpt = referenceInputsToMountedPathsOpt map { referenceInputsToMountedPaths => referenceInputsToMountedPaths map { case (input, absolutePathOnRefDisk) => - s"mkdir -p ${input.containerPath.parent.pathAsString} && ln -s $absolutePathOnRefDisk ${input.containerPath.pathAsString}" + s"mkdir -p ${input.containerPath.parent.pathAsString.escape} && ln -s ${absolutePathOnRefDisk.escape} ${input.containerPath.pathAsString.escape}" } } diff --git a/supportedBackends/google/pipelines/v2beta/src/main/scala/cromwell/backend/google/pipelines/v2beta/PipelinesApiAsyncBackendJobExecutionActor.scala b/supportedBackends/google/pipelines/v2beta/src/main/scala/cromwell/backend/google/pipelines/v2beta/PipelinesApiAsyncBackendJobExecutionActor.scala index fb6c8d425f7..b978b1b78eb 100644 --- a/supportedBackends/google/pipelines/v2beta/src/main/scala/cromwell/backend/google/pipelines/v2beta/PipelinesApiAsyncBackendJobExecutionActor.scala +++ b/supportedBackends/google/pipelines/v2beta/src/main/scala/cromwell/backend/google/pipelines/v2beta/PipelinesApiAsyncBackendJobExecutionActor.scala @@ -192,11 +192,12 @@ class PipelinesApiAsyncBackendJobExecutionActor(standardParams: StandardAsyncExe (implicit gcsTransferConfiguration: GcsTransferConfiguration): String = { // Generate a mapping of reference inputs to their mounted paths and a section of the localization script to // "faux localize" these reference inputs with symlinks to their locations on mounted reference disks. + import cromwell.backend.google.pipelines.common.action.ActionCommands._ val referenceFilesLocalizationScript = { val symlinkCreationCommandsOpt = referenceInputsToMountedPathsOpt map { referenceInputsToMountedPaths => referenceInputsToMountedPaths map { case (input, absolutePathOnRefDisk) => - s"mkdir -p ${input.containerPath.parent.pathAsString} && ln -s $absolutePathOnRefDisk ${input.containerPath.pathAsString}" + s"mkdir -p ${input.containerPath.parent.pathAsString.escape} && ln -s ${absolutePathOnRefDisk.escape} ${input.containerPath.pathAsString.escape}" } } From 8ec136aa11d4c5df0ccf7d6338ccfb0dffa0c50b Mon Sep 17 00:00:00 2001 From: Miguel Covarrubias Date: Mon, 23 Jan 2023 18:27:10 -0500 Subject: [PATCH 2/3] I blanked on that --- .../v2alpha1/PipelinesApiAsyncBackendJobExecutionActor.scala | 1 - 1 file changed, 1 deletion(-) diff --git a/supportedBackends/google/pipelines/v2alpha1/src/main/scala/cromwell/backend/google/pipelines/v2alpha1/PipelinesApiAsyncBackendJobExecutionActor.scala b/supportedBackends/google/pipelines/v2alpha1/src/main/scala/cromwell/backend/google/pipelines/v2alpha1/PipelinesApiAsyncBackendJobExecutionActor.scala index b48e954936b..93965330adf 100644 --- a/supportedBackends/google/pipelines/v2alpha1/src/main/scala/cromwell/backend/google/pipelines/v2alpha1/PipelinesApiAsyncBackendJobExecutionActor.scala +++ b/supportedBackends/google/pipelines/v2alpha1/src/main/scala/cromwell/backend/google/pipelines/v2alpha1/PipelinesApiAsyncBackendJobExecutionActor.scala @@ -177,7 +177,6 @@ class PipelinesApiAsyncBackendJobExecutionActor(standardParams: StandardAsyncExe private def generateGcsLocalizationScript(inputs: List[PipelinesApiInput], referenceInputsToMountedPathsOpt: Option[Map[PipelinesApiInput, String]]) (implicit gcsTransferConfiguration: GcsTransferConfiguration): String = { - // Generate a mapping of reference inputs to their mounted paths and a section of the localization script to // "faux localize" these reference inputs with symlinks to their locations on mounted reference disks. import cromwell.backend.google.pipelines.common.action.ActionCommands._ From 14ba61ce376d4abf30eb8f445a6ba8673e7cd630 Mon Sep 17 00:00:00 2001 From: Miguel Covarrubias Date: Tue, 24 Jan 2023 15:31:43 -0500 Subject: [PATCH 3/3] PR feedback --- .../google/pipelines/common/action/ActionCommands.scala | 5 ----- .../v2alpha1/PipelinesApiAsyncBackendJobExecutionActor.scala | 4 ++-- .../v2beta/PipelinesApiAsyncBackendJobExecutionActor.scala | 4 ++-- 3 files changed, 4 insertions(+), 9 deletions(-) diff --git a/supportedBackends/google/pipelines/common/src/main/scala/cromwell/backend/google/pipelines/common/action/ActionCommands.scala b/supportedBackends/google/pipelines/common/src/main/scala/cromwell/backend/google/pipelines/common/action/ActionCommands.scala index f32974855f8..2357db6cf25 100644 --- a/supportedBackends/google/pipelines/common/src/main/scala/cromwell/backend/google/pipelines/common/action/ActionCommands.scala +++ b/supportedBackends/google/pipelines/common/src/main/scala/cromwell/backend/google/pipelines/common/action/ActionCommands.scala @@ -33,11 +33,6 @@ object ActionCommands { def escape: String = StringEscapeUtils.escapeXSI(path.pathAsString) } - implicit class ShellString(val string: String) extends AnyVal { - // The command String runs in Bourne shell so shell metacharacters in filenames must be escaped - def escape: String = StringEscapeUtils.escapeXSI(string) - } - private def makeContentTypeFlag(contentType: Option[ContentType]) = contentType.map(ct => s"""-h "Content-Type: $ct"""").getOrElse("") def makeContainerDirectory(containerPath: Path) = s"mkdir -p ${containerPath.escape}" diff --git a/supportedBackends/google/pipelines/v2alpha1/src/main/scala/cromwell/backend/google/pipelines/v2alpha1/PipelinesApiAsyncBackendJobExecutionActor.scala b/supportedBackends/google/pipelines/v2alpha1/src/main/scala/cromwell/backend/google/pipelines/v2alpha1/PipelinesApiAsyncBackendJobExecutionActor.scala index 93965330adf..481f7d6c5a8 100644 --- a/supportedBackends/google/pipelines/v2alpha1/src/main/scala/cromwell/backend/google/pipelines/v2alpha1/PipelinesApiAsyncBackendJobExecutionActor.scala +++ b/supportedBackends/google/pipelines/v2alpha1/src/main/scala/cromwell/backend/google/pipelines/v2alpha1/PipelinesApiAsyncBackendJobExecutionActor.scala @@ -179,12 +179,12 @@ class PipelinesApiAsyncBackendJobExecutionActor(standardParams: StandardAsyncExe (implicit gcsTransferConfiguration: GcsTransferConfiguration): String = { // Generate a mapping of reference inputs to their mounted paths and a section of the localization script to // "faux localize" these reference inputs with symlinks to their locations on mounted reference disks. - import cromwell.backend.google.pipelines.common.action.ActionCommands._ + import cromwell.backend.google.pipelines.common.action.ActionUtils.shellEscaped val referenceFilesLocalizationScript = { val symlinkCreationCommandsOpt = referenceInputsToMountedPathsOpt map { referenceInputsToMountedPaths => referenceInputsToMountedPaths map { case (input, absolutePathOnRefDisk) => - s"mkdir -p ${input.containerPath.parent.pathAsString.escape} && ln -s ${absolutePathOnRefDisk.escape} ${input.containerPath.pathAsString.escape}" + s"mkdir -p ${shellEscaped(input.containerPath.parent.pathAsString)} && ln -s ${shellEscaped(absolutePathOnRefDisk)} ${shellEscaped(input.containerPath.pathAsString)}" } } diff --git a/supportedBackends/google/pipelines/v2beta/src/main/scala/cromwell/backend/google/pipelines/v2beta/PipelinesApiAsyncBackendJobExecutionActor.scala b/supportedBackends/google/pipelines/v2beta/src/main/scala/cromwell/backend/google/pipelines/v2beta/PipelinesApiAsyncBackendJobExecutionActor.scala index b978b1b78eb..3c0f5ca63f9 100644 --- a/supportedBackends/google/pipelines/v2beta/src/main/scala/cromwell/backend/google/pipelines/v2beta/PipelinesApiAsyncBackendJobExecutionActor.scala +++ b/supportedBackends/google/pipelines/v2beta/src/main/scala/cromwell/backend/google/pipelines/v2beta/PipelinesApiAsyncBackendJobExecutionActor.scala @@ -192,12 +192,12 @@ class PipelinesApiAsyncBackendJobExecutionActor(standardParams: StandardAsyncExe (implicit gcsTransferConfiguration: GcsTransferConfiguration): String = { // Generate a mapping of reference inputs to their mounted paths and a section of the localization script to // "faux localize" these reference inputs with symlinks to their locations on mounted reference disks. - import cromwell.backend.google.pipelines.common.action.ActionCommands._ + import cromwell.backend.google.pipelines.common.action.ActionUtils.shellEscaped val referenceFilesLocalizationScript = { val symlinkCreationCommandsOpt = referenceInputsToMountedPathsOpt map { referenceInputsToMountedPaths => referenceInputsToMountedPaths map { case (input, absolutePathOnRefDisk) => - s"mkdir -p ${input.containerPath.parent.pathAsString.escape} && ln -s ${absolutePathOnRefDisk.escape} ${input.containerPath.pathAsString.escape}" + s"mkdir -p ${shellEscaped(input.containerPath.parent.pathAsString)} && ln -s ${shellEscaped(absolutePathOnRefDisk)} ${shellEscaped(input.containerPath.pathAsString)}" } }