forked from vijaybioinfo/clustering
-
Notifications
You must be signed in to change notification settings - Fork 0
/
run.sh
114 lines (97 loc) · 4.48 KB
/
run.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
#!/bin/bash
#########################
# Clustering algorithms #
#########################
# This script creates a PBS job for our snakemake clustering pipeline
# set -euo pipefail
# Print the command-line help text on stderr.
usage() {
  printf '%s\n' \
    "USAGE: ${0} [-y] [options]" \
    "-y <config file> : Path to the YAML config file. Required." \
    "-s Submit job." \
    "-d Job ID after which, if successfully finishes, it will start running." \
    "-v Verbose." \
    "-h Print the usage info." >&2
}
# The leading ':' in the optstring puts getopts in silent mode, so invalid options must be handled by the '\?)' case.
# ${opt} is the current option and ${OPTARG} is its argument (only for options followed by a colon in the optstring).
# Parse the command-line flags.
#   SUBMIT      : "TRUE" when -s is given, "FALSE" otherwise.
#   VERBOSE     : "TRUE" when -v is given, "FALSE" otherwise.
#   CONFIG_FILE : path given with -y (required).
#   DEPEND      : job ID given with -d; empty when -d is absent.
SUBMIT=FALSE
VERBOSE=FALSE
# Start from empty values so nothing leaks in from the calling environment.
CONFIG_FILE=""
DEPEND=""
while getopts ":y:sd:vh" opt; do
  case "${opt}" in
    y) CONFIG_FILE="${OPTARG}" ;;
    s) SUBMIT=TRUE ;;
    d) DEPEND="${OPTARG:-}" ;;
    v) VERBOSE=TRUE ;;
    h) usage; exit 1 ;;
    # In silent mode a missing option argument is reported as ':' with the
    # offending option letter in OPTARG.
    :) echo "Option -${OPTARG} requires an argument." >&2; usage; exit 1 ;;
    \?) echo "No -${OPTARG} argument found."; usage; exit 1 ;;
  esac
done
# OPTIND is still 1 when no option at all was consumed.
if [[ ${OPTIND} -eq 1 ]]; then
  usage; exit 1
fi
# -y is mandatory: without a readable file the later sed/grep calls would
# either read from stdin (and hang) or fail one by one.
if [[ -z "${CONFIG_FILE}" ]]; then
  echo "A configuration file is required (-y)." >&2; usage; exit 1
fi
if [[ ! -r "${CONFIG_FILE}" ]]; then
  echo "Cannot read configuration file: ${CONFIG_FILE}" >&2; exit 1
fi
#### Parameters #### -----------------------------------------------------------
# Print the value of a flat "key: value" entry from a YAML-like file.
# This is a line-based text filter, not a YAML parser.
# Arguments:
#   $1 - path to the file.
#   $2 - key name without the trailing colon (matched by grep anywhere in
#        the line, so it is treated as a pattern, not an exact key).
# Outputs:
#   For every line containing "<key>:" (after '#' comments are stripped),
#   the text that follows the last colon not followed by ':' or '/', with
#   the character right after that colon dropped and double quotes removed.
#   Nothing is printed when the key is absent.
read_yaml() {
  local yaml_file="${1}"
  local key="${2}"
  # Arguments are quoted so paths with spaces or glob characters work.
  sed -e 's/#.*//g' -- "${yaml_file}" \
    | grep -- "${key}:" \
    | sed 's/.*:[^:\/\/]//; s/\"//g'
}
# Derive the run parameters from the configuration file.
# CONFIG_FILE is quoted everywhere so paths with spaces or glob characters work.
OUTPUT_DIR="$(read_yaml "${CONFIG_FILE}" output_dir)"
PROJECT_NAME="$(read_yaml "${CONFIG_FILE}" project_name)"
# Flatten the tool entry into a file-name friendly tag: brackets and spaces
# are removed and commas become underscores.
TOOLS="$(read_yaml "${CONFIG_FILE}" tool | sed -E 's/\[|\]//g; s/,/_/g; s/ {1,}//g')"
# An empty output_dir would silently turn the output path into "/<project>".
if [[ -z "${OUTPUT_DIR}" ]]; then
  echo "No 'output_dir' found in ${CONFIG_FILE}." >&2
  exit 1
fi
OUTPUT_DIR="${OUTPUT_DIR%/}/${PROJECT_NAME}"

# Pipeline location: taken from the config when present, otherwise the
# directory holding this script.
if grep -q 'pipeline:' "${CONFIG_FILE}"; then
  PIPELINE_DIR=$(grep 'pipeline:' "${CONFIG_FILE}" | awk '{print $2}' | sed 's/\"//g')
else
  PIPELINE_DIR=$(dirname -- "${0}")
fi
# Fall back to the script directory when the configured one has no
# (non-empty) cluster.json.
if [[ ! -s "${PIPELINE_DIR}/cluster.json" ]]; then
  PIPELINE_DIR=$(dirname -- "${0}")
fi
PIPELINE_DIR="${PIPELINE_DIR%/}"

CLUST_ENVIRON="$(read_yaml "${CONFIG_FILE}" environment)"
CLUST_CONFIG="$(read_yaml "${CONFIG_FILE}" cluster_config)"
# Use the pipeline's own cluster.json when the configured file is missing or empty.
if [[ ! -s "${CLUST_CONFIG}" ]]; then
  CLUST_CONFIG="${PIPELINE_DIR}/cluster.json"
fi
# Show the resolved parameters; the banner lines are wrapped in the cyan
# ANSI colour sequence and reset afterwards.
printf ' \n'
printf '\033[0;36m%s\033[0m\n' "**** Vijay Lab - LJI 2020"
printf '\033[0;36m%s\033[0m\n' "------------------------------- PRESENTING PARAMETERS -------------------------------"
printf '%s\n' \
  "Configuration file: ${CONFIG_FILE}" \
  "Output path: ${OUTPUT_DIR}" \
  "Pipeline: ${PIPELINE_DIR}" \
  "Cluster configuration: ${CLUST_CONFIG}"
printf '\033[0;36m%s\033[0m\n' "------------------------------- --------------------- -------------------------------"
# Create the output tree, keep a copy of the configuration next to the
# results, and build the job script from the shared PBS template.
if ! mkdir -p -- "${OUTPUT_DIR}/scripts"; then
  echo "Cannot create ${OUTPUT_DIR}/scripts" >&2
  exit 1
fi
# Deliberately non-fatal: cp fails when the config given is already
# ${OUTPUT_DIR}/config.yaml, and that case must keep working.
cp -- "${CONFIG_FILE}" "${OUTPUT_DIR}/config.yaml"

JOBFILE="${OUTPUT_DIR}/scripts/clump_${PROJECT_NAME}_${TOOLS}"
echo "Job file: ${JOBFILE}.sh"
# Remove *txt files left in scripts/ for this project; none existing is fine.
rm -f -- "${OUTPUT_DIR}/scripts/clump_${PROJECT_NAME}"*txt 2> /dev/null

# 'wget -O' creates the target even when the download fails, so check both
# the exit status and that the template is not empty before editing it.
if ! wget -q https://raw.githubusercontent.com/vijaybioinfo/cellranger_wrappeR/main/routine_template.sh -O "${JOBFILE}.sh" \
  || [[ ! -s "${JOBFILE}.sh" ]]; then
  echo "Could not download the job template into ${JOBFILE}.sh" >&2
  exit 1
fi

# Fill in the template placeholders.
# NOTE(review): the values are spliced into the sed program as-is; a '|' or
# '&' inside USER, PROJECT_NAME or OUTPUT_DIR would corrupt the substitution.
sed -i 's|{cellranger}|clustering|' "${JOBFILE}.sh"
sed -i 's|{username}|'"${USER}"'|g' "${JOBFILE}.sh"
sed -i 's|{sampleid}|'"${PROJECT_NAME}"'|g' "${JOBFILE}.sh"
sed -i 's|\/\.\.||g' "${JOBFILE}.sh"
sed -i 's|{routine_pbs}|clump|' "${JOBFILE}.sh"
sed -i 's|{outpath}|'"${OUTPUT_DIR}"'|g' "${JOBFILE}.sh"
# The single quotes keep ${PROJ...} literal: these match text of the template.
sed -i 's|cp ${PROJ.*|cp -r ${PROJDIR}/. ./|g' "${JOBFILE}.sh" # to copy everything to scratch
sed -i 's|cp -R ./.*${PROJ.*|cp -r . ${PROJDIR}/|g' "${JOBFILE}.sh" # copy from scratch
# Insert the pipeline-specific commands and the resource requests into the
# job script. The sed programs are unchanged; only the job-file path is now
# quoted and conda is detected with the 'command -v' builtin instead of 'which'.
echo "Pushing critical lines..."
# Only when the config names an environment and conda is available: activate
# it in the job and render the snakemake DAG as a PDF before the main routine.
if [[ -n "${CLUST_ENVIRON}" ]] && command -v conda > /dev/null 2>&1; then
  echo "Environment: $(conda env list | grep cluster | awk '{print $2 $3}')"
  sed -i 's|# {after_copy}|source activate '"${CLUST_ENVIRON}"'; conda env list|g' "${JOBFILE}.sh"
  sed -i 's|# {pre_routine}|snakemake --snakefile '"${PIPELINE_DIR}"'/Snakefile --dag \| dot -Tpdf > '"${OUTPUT_DIR}"'/_results_outline.pdf|g' "${JOBFILE}.sh"
fi
# Main routine: snakemake submits each rule through qsub using the resources
# from the cluster configuration; its output goes to scripts/snakemake.log.
# '\&' is a literal ampersand in the sed replacement, producing '>&'.
sed -i 's|{routine_params}|snakemake --jobs 100 --latency-wait 100 --cluster-config '"${CLUST_CONFIG}"' --snakefile '"${PIPELINE_DIR}"'/Snakefile --cluster "qsub -l {cluster.walltime} -l {cluster.cores} -l {cluster.memory} -m n -q default -e '"${OUTPUT_DIR}"'/scripts/ -o '"${OUTPUT_DIR}"'/scripts/" --jobname "clump.{rulename}.{jobid}" --stats '"${OUTPUT_DIR}"'/scripts/snakemake.stats >\& '"${OUTPUT_DIR}"'/scripts/snakemake.log|' "${JOBFILE}.sh"
# Resources substituted for the job script's own placeholders.
sed -i 's|{walltime}|36:00:00|g' "${JOBFILE}.sh"
sed -i 's|{nodes}|1|g' "${JOBFILE}.sh"
sed -i 's|{ppn}|1|g' "${JOBFILE}.sh"
sed -i 's|{mem}|8gb|g' "${JOBFILE}.sh"
# Submit the generated job script, but only when -s was given.
# The test used to be inverted: -s ("Submit job.") stopped here and omitting
# it submitted the job. Without -s the script is now left for inspection.
if ! grep -qE "TRUE|^yes$|^y$" <<< "${SUBMIT}"; then
  echo "Check it out"; exit
fi
# With -d the job is held until the given job ID finishes successfully.
# The qsub output is stored in CID in both branches; the dependency branch
# used to overwrite DEPEND instead, so the printed job ID was empty.
if [[ -n "${DEPEND:-}" ]]; then
  CID=$(qsub -W depend=afterok:"${DEPEND}" "${JOBFILE}.sh") \
    || { echo "qsub failed for ${JOBFILE}.sh" >&2; exit 1; }
else
  CID=$(qsub "${JOBFILE}.sh") \
    || { echo "qsub failed for ${JOBFILE}.sh" >&2; exit 1; }
fi
# Keep only the part of the identifier before the first dot.
CID="${CID%%.*}"
echo "Job ID: ${CID}"
echo