Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Cleanup create_trajectories.sh with shellcheck #177

Merged
merged 7 commits into from
May 7, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
207 changes: 113 additions & 94 deletions utils/create_trajectories.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
# ---------------------------------------------------------------------------------
# create_trajectories.sh - Generate and execute a set of ABIN simulations
#
# Initial geometries (and optionally velocities) are taken sequentially from XYZ movie files.
# Initial geometries (and optionally velocities) are taken sequentially from an XYZ trajectory file.

# The trajectories are executed and stored in $folder.

Expand All @@ -12,47 +12,72 @@
# abin-randomint PRNG program for generating random seeds, should be in your $PATH.
#---------------------------------------------------------------------------------

#######-----SETUP---#############
irandom0=156863189 # random seed, set negative for random seed based on time
movie=coords.xyz # PATH TO a XYZ movie with initial geometries
veloc=vels.xyz # PATH to XYZ initial velocities (optional)
isample=1 # initial number of traj
nsample=100 # number of trajectories
folder=MP2-NH4 # Name of the folder with trajectories
inputdir=TEMPLATE-$folder # Directory with input files for ABIN
abin_input=$inputdir/input.in # main input file for ABIN
launch_script=$inputdir/r.abin # this is the file that is submitted by qsub
submit="qsub -q nq -cwd " # comment this line if you don't want to submit to queue yet
rewrite=0 # if =1 -> rewrite trajectories that already exist
jobs=20 # number of batch jobs to submit. Trajectories will be distributed accordingly.

molname=$folder # Name of the job in the queue
# Exit if undefined variable is used
set -u

#### SETUP ####
# Path to an XYZ file with initial geometries
movie=coords.xyz
# Path to XYZ file with initial velocities (optional)
# veloc=vels.xyz

# Starting index for the initial geometries
isample=1
# End index
nsample=100

# Folder name where the trajectories will be created
folder=MY_MOLECULE_TRAJS

# Directory with the input files for ABIN (input.in et al)
inputdir=TEMPLATE-$folder

# File with ABIN input parameters, we need this path
# so we can inject random number seed into it.
abin_input=$inputdir/input.in

# Random seed to generate random seeds for individual trajectories
# Set to a negative number for a time-based random seed.
irandom0=156863189

# Specify path to launch script that is normally submitted to the queuing system.
# Comment out this line if you're running locally.
launch_script=$inputdir/r.abin
# If you don't provide a launch script,
# we need a (preferably absolute) path to abin executable
# abin_exe=/path/to/abin

# Comment out this line if you don't want to run calculations yet
# submit_command="qsub -cwd -V -q nq -cwd "
# If you don't use a queueing system (such as SLURM), use the following line
# submit_command=bash

# Number of batch jobs to submit to queue
# set only if you have more trajectories than jobs
# jobs=20
########## END OF SETUP ##########


# Abort the script if any of the given files is missing.
# Arguments: paths to check. Empty arguments are skipped, so optional
#            settings can be passed as "${var-}" without tripping the check.
# Outputs:   an error message naming the first missing file.
# Exits:     terminates the shell with status 1 on the first missing file
#            (it calls exit, not return).
function files_exist {
  local f
  for f in "$@"; do
    if [[ -n $f && ! -f $f ]]; then
      echo "ERROR: File '$f' does not exist!"
      exit 1
    fi
  done
}

# Abort the script if any of the given directories is missing.
# Arguments: directory paths to check. Empty arguments are skipped.
# Outputs:   an error message naming the first missing directory.
# Exits:     terminates the shell with status 1 on the first missing
#            directory (it calls exit, not return).
function folders_exist {
  local d
  for d in "$@"; do
    # The non-empty guard must test the loop variable itself; testing any
    # other variable would silently disable the directory check.
    if [[ -n $d && ! -d $d ]]; then
      echo "ERROR: Directory '$d' does not exist!"
      exit 1
    fi
  done
}

folders_exist "$inputdir"
files_exist "$movie" "$abin_input" "$launch_script"
if [[ -n "$veloc" ]];then
files_exist "$veloc"
fi
files_exist "$movie" "${veloc-}" "$abin_input" "${launch_script-}"

natom=$(head -1 $movie)
if [[ $natom -lt 1 ]];then
Expand All @@ -62,145 +87,139 @@ fi
echo "Number of atoms = $natom"

# TODO: Verify number of atoms and lines in the velocity file
let natom2=natom+2
lines=$(cat $movie | wc -l)
geoms=$(expr $lines / $natom2)
(( natom2=natom+2 ))
lines=$(wc -l < $movie)
(( geoms=lines/natom2 ))
if [[ $nsample -gt $geoms ]];then
echo "ERROR: Number of geometries ($geoms) is smaller than number of samples($nsample)."
echo "Change parameter \"nsample\"."
exit 1
fi

# determine number of ABIN simulations per job
let nsimul=nsample-isample+1
(( nsimul=nsample-isample+1 ))
if [[ -z ${jobs-} ]]; then
jobs=$nsimul
fi

if [[ $nsimul -le $jobs ]];then
remainder=0
injob=1
jobs=$nsimul
else
let injob=nsimul/jobs #number of simulations per job
(( injob=nsimul/jobs )) #number of simulations per job
# determine the remainder and distribute it evenly between jobs
let remainder=nsimul-injob*jobs
(( remainder=nsimul-injob*jobs ))
fi


j=1
i=$isample
w=0 #current number of simulations in current j-th job

#--------------------generation of random numbers--------------------------------
# Generate random number generator seeds for individual trajectories
echo "Generating $nsample random integers for random seeds"
echo "abin-randomint --seed $irandom0 --num $nsample > iran.dat"
abin-randomint --seed $irandom0 --num $nsample > iran.dat
if [[ $? -ne "0" ]];then
if ! abin-randomint --seed $irandom0 --num $nsample > iran.dat
then
echo "ERROR: Could not generate random numbers"
exit 1
fi

mkdir -p $folder
cp iseed0 "$abin_input" $folder

let offset=natom2*isample-natom2

if [[ "$rewrite" -eq "1" ]];then
rm -f $folder/$molname.$isample.*.sh
fi

(( offset=natom2*isample-natom2 ))

while [[ $i -le "$nsample" ]];do

let offset=offset+natom2
(( offset=offset+natom2 ))

if [[ -d "$folder/TRAJ.$i" ]];then
if [[ "$rewrite" -eq "1" ]];then

rm -r $folder/TRAJ.$i ; mkdir $folder/TRAJ.$i

else

echo "Trajectory number $i already exists!"
echo "Exiting..."
echo "If you want to overwrite it, first remove it:"
echo "'rm -r $folder/TRAJ.$i'"
exit 1

fi

else

mkdir $folder/TRAJ.$i

fi

# Copy all the files from the template directory
cp -r $inputdir/* $folder/TRAJ.$i


# Prepare input geometry and velocities

head -$offset $movie | tail -$natom2 > geom
if [[ ! -z "$veloc" ]];then
head -$offset $veloc | tail -$natom2 > veloc.in
fi

mv geom $folder/TRAJ.$i/mini.dat

if [[ ! -z "$veloc" ]];then
mv veloc.in $folder/TRAJ.$i/
head -$offset $movie | tail -$natom2 > $folder/TRAJ.$i/initial.xyz
if [[ -n "${veloc-}" ]];then
head -$offset "$veloc" | tail -$natom2 > $folder/TRAJ.$i/veloc.in
fi


## Now prepare input.in and r.abin
irandom=`head -$i iran.dat |tail -1`
## Now prepare input.in and r.abin
irandom=$(head -$i iran.dat |tail -1)

# TODO: Validate this step
sed -r "s/irandom *= *[0-9]+/irandom=$irandom/" $abin_input > $folder/TRAJ.$i/input.in

cat > $folder/TRAJ.$i/r.$molname.$i << EOF
cat > $folder/TRAJ.$i/r.$folder.$i << EOF
#!/bin/bash
JOBNAME=ABIN.$molname.${i}_$$_\${JOB_ID}
INPUTPARAM=input.in
INPUTGEOM=mini.dat
OUTPUT=output
INPUTGEOM=initial.xyz
EOF

if [[ ! -z $veloc ]];then
echo "INPUTVELOC=veloc.in" >> $folder/TRAJ.$i/r.$molname.$i
if [[ -n ${veloc-} ]];then
echo "INPUTVELOC=veloc.in" >> $folder/TRAJ.$i/r.$folder.$i
fi

grep -v -e '/bin/bash' -e "JOBNAME=" -e "INPUTPARAM=" -e "INPUTGEOM=" -e "INPUTVELOC=" $launch_script >> $folder/TRAJ.$i/r.$molname.$i

chmod 755 $folder/TRAJ.$i/r.$molname.$i

if [[ -n ${launch_script-} ]];then
grep -v -e '/bin/bash' -e "INPUTPARAM=" -e "INPUTGEOM=" -e "INPUTVELOC=" $launch_script >> $folder/TRAJ.$i/r.$folder.$i
else
if [[ -n ${veloc-} ]]; then
echo "$abin_exe -i input.in -x initial.xyz -v veloc.in > abin.out 2>&1" > $folder/TRAJ.$i/r.$folder.$i
else
echo "$abin_exe -i input.in -x initial.xyz > abin.out 2>&1" > $folder/TRAJ.$i/r.$folder.$i
fi
fi

echo "cd TRAJ.$i" >> $folder/$molname.$isample.$j.sh
echo "./r.$molname.$i" >> $folder/$molname.$isample.$j.sh
echo "cd $PWD/$folder" >> $folder/$molname.$isample.$j.sh
echo "(cd TRAJ.$i && bash r.$folder.$i)" >> $folder/$folder.$isample.$j.sh

#--Distribute calculations evenly between jobs for queue
# Distribute calculations evenly between jobs for queue
if [[ $remainder -le 0 ]];then
let ncalc=injob
ncalc=injob
else
let ncalc=injob+1
(( ncalc=injob+1 ))
fi
let w++
(( w++ ))
if [[ $w -eq $ncalc ]] && [[ $j -lt $jobs ]]; then
let j++
let remainder--
let w=0
w=0
(( j++ ))
(( remainder-- ))
fi
#---------------------------------------------------------------------------

let i++
(( i++ ))

done

# Submit jobs
# Each batch script $folder.$isample.<k>.sh (k = 1..$j) is handed to
# $submit_command. Nothing is launched when submit_command is unset or empty.
k=1
if [[ -n "${submit_command-}" ]]; then
  cd "$folder" || exit 1
  # Record the mode now: submit_command is rewritten to "nohup bash" below,
  # so comparing it against "bash" afterwards would never match.
  run_locally=0
  if [[ $submit_command = "bash" ]]; then
    run_locally=1
    echo "Launching $j calculations locally"
    submit_command="nohup $submit_command"
  else
    echo "Submitting $j calculations with: $submit_command"
  fi
  while [[ $k -le $j ]]; do
    if [[ -f $folder.$isample.$k.sh ]]; then
      # Intentionally unquoted: submit_command holds a command plus options.
      $submit_command "$folder.$isample.$k.sh" &
    fi
    (( k++ ))
  done
  # Wait for submit commands to finish (they should be fast!)
  # Local runs are left running in the background under nohup instead.
  if [[ $run_locally -eq 0 ]]; then
    wait
  fi
fi