Skip to content

Install GNetLMM on the GWDG cluster

Saikat Banerjee edited this page Jan 24, 2019 · 11 revisions

This follows my own directory structure. Modify accordingly.

  1. Clone the repository.
# Fetch the GNetLMM sources over SSH into ~/packages/GNetLMM.
cd ~/packages
git clone git@github.com:PMBio/GNetLMM.git
  1. Check the Python environment, switch to Python 2.7 (install it if it is not available), and install the required packages.
# conda env remove --name myenv # remove existing installation if required
# Bring conda itself up to date before creating the environment.
conda update conda
# List the environments that already exist.
conda info --envs
# Create an environment named py27 with Python 2.7 and the listed packages.
conda create --name py27 python=2.7 scipy numpy h5py matplotlib
# Switch the current shell to the new environment.
source activate py27
  1. Unload modules. My account loads the Intel ICC and MKL modules on SSH login. These have to be unloaded before installing limix, which needs to pick them up from the Python environment instead.
# Unload the modules that were loaded at login before building limix.
module purge
  1. Install legacy limix from GitHub. Neither the official limix nor the official limix-legacy package works (checked).
# Fetch the legacy limix sources over SSH into ~/packages/limix.
cd ~/packages
git clone git@github.com:PMBio/limix.git
cd limix
# Cython is installed into the active conda environment before the build.
conda install cython
python setup.py build
python setup.py install
# Return to ~/packages.
cd ..
  1. Within GNetLMM, copy the binaries to the outermost folder. These "binaries" are Python scripts that import modules using the full package path, and those modules are only visible when the scripts sit in the parent (top-level) folder.
# Copy the gNetLMM_* scripts from GNetLMM/bin into the repository root,
# which is where the demo commands invoke them as ./gNetLMM_*.
cd ~/packages/GNetLMM
cp GNetLMM/bin/gNetLMM_* .

Testing

There is a demo for running GNetLMM, but it needs some modified arguments and file paths to run properly. Consult the demo to understand the workflow, and use the following code to run it. The official instructions for preprocessing, running GNetLMM, and postprocessing were followed.

1. Set the environment

# Activate the Python 2.7 conda environment created during installation.
source activate py27
# Load PLINK 1.90; the preprocessing step is given "plink" as --plink_path.
module load plink/1.90
2. Create the output folder, set filenames and parameters.

# Demo configuration: every path is absolute and rooted at the GNetLMM checkout.
cd ~/packages/GNetLMM
# -p keeps a re-run from failing when out/ already exists.
mkdir -p out
CURDIR=$(pwd)

# Inputs shipped with the repository.
BFILE="${CURDIR}/data/1000G_chr22/chrom22_subsample20_maf0.10" # specify here bed basename
FFILE="${CURDIR}/data/1000G_chr22/ones.txt"

# Output basenames written under out/.
PFILE="${CURDIR}/out/pheno"
CFILE="${CURDIR}/out/chrom22"
ASSOC0FILE="${CURDIR}/out/lmm"
GFILE="${CURDIR}/out/genes"

# Analysis parameters. They are defined before the file names that embed them,
# so that editing a value here keeps the output names in sync.
ANCHOR_THRESH=1e-6
WINDOW=2000
RUN_TAG="thresh${ANCHOR_THRESH}_wnd${WINDOW}"

ANCHORFILE="${CURDIR}/out/cisanchor_${RUN_TAG}.txt"
VFILE="${CURDIR}/out/vstructures_${RUN_TAG}"
ASSOCFILE="${CURDIR}/out/gnetlmm_${RUN_TAG}"
PLOTFILE="${CURDIR}/out/power.pdf"
3. Simulate phenotypes and create kinship matrix (preprocessing).

# Simulate phenotypes for the genotypes in BFILE and write them under PFILE.
./gNetLMM_simPheno --bfile "$BFILE" --pfile "$PFILE"
# Preprocess with plink (found on PATH) and compute the covariance matrix under CFILE.
./gNetLMM_preprocess --plink_path plink --bfile "$BFILE" --cfile "$CFILE" --compute_covariance
4. Run GNetLMM.

# Run initial association scan and merge the results
# (SNP start indices 0, 10000, ..., 40000; 10000 SNPs per chunk)
for ((i = 0; i <= 40000; i += 10000)); do
    ./gNetLMM_analyse --initial_scan --bfile "$BFILE" --pfile "$PFILE" --ffile "$FFILE" --cfile "${CFILE}.cov" --assoc0file "$ASSOC0FILE" --startSnpIdx "$i" --nSnps 10000
done
./gNetLMM_analyse --merge_assoc0_scan --assoc0file "$ASSOC0FILE" --nSnps 10000 --bfile "$BFILE"

# Compute the marginal gene-gene correlations when splitting the genes in groups of size 25
# (trait start indices 0, 25, ..., 100)
for ((i = 0; i <= 100; i += 25)); do
    ./gNetLMM_analyse --gene_corr --pfile "$PFILE" --gfile "$GFILE" --startTraitIdx "$i" --nTraits 25
done

# Merge the results obtained by the partial correlation scans
./gNetLMM_analyse --merge_corr --gfile "$GFILE" --pfile "$PFILE" --nTraits 25

# Compute the cis-anchors
./gNetLMM_analyse --compute_anchors --bfile "$BFILE" --pfile "$PFILE" --assoc0file "$ASSOC0FILE" --anchorfile "$ANCHORFILE" --anchor_thresh="$ANCHOR_THRESH" --window="$WINDOW" --cis

# Find the V-structures
# (trait start indices 0, 10, ..., 90; 10 traits per chunk)
for ((i = 0; i <= 90; i += 10)); do
    ./gNetLMM_analyse --find_vstructures --bfile "$BFILE" --pfile "$PFILE" --gfile "$GFILE" --anchorfile "$ANCHORFILE" --assoc0file "$ASSOC0FILE" --vfile "$VFILE" --window "$WINDOW" --startTraitIdx "$i" --nTraits 10
done
./gNetLMM_postprocess --concatenate --infiles "$VFILE" --outfile "$VFILE"

# Update the associations for which V-structures are found
for ((i = 0; i <= 90; i += 10)); do
    ./gNetLMM_analyse --update_assoc --bfile "$BFILE" --pfile "$PFILE" --cfile "${CFILE}.cov" --ffile "$FFILE" --vfile "$VFILE" --assocfile "$ASSOCFILE" --startTraitIdx "$i" --nTraits 10
done
./gNetLMM_postprocess --concatenate --infiles "$ASSOCFILE" --outfile "$ASSOCFILE"
5. Postprocess.

# Merge the initial-scan associations with the updated associations.
./gNetLMM_postprocess --merge_assoc --assoc0file "$ASSOC0FILE" --assocfile "$ASSOCFILE"
# Write the readable summary to ${VFILE}.nice.
./gNetLMM_postprocess --nice_output --bfile "$BFILE" --pfile "$PFILE" --vfile "$VFILE" --outfile "${VFILE}.nice" --assoc0file "$ASSOC0FILE" --assocfile "$ASSOCFILE"
6. Run the algorithm again, this time blocking the causal chain anchor snp → anchor gene → focal gene by conditioning on the focal gene.

# Re-run the association update in blocking mode; results go to ${ASSOCFILE}.block.
# (trait start indices 0, 10, ..., 90; 10 traits per chunk)
for ((i = 0; i <= 90; i += 10)); do
    ./gNetLMM_analyse --block_assoc --bfile "$BFILE" --pfile "$PFILE" --cfile "${CFILE}.cov" --ffile "$FFILE" --vfile "$VFILE" --assocfile "${ASSOCFILE}.block" --startTraitIdx "$i" --nTraits 10
done
./gNetLMM_postprocess --concatenate --infiles "${ASSOCFILE}.block" --outfile "${ASSOCFILE}.block"
./gNetLMM_postprocess --merge_assoc --assoc0file "$ASSOC0FILE" --assocfile "${ASSOCFILE}.block"
# Regenerate ${VFILE}.nice, this time including the blocked associations.
./gNetLMM_postprocess --nice_output --bfile "$BFILE" --pfile "$PFILE" --vfile "$VFILE" --outfile "${VFILE}.nice" --assocfile "$ASSOCFILE" --assoc0file "$ASSOC0FILE" --blockfile "${ASSOCFILE}.block"
# Write the power plot to PLOTFILE.
./gNetLMM_postprocess --plot_power --assocfile "$ASSOCFILE" --assoc0file "$ASSOC0FILE" --plotfile "$PLOTFILE" --pfile "$PFILE" --bfile "$BFILE" --window "$WINDOW" --blockfile "${ASSOCFILE}.block"
Clone this wiki locally