Cpac addition (#395)

* Dockerfile edit * Add cpac-necessary libraries to Dockerfile * add initial functional pipeline scripts * fix typo * fix docker typos * fix cpac directory organization * add no-caching on pip installations and apt-get clean * adapt directorysweeper to handle fmri data, initial fmri pipeline funcitons added * initial working prototype of m2g_func_worker * add s3 push capabilities to functional pipeline * add input arguments for functional pipeline * add info to acquisition input argument * update m2g_cloud funcitons for new functional pipeline * typo fix * Update m2g_pipeline.yaml * change script to go up to 10 GB memory per participant * changes to cpac parameters to allow ec2 instances to function * add functional pipeline local batch running * ec2 test * remove memory limitation * add gigabyte control to f-pipeline * add n_cpu control * fix cpac terminal call typo * derek_changes * final configuration edits * add more specific parameters to crawl_bucket * Priebe edits (#396) * necessary changes to allow large parcellations to run * typo fix * typo fix * allow multiple parcellations to be run * add csv import * version lock cpac * Update Dockerfile Fix dockerfile to take from deploy branch of neuroparc and m2g * Update m2g_func.py Add function descriptions
neurodata · Jun 29, 2020 · be29587 · be29587
1 parent 31e7d6b
commit be29587
Show file tree

Hide file tree

Showing 13 changed files with 1,470 additions and 76 deletions.
diff --git a/Dockerfile b/Dockerfile
@@ -1,6 +1,6 @@
 FROM neurodata/fsl_1604:0.0.1
-LABEL author="Derek Pisner"
-LABEL maintainer="dpisner@utexas.edu"
+LABEL author="Ross Lawrence, Alex Loftus"
+LABEL maintainer="rlawre18@jhu.edu"
 
 #--------Environment Variables-----------------------------------------------#
 ENV M2G_URL https://github.com/neurodata/m2g.git
@@ -25,6 +25,8 @@ RUN apt-get update && \
     apt-get install -y python3.6 python3.6-dev && \
     curl https://bootstrap.pypa.io/get-pip.py | python3.6
 
+# Python 2.7 and its pip are installed next to Python 3.6; the C-PAC virtualenv
+# created later in this file is built from /usr/bin/python2.7.
+# The package index is refreshed in the same layer as the install so the step
+# never depends on a stale index cached by an earlier layer.
+RUN apt-get update && \
+    apt-get install -y python2.7 python-pip
+
 RUN pip3 install --upgrade pip
 
 # Get neurodebian config
@@ -58,13 +60,41 @@ RUN mkdir -p /opt/afni && \
     rm -rf afni.tar.gz
 ENV PATH=/opt/afni:$PATH
 
+## --------CPAC INSTALLS-----------------------------------------------------#
+# Graphviz and its development files. The package index is refreshed in the
+# same layer so the install does not rely on a stale cached index.
+RUN apt-get update && \
+    apt-get install -y graphviz graphviz-dev
+
+# Setup FSL environment
+# FSLDIR is the destination root used by the CPAC resource install step below.
+# /usr/lib/fsl/5.0 is prepended to both LD_LIBRARY_PATH and PATH, keeping any
+# value those variables already had.
+ENV FSLDIR=/usr/share/fsl/5.0 \
+    FSLOUTPUTTYPE=NIFTI_GZ \
+    FSLMULTIFILEQUIT=TRUE \
+    POSSUMDIR=/usr/share/fsl/5.0 \
+    LD_LIBRARY_PATH=/usr/lib/fsl/5.0:$LD_LIBRARY_PATH \
+    FSLTCLSH=/usr/bin/tclsh \
+    FSLWISH=/usr/bin/wish \
+    PATH=/usr/lib/fsl/5.0:$PATH
+
+# Install CPAC resources into FSL.
+# The archive is downloaded and unpacked under /tmp, then its templates are
+# copied into the FSL data tree. `cp -n` never overwrites a file that already
+# exists at the destination.
+# Both tissue-prior resolutions go into the same `tissuepriors` directory.
+# The archive and the unpacked tree are removed in this same layer so they do
+# not remain in the image. `curl -f` makes an HTTP error fail the build
+# instead of saving an error page as the archive.
+RUN curl -fsSL http://fcon_1000.projects.nitrc.org/indi/cpac_resources.tar.gz -o /tmp/cpac_resources.tar.gz && \
+    tar xfz /tmp/cpac_resources.tar.gz -C /tmp && \
+    cp -n /tmp/cpac_image_resources/MNI_3mm/* $FSLDIR/data/standard && \
+    cp -n /tmp/cpac_image_resources/MNI_4mm/* $FSLDIR/data/standard && \
+    cp -n /tmp/cpac_image_resources/symmetric/* $FSLDIR/data/standard && \
+    cp -n /tmp/cpac_image_resources/HarvardOxford-lateral-ventricles-thr25-2mm.nii.gz $FSLDIR/data/atlases/HarvardOxford && \
+    cp -nr /tmp/cpac_image_resources/tissuepriors/2mm $FSLDIR/data/standard/tissuepriors && \
+    cp -nr /tmp/cpac_image_resources/tissuepriors/3mm $FSLDIR/data/standard/tissuepriors && \
+    rm -rf /tmp/cpac_resources.tar.gz /tmp/cpac_image_resources
+
+
 #--------M2G SETUP-----------------------------------------------------------#
 # setup of python dependencies for m2g itself, as well as file dependencies
 RUN \
-    pip3.6 install numpy nibabel scipy python-dateutil pandas boto3 awscli matplotlib nilearn sklearn pandas cython vtk pyvtk fury awscli requests ipython duecredit graspy scikit-image networkx dipy pybids
+    pip3.6 install --no-cache-dir virtualenv numpy nibabel scipy python-dateutil pandas boto3 awscli
+RUN \
+    pip3.6 install --no-cache-dir matplotlib nilearn sklearn pandas cython vtk pyvtk fury
+RUN \
+    pip3.6 install --no-cache-dir awscli requests ipython duecredit graspy scikit-image networkx dipy pybids
 
 RUN \
-    pip3.6 install plotly==1.12.9 setuptools>=40.0 configparser>=3.7.4
+# Version specifiers are quoted: unquoted, the shell parses ">=40.0" and
+# ">=3.7.4" as output redirections, which drops the constraints and creates
+# stray files named "=40.0" and "=3.7.4".
+    pip3.6 install --no-cache-dir plotly==1.12.9 "setuptools>=40.0" "configparser>=3.7.4"
 
 WORKDIR /
 
@@ -78,19 +108,13 @@ RUN mkdir /output && \
 RUN mkdir /m2g_atlases
 
 RUN \
-    git lfs clone $M2G_ATLASES && \
+    git lfs clone https://github.com/neurodata/neuroparc && \
     mv /neuroparc/atlases /m2g_atlases && \
-    rm -rf /neuroparc && \
-    rm -rf /m2g_atlases/label/Human/DS* && \
-    rm -rf /m2g_atlases/label/Human/pp264* && \
-    rm -rf /m2g_atlases/label/Human/princeton* && \
-    rm -rf /m2g_atlases/label/Human/slab* && \
-    rm -rf /m2g_atlases/label/Human/hemispheric
-
+    rm -rf /neuroparc
 RUN chmod -R 777 /m2g_atlases
 
 # Grab m2g from deploy.
-RUN git clone -b deploy $M2G_URL /m2g && \
+RUN git clone $M2G_URL /m2g && \
     cd /m2g && \
     pip3.6 install .
 RUN chmod -R 777 /usr/local/bin/m2g_bids
@@ -104,3 +128,29 @@ RUN ldconfig
 
 # and add it as an entrypoint
 ENTRYPOINT ["m2g"]
+
+# Clear apt-get caches (try adding sudo)
+# NOTE(review): this runs in its own RUN, i.e. its own layer. Files written by
+# earlier apt-get layers remain stored in those layers, so this step is not
+# expected to reduce the final image size; cleaning would have to happen in the
+# same RUN as each install to have that effect.
+RUN apt-get clean
+
+# Set up the functional pipeline.
+# C-PAC v1.6.1 is cloned into /C-PAC. The files under dev/docker_data and then
+# the top-level contents are moved into /code, and the remainder of the clone
+# is deleted. The `*` globs skip dot-entries such as .git, so the git metadata
+# is deleted with the clone.
+# --depth 1 fetches only the v1.6.1 revision, since the history is thrown away
+# in this same step.
+RUN cd / && \
+    git clone --branch v1.6.1 --single-branch --depth 1 https://github.com/FCP-INDI/C-PAC.git && \
+    mkdir /code && \
+    mv /C-PAC/dev/docker_data/* /code/ && \
+    mv /C-PAC/* /code/ && \
+    rm -R /C-PAC && \
+    chmod +x /code/run.py
+
+# C-PAC gets its own Python 2.7 virtualenv. It is created at the absolute path
+# /venv, which is the path the script generated by m2g/functional/m2g_func.py
+# activates, so the location no longer depends on the current working directory.
+# cpac's requirements.txt is out of order: it would install nilearn before
+# scipy and scikit-learn (which nilearn needs), so those two are installed first.
+# `ls /code` only prints the directory contents into the build log.
+# Every pip call uses --no-cache-dir so no download cache is kept in the layer.
+RUN virtualenv -p /usr/bin/python2.7 /venv && \
+    . /venv/bin/activate && \
+    pip install --upgrade pip==9.0.1 --no-cache-dir && \
+    ls /code && \
+    pip install scipy==1.2.1 --no-cache-dir && \
+    pip install scikit-learn==0.19.1 --no-cache-dir && \
+    pip install -r /code/requirements.txt --no-cache-dir && \
+    pip install -e /code --no-cache-dir && \
+    pip install torch==1.2.0 --no-cache-dir && \
+    pip install torch==1.2.0 torchvision==0.4.0 -f https://download.pytorch.org/whl/torch_stable.html --no-cache-dir && \
+    pip install xvfbwrapper --no-cache-dir
diff --git a/m2g/__init__.py b/m2g/__init__.py
@@ -11,7 +11,7 @@
 
 # to call `m2g.graph`, etc
 __all__ = ["graph", "preproc", "register", "track"]  # modules
-__all__.extend(["scripts", "stats", "utils"])  # subpackages
+__all__.extend(["functional","scripts", "stats", "utils"])  # subpackages
 
 # import everything listed in __all__
 # TODO: maybe still change this

diff --git a/m2g/functional/__init__.py b/m2g/functional/__init__.py
@@ -0,0 +1,10 @@
+"""
+m2g.functional
+~~~~~~~~~~~~~~
+
+Contains scripts for running the functional pipeline.
+
+m2g_func : top-level pipeline entrypoint
+"""
+
+from . import m2g_func
diff --git a/m2g/functional/m2g_func.py b/m2g/functional/m2g_func.py
@@ -0,0 +1,77 @@
+import subprocess
+import yaml
+from m2g.utils.gen_utils import run
+
+def make_dataconfig(input_dir, sub, ses, anat, func, acquisition='alt+z', tr=2.0):
+    """Generates the data_config file needed by cpac
+    
+    Arguments:
+        input_dir {str} -- Path of directory containing input files
+        sub {int} -- subject number
+        ses {int} -- session number
+        anat {str} -- Path of anatomical nifti file
+        func {str} -- Path of functional nifti file
+        acquisition {str} -- acquisition method for funcitonal scan
+        tr {float} -- TR (seconds) of functional scan
+    
+    Returns:
+        None
+    """
+
+    Data = [{
+        'subject_id': sub,
+        'unique_id': f'ses-{ses}',
+        'anat': anat,
+        'func': {
+                'rest_run-1': {
+			        'scan': func,
+			        'scan_parameters': {
+				    	'acquisition': acquisition,
+				    	'tr': tr
+			    }
+		    }
+	    }    
+    }]
+
+    config_file = f'{input_dir}/data_config.yaml'
+    with open(config_file,'w',encoding='utf8') as outfile:
+        yaml.dump(Data, outfile, default_flow_style=False)
+
+    return config_file
+
+
+def make_script(input_dir, output_dir, subject, session, data_config, pipeline_config, mem_gb, n_cpus):
+    cpac_script = '/root/.m2g/cpac_script.sh'
+    with open(cpac_script,'w+',encoding='utf8') as script:
+        script.write(f'''#! /bin/bash
+        . /venv/bin/activate
+        python /code/run.py --data_config_file {data_config} --pipeline_file {pipeline_config} --n_cpus {n_cpus} --mem_gb {mem_gb} {input_dir} {output_dir} participant
+        ''')
+
+    run(f'chmod +x {cpac_script}')
+
+    return cpac_script
+
+
+
+def m2g_func_worker(input_dir, output_dir, sub, ses, anat, bold, acquisition, tr, mem_gb, n_cpus):
+    """Creates the requisite files to run CPAC, then calls CPAC and runs it in a terminal
+    
+    Arguments:
+        input_dir {str} -- Path to input directory
+        output_dir {str} -- Path to output directory
+        sub {int} -- subject number
+        ses {int} -- session number
+        anat {str} -- Path of anatomical nifti file
+        bold {str} -- Path of functional nifti file
+        acquisition {str} -- Acquisition method for funcitional scans
+        tr {str} -- TR time, in seconds
+    """
+
+    pipeline_config='/m2g/m2g/functional/m2g_pipeline.yaml'
+
+    data_config = make_dataconfig(input_dir, sub, ses, anat, bold, acquisition, tr)
+    cpac_script = make_script(input_dir, output_dir, sub, ses, data_config, pipeline_config,mem_gb, n_cpus)
+
+    # Run pipeline
+    subprocess.call([cpac_script], shell=True)