Merge pull request #1 from flatironinstitute/minor_build_generalization

Minor Build Generalization
blackwer · May 21, 2020 · 47596e6 · 47596e6
2 parents 2ac12cd + b1e54fb
commit 47596e6
Show file tree

Hide file tree

Showing 9 changed files with 1,393 additions and 39 deletions.
diff --git a/.gitmodules b/.gitmodules
@@ -0,0 +1,3 @@
+[submodule "cub"]
+	path = cub
+	url = git@github.com:NVlabs/cub.git
diff --git a/makefile → Makefile b/makefile → Makefile
@@ -2,14 +2,42 @@ CC=gcc
 CXX=g++
 NVCC=nvcc
 
+# We'll sacrifice longer compile times for broader compatibility out of the box.
+# Developer-users are suggested to change this in their make.inc, see:
+#   http://arnon.dk/matching-sm-architectures-arch-and-gencode-for-various-nvidia-cards/
+NVARCH = -arch=sm_70 \
+	-gencode=arch=compute_50,code=sm_50 \
+	-gencode=arch=compute_52,code=sm_52 \
+	-gencode=arch=compute_60,code=sm_60 \
+	-gencode=arch=compute_61,code=sm_61 \
+	-gencode=arch=compute_70,code=sm_70 \
+	-gencode=arch=compute_75,code=sm_75 \
+	-gencode=arch=compute_75,code=compute_75 
+
 CXXFLAGS= -DNEED_EXTERN_C  -fPIC -O3 -funroll-loops -march=native -g -std=c++11
 #NVCCFLAGS=-DINFO -DDEBUG -DRESULT -DTIME
-NVCCFLAGS= -std=c++11 -ccbin=$(CXX) -O3 -DTIME -arch=sm_60 \
+NVCCFLAGS= -std=c++11 -ccbin=$(CXX) -O3 -DTIME $(NVARCH) \
 	--default-stream per-thread -Xcompiler "$(CXXFLAGS)"
-#If using any card with architecture KXX, change to -arch=sm_30 (see GPUs 
-#supported section in https://en.wikipedia.org/wiki/CUDA for more info)
 #DEBUG add "-g -G" for cuda-gdb debugger
 
+# CUDA Related build dependencies
+CUDA_ROOT=/usr/local/cuda
+CUB_ROOT=./cub
+INC=-I$(CUDA_ROOT)/include \
+	-Icontrib/cuda_samples \
+	-I$(CUB_ROOT)
+NVCC_LIBS_PATH=-L$(CUDA_ROOT)/lib64
+
+FFTWNAME=fftw3
+FFTW=$(FFTWNAME)$(PRECSUFFIX)
+
+LIBS=-lm -lcudart -lstdc++ -lnvToolsExt -lcufft -lcuda -l$(FFTW)
+
+
+#############################################################
+# Allow the user to override any variable above this point. #
+-include make.inc
+
 ifeq ($(PREC),SINGLE)
 PRECSUFFIX=f
 CXXFLAGS+=-DSINGLE
@@ -18,16 +46,6 @@ else
 PRECSUFFIX=
 endif
 
-INC=-I/cm/shared/sw/pkg/devel/cuda/9.0.176/samples/common/inc/ \
-    -I/mnt/home/yshih/cub/ \
-    -I/cm/shared/sw/pkg/devel/cuda/9.0.176/include/
-LIBS_PATH=
-
-FFTWNAME=fftw3
-FFTW=$(FFTWNAME)$(PRECSUFFIX)
-
-LIBS=-lm -lcudart -lstdc++ -lnvToolsExt -lcufft -lcuda -l$(FFTW)
-
 LIBNAME=libcufinufft$(PRECSUFFIX)
 DYNAMICLIB=lib/$(LIBNAME).so
 STATICLIB=lib-static/$(LIBNAME).a
@@ -46,9 +64,8 @@ CUFINUFFTOBJS=src/2d/spreadinterp2d.o src/2d/cufinufft2d.o \
 	src/deconvolve_wrapper.o src/cufinufft.o src/profile.o \
 	src/3d/spreadinterp3d.o src/3d/spread3d_wrapper.o \
 	src/3d/interp3d_wrapper.o src/3d/cufinufft3d.o
-	
+
 CUFINUFFTCOBJS=src/cufinufftc.o
-#-include make.inc
 
 %.o: %.cpp
 	$(CXX) -c $(CXXFLAGS) $(INC) $< -o $@
@@ -69,24 +86,24 @@ spreadinterp_test: test/spreadinterp_test.o $(CUFINUFFTOBJS) $(FINUFFTOBJS)
 
 finufft2d_test: test/finufft2d_test.o finufft/finufft2d.o $(CUFINUFFTOBJS) \
 	$(FINUFFTOBJS)
-	$(CXX) $^ $(LIBS_PATH) $(LIBS) -o $@
+	$(CXX) $^ $(NVCC_LIBS_PATH) $(LIBS) -o $@
 
 cufinufft_test: test/cufinufft_test.o $(CUFINUFFTOBJS) $(FINUFFTOBJS)
-	$(NVCC) $^ $(NVCCFLAGS) $(LIBS_PATH) $(LIBS) -o $@
+	$(NVCC) $^ $(NVCCFLAGS) $(NVCC_LIBS_PATH) $(LIBS) -o $@
 
 cufinufft2d1_test: test/cufinufft2d1_test.o $(CUFINUFFTOBJS) $(FINUFFTOBJS)
-	$(NVCC) $^ $(NVCCFLAGS) $(LIBS_PATH) $(LIBS) -o $@
+	$(NVCC) $^ $(NVCCFLAGS) $(NVCC_LIBS_PATH) $(LIBS) -o $@
 
 cufinufft2d1many_test: test/cufinufft2d1many_test.o $(CUFINUFFTOBJS) \
 	$(FINUFFTOBJS)
-	$(NVCC) $^ $(NVCCFLAGS) $(LIBS_PATH) $(LIBS) -o $@
+	$(NVCC) $^ $(NVCCFLAGS) $(NVCC_LIBS_PATH) $(LIBS) -o $@
 
 cufinufft2d2_test: test/cufinufft2d2_test.o $(CUFINUFFTOBJS) $(FINUFFTOBJS)
-	$(NVCC) $^ $(NVCCFLAGS) $(LIBS_PATH) $(LIBS) -o $@
+	$(NVCC) $^ $(NVCCFLAGS) $(NVCC_LIBS_PATH) $(LIBS) -o $@
 
 cufinufft2d2many_test: test/cufinufft2d2many_test.o $(CUFINUFFTOBJS) \
 	$(FINUFFTOBJS)
-	$(NVCC) $^ $(NVCCFLAGS) $(LIBS_PATH) $(LIBS) -o $@
+	$(NVCC) $^ $(NVCCFLAGS) $(NVCC_LIBS_PATH) $(LIBS) -o $@
 
 spread3d: test/spread_3d.o $(CUFINUFFTOBJS) $(FINUFFTOBJS)
 	$(NVCC) $(NVCCFLAGS) $(LIBS) -o $@ $^
@@ -98,10 +115,10 @@ spreadinterp3d_test: test/spreadinterp3d_test.o $(CUFINUFFTOBJS) $(FINUFFTOBJS)
 	$(NVCC) $(NVCCFLAGS) $(LIBS) -o $@ $^
 
 cufinufft3d1_test: test/cufinufft3d1_test.o $(CUFINUFFTOBJS) $(FINUFFTOBJS)
-	$(NVCC) $^ $(NVCCFLAGS) $(LIBS_PATH) $(LIBS) $(LIBS_CUFINUFFT) -o $@
+	$(NVCC) $^ $(NVCCFLAGS) $(NVCC_LIBS_PATH) $(LIBS) $(LIBS_CUFINUFFT) -o $@
 
 cufinufft3d2_test: test/cufinufft3d2_test.o $(CUFINUFFTOBJS) $(FINUFFTOBJS)
-	$(NVCC) $^ $(NVCCFLAGS) $(LIBS_PATH) $(LIBS) $(LIBS_CUFINUFFT) -o $@
+	$(NVCC) $^ $(NVCCFLAGS) $(NVCC_LIBS_PATH) $(LIBS) $(LIBS_CUFINUFFT) -o $@
 
 lib: $(STATICLIB) $(DYNAMICLIB)
 
@@ -111,12 +128,12 @@ $(STATICLIB): $(CUFINUFFTOBJS) $(FINUFFTOBJS)
 	mkdir -p lib-static
 	ar rcs $(STATICLIB) $(CUFINUFFTOBJS) $(FINUFFTOBJS)
 $(DYNAMICLIB): $(CUFINUFFTOBJS) $(FINUFFTOBJS)
-	mkdir -p lib	
-	$(NVCC) -shared $(NVCCFLAGS) $(CUFINUFFTOBJS) $(FINUFFTOBJS) -o $(DYNAMICLIB) $(LIBS) 
+	mkdir -p lib
+	$(NVCC) -shared $(NVCCFLAGS) $(CUFINUFFTOBJS) $(FINUFFTOBJS) -o $(DYNAMICLIB) $(LIBS)
 
 $(DYNAMICCLIB): $(CUFINUFFTCOBJS) $(STATICLIB)
 	mkdir -p lib
-	gcc -shared -o $(DYNAMICCLIB) $(CUFINUFFTCOBJS) $(STATICLIB) $(LIBS)
+	gcc -shared -o $(DYNAMICCLIB) $(CUFINUFFTCOBJS) $(STATICLIB) $(NVCC_LIBS_PATH) $(LIBS)
 
 all: spread2d interp2d spreadinterp_test finufft2d_test cufinufft2d1_test \
 	cufinufft2d2_test cufinufft2d1many_test cufinufft2d2many_test spread3d \
@@ -148,6 +165,5 @@ clean:
 	rm -f spreadinterp_test
 	rm -f spreadinterp3d_test
 	rm -f example2d1
-	rm -f lib/*.so
-	rm -f lib-static/*.a
-	rmdir lib lib-static
+	rm -rf lib
+	rm -rf lib-static
diff --git a/README.md b/README.md
@@ -1,16 +1,19 @@
 # cuFINUFFT
-A GPU implementation of 2,3 dimension type 1,2 non-uniform FFT based on FINUFFT (https://github.com/flatironinstitute/finufft). 
+A GPU implementation of the 2- and 3-dimensional type 1 and type 2 non-uniform FFT, based on [FINUFFT][1].
 
-This is a work as a summer intern at Flatiron Institute advised by CCM project leader Alex Barnett.
+This is a work from Melody Shih's internship at Flatiron Institute, advised by CCM project leader Alex Barnett.
 
 
 ### Code dependency
- - CUB library (https://github.com/NVlabs/cub)
+ - [CUB Library][3]. This is managed by a git submodule, note the clone command below.
 
 ### Installation
- - Get the CUB library - ```git clone https://github.com/NVlabs/cub.git```
- - Modify make.inc - set the ```INC``` with ```-I$(CUDA_DIR)/samples/common/inc/ -I$(CUDA_DIR)/include/ -I$(CUB_DIR)```
- - Compile - ```make all```
+ - Get this code and dependency -
+ ```git clone --recurse-submodules https://github.com/flatironinstitute/cufinufft.git```
+ - Review the `Makefile`. If you need to customize build settings, create and edit a `make.inc`. Examples:
+   - To override the standard CUDA `/usr/local/cuda` location your `make.inc` should contain: ```CUDA_ROOT=/your/path/to/cuda```.
+   - Two examples are provided, one for IBM machines (make.inc.power9), and another for the Courant Institute cluster (make.inc.CIMS).
+ - Compile - ```make all -j```
  - Run a test code - ``` ./cufinufft2d1_test 2 128 128 10 1e-6```
 
 ### Interface
@@ -28,4 +31,10 @@ cuFINUFFT API contains 5 stages:
  - DEBUG - debug mode outputs all the middle stages' result
 
 ### Other
- - If you're running the code on GPU with Compute Capability less than 5.0 (ex. Kepler, Fermi), change the ```-arch=sm_50``` flag to lower number. (See http://arnon.dk/matching-sm-architectures-arch-and-gencode-for-various-nvidia-cards/) 
+ - If you are interested in optimizing for GPU Compute Capability,
+ you may want to specify ```NVARCH=-arch=sm_XX``` in your make.inc to reduce compile times,
+ or for other performance reasons. See [Matching SM Architectures][2].
+
+[1]: https://github.com/flatironinstitute/finufft
+[2]: http://arnon.dk/matching-sm-architectures-arch-and-gencode-for-various-nvidia-cards/
+[3]: https://github.com/NVlabs/cub