Skip to content

Commit

Permalink
Merge pull request #1 from flatironinstitute/minor_build_generalization
Browse files Browse the repository at this point in the history
Minor Build Generalization
  • Loading branch information
garrettwrong authored May 21, 2020
2 parents 2ac12cd + b1e54fb commit 47596e6
Show file tree
Hide file tree
Showing 9 changed files with 1,393 additions and 39 deletions.
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
[submodule "cub"]
path = cub
url = [email protected]:NVlabs/cub.git
74 changes: 45 additions & 29 deletions makefile → Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,42 @@ CC=gcc
CXX=g++
NVCC=nvcc

# We'll sacrifice longer compile times for broader compatibility out of the box.
# Developer-users are suggested to change this in their make.inc, see:
# http://arnon.dk/matching-sm-architectures-arch-and-gencode-for-various-nvidia-cards/
NVARCH = -arch=sm_70 \
-gencode=arch=compute_50,code=sm_50 \
-gencode=arch=compute_52,code=sm_52 \
-gencode=arch=compute_60,code=sm_60 \
-gencode=arch=compute_61,code=sm_61 \
-gencode=arch=compute_70,code=sm_70 \
-gencode=arch=compute_75,code=sm_75 \
-gencode=arch=compute_75,code=compute_75

CXXFLAGS= -DNEED_EXTERN_C -fPIC -O3 -funroll-loops -march=native -g -std=c++11
#NVCCFLAGS=-DINFO -DDEBUG -DRESULT -DTIME
NVCCFLAGS= -std=c++11 -ccbin=$(CXX) -O3 -DTIME -arch=sm_60 \
NVCCFLAGS= -std=c++11 -ccbin=$(CXX) -O3 -DTIME $(NVARCH) \
--default-stream per-thread -Xcompiler "$(CXXFLAGS)"
#If using any card with architecture KXX, change to -arch=sm_30 (see GPUs
#supported section in https://en.wikipedia.org/wiki/CUDA for more info)
#DEBUG add "-g -G" for cuda-gdb debugger

# CUDA Related build dependencies
CUDA_ROOT=/usr/local/cuda
CUB_ROOT=./cub
INC=-I$(CUDA_ROOT)/include \
-Icontrib/cuda_samples \
-I$(CUB_ROOT)
NVCC_LIBS_PATH=-L$(CUDA_ROOT)/lib64

FFTWNAME=fftw3
FFTW=$(FFTWNAME)$(PRECSUFFIX)

LIBS=-lm -lcudart -lstdc++ -lnvToolsExt -lcufft -lcuda -l$(FFTW)


#############################################################
# Allow the user to override any variable above this point. #
-include make.inc

ifeq ($(PREC),SINGLE)
PRECSUFFIX=f
CXXFLAGS+=-DSINGLE
Expand All @@ -18,16 +46,6 @@ else
PRECSUFFIX=
endif

INC=-I/cm/shared/sw/pkg/devel/cuda/9.0.176/samples/common/inc/ \
-I/mnt/home/yshih/cub/ \
-I/cm/shared/sw/pkg/devel/cuda/9.0.176/include/
LIBS_PATH=

FFTWNAME=fftw3
FFTW=$(FFTWNAME)$(PRECSUFFIX)

LIBS=-lm -lcudart -lstdc++ -lnvToolsExt -lcufft -lcuda -l$(FFTW)

LIBNAME=libcufinufft$(PRECSUFFIX)
DYNAMICLIB=lib/$(LIBNAME).so
STATICLIB=lib-static/$(LIBNAME).a
Expand All @@ -46,9 +64,8 @@ CUFINUFFTOBJS=src/2d/spreadinterp2d.o src/2d/cufinufft2d.o \
src/deconvolve_wrapper.o src/cufinufft.o src/profile.o \
src/3d/spreadinterp3d.o src/3d/spread3d_wrapper.o \
src/3d/interp3d_wrapper.o src/3d/cufinufft3d.o

CUFINUFFTCOBJS=src/cufinufftc.o
#-include make.inc

%.o: %.cpp
$(CXX) -c $(CXXFLAGS) $(INC) $< -o $@
Expand All @@ -69,24 +86,24 @@ spreadinterp_test: test/spreadinterp_test.o $(CUFINUFFTOBJS) $(FINUFFTOBJS)

finufft2d_test: test/finufft2d_test.o finufft/finufft2d.o $(CUFINUFFTOBJS) \
$(FINUFFTOBJS)
$(CXX) $^ $(LIBS_PATH) $(LIBS) -o $@
$(CXX) $^ $(NVCC_LIBS_PATH) $(LIBS) -o $@

cufinufft_test: test/cufinufft_test.o $(CUFINUFFTOBJS) $(FINUFFTOBJS)
$(NVCC) $^ $(NVCCFLAGS) $(LIBS_PATH) $(LIBS) -o $@
$(NVCC) $^ $(NVCCFLAGS) $(NVCC_LIBS_PATH) $(LIBS) -o $@

cufinufft2d1_test: test/cufinufft2d1_test.o $(CUFINUFFTOBJS) $(FINUFFTOBJS)
$(NVCC) $^ $(NVCCFLAGS) $(LIBS_PATH) $(LIBS) -o $@
$(NVCC) $^ $(NVCCFLAGS) $(NVCC_LIBS_PATH) $(LIBS) -o $@

cufinufft2d1many_test: test/cufinufft2d1many_test.o $(CUFINUFFTOBJS) \
$(FINUFFTOBJS)
$(NVCC) $^ $(NVCCFLAGS) $(LIBS_PATH) $(LIBS) -o $@
$(NVCC) $^ $(NVCCFLAGS) $(NVCC_LIBS_PATH) $(LIBS) -o $@

cufinufft2d2_test: test/cufinufft2d2_test.o $(CUFINUFFTOBJS) $(FINUFFTOBJS)
$(NVCC) $^ $(NVCCFLAGS) $(LIBS_PATH) $(LIBS) -o $@
$(NVCC) $^ $(NVCCFLAGS) $(NVCC_LIBS_PATH) $(LIBS) -o $@

cufinufft2d2many_test: test/cufinufft2d2many_test.o $(CUFINUFFTOBJS) \
$(FINUFFTOBJS)
$(NVCC) $^ $(NVCCFLAGS) $(LIBS_PATH) $(LIBS) -o $@
$(NVCC) $^ $(NVCCFLAGS) $(NVCC_LIBS_PATH) $(LIBS) -o $@

spread3d: test/spread_3d.o $(CUFINUFFTOBJS) $(FINUFFTOBJS)
$(NVCC) $(NVCCFLAGS) $(LIBS) -o $@ $^
Expand All @@ -98,10 +115,10 @@ spreadinterp3d_test: test/spreadinterp3d_test.o $(CUFINUFFTOBJS) $(FINUFFTOBJS)
$(NVCC) $(NVCCFLAGS) $(LIBS) -o $@ $^

cufinufft3d1_test: test/cufinufft3d1_test.o $(CUFINUFFTOBJS) $(FINUFFTOBJS)
$(NVCC) $^ $(NVCCFLAGS) $(LIBS_PATH) $(LIBS) $(LIBS_CUFINUFFT) -o $@
$(NVCC) $^ $(NVCCFLAGS) $(NVCC_LIBS_PATH) $(LIBS) $(LIBS_CUFINUFFT) -o $@

cufinufft3d2_test: test/cufinufft3d2_test.o $(CUFINUFFTOBJS) $(FINUFFTOBJS)
$(NVCC) $^ $(NVCCFLAGS) $(LIBS_PATH) $(LIBS) $(LIBS_CUFINUFFT) -o $@
$(NVCC) $^ $(NVCCFLAGS) $(NVCC_LIBS_PATH) $(LIBS) $(LIBS_CUFINUFFT) -o $@

lib: $(STATICLIB) $(DYNAMICLIB)

Expand All @@ -111,12 +128,12 @@ $(STATICLIB): $(CUFINUFFTOBJS) $(FINUFFTOBJS)
mkdir -p lib-static
ar rcs $(STATICLIB) $(CUFINUFFTOBJS) $(FINUFFTOBJS)
$(DYNAMICLIB): $(CUFINUFFTOBJS) $(FINUFFTOBJS)
mkdir -p lib
$(NVCC) -shared $(NVCCFLAGS) $(CUFINUFFTOBJS) $(FINUFFTOBJS) -o $(DYNAMICLIB) $(LIBS)
mkdir -p lib
$(NVCC) -shared $(NVCCFLAGS) $(CUFINUFFTOBJS) $(FINUFFTOBJS) -o $(DYNAMICLIB) $(LIBS)

$(DYNAMICCLIB): $(CUFINUFFTCOBJS) $(STATICLIB)
mkdir -p lib
gcc -shared -o $(DYNAMICCLIB) $(CUFINUFFTCOBJS) $(STATICLIB) $(LIBS)
gcc -shared -o $(DYNAMICCLIB) $(CUFINUFFTCOBJS) $(STATICLIB) $(NVCC_LIBS_PATH) $(LIBS)

all: spread2d interp2d spreadinterp_test finufft2d_test cufinufft2d1_test \
cufinufft2d2_test cufinufft2d1many_test cufinufft2d2many_test spread3d \
Expand Down Expand Up @@ -148,6 +165,5 @@ clean:
rm -f spreadinterp_test
rm -f spreadinterp3d_test
rm -f example2d1
rm -f lib/*.so
rm -f lib-static/*.a
rmdir lib lib-static
rm -rf lib
rm -rf lib-static
23 changes: 16 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,16 +1,19 @@
# cuFINUFFT
A GPU implementation of 2,3 dimension type 1,2 non-uniform FFT based on FINUFFT (https://github.com/flatironinstitute/finufft).
A GPU implementation of the 2- and 3-dimensional, type 1 and type 2 non-uniform FFT, based on [FINUFFT][1].

This is a work as a summer intern at Flatiron Institute advised by CCM project leader Alex Barnett.
This is a work from Melody Shih's internship at Flatiron Institute, advised by CCM project leader Alex Barnett.


### Code dependency
- CUB library (https://github.com/NVlabs/cub)
- [CUB Library][3]. This is managed by a git submodule, note the clone command below.

### Installation
- Get the CUB library - ```git clone https://github.com/NVlabs/cub.git```
- Modify make.inc - set the ```INC``` with ```-I$(CUDA_DIR)/samples/common/inc/ -I$(CUDA_DIR)/include/ -I$(CUB_DIR)```
- Compile - ```make all```
- Get this code and dependency -
```git clone --recurse-submodules https://github.com/flatironinstitute/cufinufft.git```
- Review the `Makefile`. If you need to customize build settings, create and edit a `make.inc`. Example:
- To override the standard CUDA `/usr/local/cuda` location your `make.inc` should contain: ```CUDA_ROOT=/your/path/to/cuda```.
- Two examples are provided, one for IBM machines (make.inc.power9), and another for the Courant Institute cluster (make.inc.CIMS).
- Compile - ```make all -j```
- Run a test code - ``` ./cufinufft2d1_test 2 128 128 10 1e-6```

### Interface
Expand All @@ -28,4 +31,10 @@ cuFINUFFT API contains 5 stages:
- DEBUG - debug mode outputs all the middle stages' result

### Other
- If you're running the code on GPU with Compute Capability less than 5.0 (ex. Kepler, Fermi), change the ```-arch=sm_50``` flag to lower number. (See http://arnon.dk/matching-sm-architectures-arch-and-gencode-for-various-nvidia-cards/)
- If you are interested in optimizing for GPU Compute Capability,
you may want to specify ```NVARCH=-arch=sm_XX``` in your make.inc to reduce compile times,
or for other performance reasons. See [Matching SM Architectures][2].

[1]: https://github.com/flatironinstitute/finufft
[2]: http://arnon.dk/matching-sm-architectures-arch-and-gencode-for-various-nvidia-cards/
[3]: https://github.com/NVlabs/cub
Loading

0 comments on commit 47596e6

Please sign in to comment.