-
Notifications
You must be signed in to change notification settings - Fork 0
/
Makefile
executable file
·112 lines (92 loc) · 2.51 KB
/
Makefile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
#
# Makefile for building Matrix Multiplication under CUDA
#
SORKEN = $(shell hostname | grep sorken | wc -c)
SORKEN-COMPUTE = $(shell hostname | grep compute | wc -c)
STAMPEDE = $(shell hostname | grep stampede | wc -c)
AWS = $(shell hostname | grep "ip-" | wc -c)
ifneq ($(SORKEN), 0)
include $(PUB)/Arch/arch.cuda.gnu
else
ifneq ($(SORKEN-COMPUTE), 0)
include $(PUB)/Arch/arch.cuda.gnu
else
ifneq ($(STAMPEDE), 0)
include $(PUB)/Arch/arch.stampede.cuda
else
ifneq ($(AWS), 0)
include $(PUB)/Arch/arch.cuda.gnu
endif
endif
endif
endif
# Set usecache=1 to build the variant that uses cache
# By default, build the code to use shared memory
ifeq ($(usecache), 1)
C++FLAGS += -DUSE_CACHE
CFLAGS += -DUSE_CACHE
NVCCFLAGS += -DUSE_CACHE
endif
# Set gprof=1 on make command line to compile for gprof profiler
ifeq ($(gprof), 1)
CFLAGS += -g -pg
NVCCFLAGS += -g -pg
C++FLAGS += -g -pg
LDFLAGS += -g -pg
endif
# Set debug=1 on make command line to keep symbol table info for gdb/cachegrind
ifeq ($(debug), 1)
NVCCFLAGS += -g -G
LDFLAGS += -g -G
endif
# If you want to compile for single precision,
# specify single=1 on the "make" command line
ifeq ($(single), 1)
else
C++FLAGS += -D_DOUBLE
CFLAGS += -D_DOUBLE
NVCCFLAGS += -D_DOUBLE
endif
# Keep arround compiler output files, including the ptx assembler
ifeq ($(keep), 1)
NVCCFLAGS += -keep
NVCCFLAGS += --ptx
endif
# If you want to use the CUDA Timer
# specify cuda_timer=1 on the "make" command line
# NVCCFLAGS += -DCUDA_TIMER
ifeq ($(cuda_timer), 1)
NVCCFLAGS += -DCUDA_TIMER
endif
# Uncomment if you want to report resource requirements (registers etc)
NVCCFLAGS += --ptxas-options=-v
# NVCCFLAGS += --opencc-options -LIST:source=on
# You can set the thread block geometry by specifying bx= and by= on
# the make command line, e.g. make bx=16 by=32
# This feature is useful for the shared memory variant but
# not for the naive variant
# Set up for a default block size of 16 x 16
ifdef bx
DIM_X = -DBLOCKDIM_X=$(bx)
else
DIM_X = -DBLOCKDIM_X=16
endif
ifdef by
DIM_Y = -DBLOCKDIM_Y=$(by)
else
DIM_Y = -DBLOCKDIM_Y=16
endif
ifdef naive
NVCCFLAGS += "-DNAIVE"
CFLAGS += "-DNAIVE"
endif
BLOCKING = $(DIM_X) $(DIM_Y)
NVCCFLAGS += $(BLOCKING)
CFLAGS += $(BLOCKING)
C++FLAGS += $(BLOCKING)
APP=mmpy
OBJECTS = mmpy.o mmpy_host.o genMatrix.o cmdLine.o Timer.o utils.o Report.o setGrid.o
$(APP): $(OBJECTS) mmpy_kernel.o
$(NVCC) -o $@ $(LDFLAGS) $(OBJECTS) $(LDLIBS)
clean:
rm -f *.linkinfo *.o *.vcproj $(APP)