-
Notifications
You must be signed in to change notification settings - Fork 15
/
Makefile
54 lines (44 loc) · 2.33 KB
/
Makefile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
# Generates the input files used by the pynvjitlink binding test suite.
#
# Test binaries are built for the compute capability (CC) of the GPU in the
# test machine. The sed expression removes the first "." from each value that
# nvidia-smi reports (e.g. "7.5" becomes "75"), and grep drops the CSV header
# row.
#
# nvidia-smi prints one row per GPU and $(shell) joins the rows with spaces,
# so only the first word is kept; otherwise a machine with several GPUs would
# produce a value such as "75 75" and corrupt every -gencode flag built from it.
GPU_CC := $(firstword $(shell nvidia-smi --query-gpu=compute_cap --format=csv | grep -v compute_cap | sed 's/\.//'))

# Fall back to CC 7.5 when the query produced nothing (for example because
# nvidia-smi is not installed). A plain `GPU_CC ?= 75` cannot do this: the
# assignment above always defines GPU_CC, possibly as an empty string, and ?=
# only acts on variables that are still undefined. A value given on the
# command line (`make GPU_CC=80`) still overrides both assignments.
ifeq ($(strip $(GPU_CC)),)
GPU_CC := 75
endif

# Use CC 7.0 as the alternative in fatbin testing, unless the GPU CC is 7.x,
# in which case CC 8.0 is used instead (presumably so that the two
# architectures placed in the fatbin always differ).
ifneq ($(filter 7%,$(GPU_CC)),)
ALT_CC := 80
else
ALT_CC := 70
endif
# Directory that receives every generated test input
OUTPUT_DIR := ../pynvjitlink/tests

# Options shared by all nvcc invocations: optimize, and emit relocatable
# device code so that device functions are preserved in the final output
NVCC_FLAGS := -O3 -rdc true

# Expands to a single nvcc -gencode option.
#   $(1): prefix of the arch= value   $(2): prefix of the code= value
#   $(3): compute capability digits
gencode_pair = -gencode arch=$(1)_$(3),code=$(2)_$(3)

# Default code generation: target the CC of the GPU in the test machine
GENCODE := $(call gencode_pair,compute,sm,$(GPU_CC))
# Fatbin tests additionally generate code for the alternative CC
FATBIN_GENCODE := $(GENCODE) $(call gencode_pair,compute,sm,$(ALT_CC))
# LTO-IR tests generate for the LTO "architecture" instead
LTOIR_GENCODE := $(call gencode_pair,lto,lto,$(GPU_CC))

# Per-output-type flags: code generation options plus the nvcc switch that
# selects the kind of file to emit
PTX_FLAGS := $(GENCODE) -ptx
CUBIN_FLAGS := $(GENCODE) --cubin
FATBIN_FLAGS := $(FATBIN_GENCODE) --fatbin
OBJECT_FLAGS := $(GENCODE) -dc
LIBRARY_FLAGS := $(GENCODE) -lib
LTOIR_FLAGS := $(LTOIR_GENCODE) -dc
# Interpreter used to run generate_raw_ltoir.py. Override it on the command
# line (e.g. `make PYTHON=python3`) on systems without a plain `python`.
PYTHON ?= python

# `all` names a task rather than a file: declare it phony so that a stray file
# called "all" can never make the build appear up to date.
.PHONY: all

# Builds every test input from undefined_extern.cu and test_device_functions.cu
# into $(OUTPUT_DIR). Everything is regenerated on each run; the outputs are not
# tracked as individual targets.
all:
	@echo "GPU CC: $(GPU_CC)"
	@echo "Alternative CC: $(ALT_CC)"
# Make sure the destination exists before any tool writes into it
	@mkdir -p $(OUTPUT_DIR)
# Compile all test objects
	nvcc $(NVCC_FLAGS) $(CUBIN_FLAGS) -o $(OUTPUT_DIR)/undefined_extern.cubin undefined_extern.cu
	nvcc $(NVCC_FLAGS) $(CUBIN_FLAGS) -o $(OUTPUT_DIR)/test_device_functions.cubin test_device_functions.cu
	nvcc $(NVCC_FLAGS) $(FATBIN_FLAGS) -o $(OUTPUT_DIR)/test_device_functions.fatbin test_device_functions.cu
	nvcc $(NVCC_FLAGS) $(PTX_FLAGS) -o $(OUTPUT_DIR)/test_device_functions.ptx test_device_functions.cu
	nvcc $(NVCC_FLAGS) $(OBJECT_FLAGS) -o $(OUTPUT_DIR)/test_device_functions.o test_device_functions.cu
	nvcc $(NVCC_FLAGS) $(LIBRARY_FLAGS) -o $(OUTPUT_DIR)/test_device_functions.a test_device_functions.cu
# Generate LTO-IR wrapped in a fatbin
	nvcc $(NVCC_FLAGS) $(LTOIR_FLAGS) -o $(OUTPUT_DIR)/test_device_functions.ltoir.o test_device_functions.cu
# Generate LTO-IR in a "raw" LTO-IR container
	$(PYTHON) generate_raw_ltoir.py --arch sm_$(GPU_CC) -o $(OUTPUT_DIR)/test_device_functions.ltoir test_device_functions.cu
# We also want to test linking a .cu file; this needs no compilation,
# so copy it instead
	cp test_device_functions.cu $(OUTPUT_DIR)