This repository has been archived by the owner on Nov 17, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 6.8k
/
Copy pathlinux_gpu.cmake
133 lines (109 loc) · 5.21 KB
/
linux_gpu.cmake
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#-------------------------------------------------------------------------------
# Template configuration for compiling MXNet
#
# If you want to change the configuration, please use the following steps.
# Assume you are on the root directory of mxnet. First copy this file so that
# any local changes will be ignored by git
#
# $ cp config/linux_gpu.cmake config.cmake
#
# Next modify the according entries, and then compile by
#
# $ mkdir build; cd build
# $ cmake ..
# $ cmake --build .
#
# Specify `cmake --build . --parallel N` to set the number of parallel compilation jobs.
# Default is derived from CPUs available.
#
#-------------------------------------------------------------------------------
#---------------------------------------------
# GPU support
#---------------------------------------------
set(USE_CUDA ON CACHE BOOL "Build with CUDA support")
set(USE_CUDNN ON CACHE BOOL "Build with cudnn support, if found")
# Target NVIDIA GPU achitecture.
# Valid options are "Auto" for autodetection, "All" for all available
# architectures or a list of architectures by compute capability number, such as
# "7.0" or "7.0;7.5" as well as name, such as "Volta" or "Volta;Turing".
# The value specified here is passed to cmake's CUDA_SELECT_NVCC_ARCH_FLAGS to
# obtain the compilation flags for nvcc.
#
# When compiling on a machine without GPU, autodetection will fail and you
# should instead specify the target architecture manually to avoid excessive
# compilation times.
set(MXNET_CUDA_ARCH "Auto" CACHE STRING "Target NVIDIA GPU achitecture")
#---------------------------------------------
# Common libraries
#---------------------------------------------
set(USE_BLAS "open" CACHE STRING "BLAS Vendor")
set(USE_OPENCV ON CACHE BOOL "Build with OpenCV support")
set(OPENCV_ROOT "" CACHE BOOL "OpenCV install path. Supports autodetection.")
set(USE_OPENMP ON CACHE BOOL "Build with Openmp support")
set(USE_MKL_IF_AVAILABLE ON CACHE BOOL "Use Intel MKL if found")
set(USE_MKLDNN ON CACHE BOOL "Build with MKL-DNN support")
set(USE_LAPACK ON CACHE BOOL "Build with lapack support")
set(USE_TVM_OP OFF CACHE BOOL "Enable use of TVM operator build system.")
#---------------------
# Compilers
#--------------------
set(CMAKE_GENERATOR "Ninja" CACHE STRING "Build Tool Generator used by CMake")
# Compilers are usually autodetected. Uncomment and modify the next 3 lines to
# choose manually:
# set(CMAKE_C_COMPILER "" CACHE BOOL "C compiler")
# set(CMAKE_CXX_COMPILER "" CACHE BOOL "C++ compiler")
# set(CMAKE_CUDA_COMPILER "" CACHE BOOL "Cuda compiler (nvcc)")
# Uncomment the following line to compile with debug information
# set(CMAKE_BUILD_TYPE Debug CACHE STRING "CMake build type")
#---------------------------------------------
# CPU instruction sets: The support is autodetected if turned ON
#---------------------------------------------
set(USE_SSE ON CACHE BOOL "Build with x86 SSE instruction support")
set(USE_F16C ON CACHE BOOL "Build with x86 F16C instruction support")
#----------------------------
# distributed computing
#----------------------------
set(USE_DIST_KVSTORE OFF CACHE BOOL "Build with DIST_KVSTORE support")
#----------------------------
# performance settings
#----------------------------
set(USE_OPERATOR_TUNING ON CACHE BOOL "Enable auto-tuning of operators")
set(USE_GPERFTOOLS OFF CACHE BOOL "Build with GPerfTools support")
set(USE_JEMALLOC OFF CACHE BOOL "Build with Jemalloc support")
#----------------------------
# additional operators
#----------------------------
# path to folders containing projects specific operators that you don't want to
# put in src/operators
SET(EXTRA_OPERATORS "" CACHE PATH "EXTRA OPERATORS PATH")
#----------------------------
# other features
#----------------------------
# Create C++ interface package
set(USE_CPP_PACKAGE OFF CACHE BOOL "Build C++ Package")
# Use int64_t type to represent the total number of elements in a tensor
# This will cause performance degradation reported in issue #14496
# Set to 1 for large tensor with tensor size greater than INT32_MAX i.e. 2147483647
# Note: the size of each dimension is still bounded by INT32_MAX
set(USE_INT64_TENSOR_SIZE OFF CACHE BOOL "Use int64_t to represent the total number of elements in a tensor")
# Other GPU features
set(USE_NCCL "Use NVidia NCCL with CUDA" OFF)
set(NCCL_ROOT "" CACHE BOOL "NCCL install path. Supports autodetection.")
set(ENABLE_CUDA_RTC ON CACHE BOOL "Build with CUDA runtime compilation support")
set(USE_NVTX ON CACHE BOOL "Build with NVTX support")