This repository has been archived by the owner on Nov 17, 2023. It is now read-only.

Commit 6215eef — Merge changes
anirudh2290 committed Jul 4, 2019
2 parents e3a5f71 + faccc59
Showing 84 changed files with 3,533 additions and 989 deletions.
2 changes: 1 addition & 1 deletion CMakeLists.txt
@@ -47,7 +47,7 @@ mxnet_option(ENABLE_CUDA_RTC "Build with CUDA runtime compilation support"
 mxnet_option(BUILD_CPP_EXAMPLES "Build cpp examples" ON)
 mxnet_option(INSTALL_EXAMPLES "Install the example source files." OFF)
 mxnet_option(USE_SIGNAL_HANDLER "Print stack traces on segfaults." ON)
-mxnet_option(USE_TENSORRT "Enable infeference optimization with TensorRT." OFF)
+mxnet_option(USE_TENSORRT "Enable inference optimization with TensorRT." OFF)
 mxnet_option(USE_ASAN "Enable Clang/GCC ASAN sanitizers." OFF)
 mxnet_option(ENABLE_TESTCOVERAGE "Enable compilation with test coverage metric output" OFF)
 mxnet_option(USE_INT64_TENSOR_SIZE "Use int64_t to represent the total number of elements in a tensor" OFF)
3 changes: 2 additions & 1 deletion CONTRIBUTORS.md
@@ -71,6 +71,8 @@ The committers are the granted write access to the project.
   - Tao is a major contributor to the MXNet MKL-DNN backend and performance on CPU.
 * [Zach Kimberg](https://github.com/zachgk)
   - Zach is one of the major maintainers of the MXNet Scala package.
+* [Lin Yuan](https://github.com/apeforest)
+  - Lin supports MXNet distributed training using Horovod and is also a major contributor to higher order gradients.
 
 
 ### Become a Committer
@@ -199,7 +201,6 @@ List of Contributors
 * [Thomas Delteil](https://github.com/ThomasDelteil)
 * [Jesse Brizzi](https://github.com/jessebrizzi)
 * [Hang Zhang](http://hangzh.com)
-* [Lin Yuan](https://github.com/apeforest)
 * [Kou Ding](https://github.com/chinakook)
 * [Istvan Fehervari](https://github.com/ifeherva)
 * [Aaron Markham](https://github.com/aaronmarkham)
4 changes: 2 additions & 2 deletions amalgamation/Makefile
@@ -114,8 +114,8 @@ jni_libmxnet_predict.so: jni_libmxnet_predict.o
 ifneq ($(ANDROID), 1)
 android:
 else
-CFLAGS+= -mhard-float -D_NDK_MATH_NO_SOFTFP=1 -O3
-LDFLAGS+= -Wl,--no-warn-mismatch -lm_hard
+CFLAGS+= -O3
+LDFLAGS+= -Wl,--no-warn-mismatch -lm_hard
 android: jni_libmxnet_predict.so
 endif
 
5 changes: 3 additions & 2 deletions benchmark/opperf/README.md
@@ -24,10 +24,11 @@ With this utility, for each MXNet operator you can get the following details:
 **Timing**
 1. Forward execution time
 2. Backward execution time
+3. Time spent for memory management
 
 **Memory**
-1. Total memory allocated
+1. Average and Max memory allocated
 
+NOTE: This is the `pool memory`. It does not reflect the exact memory requested by the operator.
 
 # Motivation
 
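The benchmark entry points that produce these metrics are plain Python functions, several of which are updated later in this commit. A minimal usage sketch, using only names that appear in this commit's diffs and assuming the repository root is on `PYTHONPATH`:

```python
import mxnet as mx

# Entry point changed in this commit (new defaults: warmup=25, runs=100).
from benchmark.opperf.nd_operations.unary_operators import \
    run_mx_unary_operators_benchmarks

# Runs every registered unary operator and collects the timing and
# memory metrics described in the README above.
results = run_mx_unary_operators_benchmarks(ctx=mx.cpu(), dtype='float32',
                                            warmup=25, runs=100)
print(results)
```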
8 changes: 4 additions & 4 deletions benchmark/opperf/nd_operations/binary_operators.py
@@ -38,7 +38,7 @@
     get_all_elemen_wise_binary_operators
 
 
-def run_mx_binary_broadcast_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=10, runs=50):
+def run_mx_binary_broadcast_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=25, runs=100):
     """Runs benchmarks with the given context and precision (dtype)for all the binary
     broadcast operators in MXNet.
@@ -48,9 +48,9 @@ def run_mx_binary_broadcast_operators_benchmarks(ctx=mx.cpu(), dtype='float32',
         Context to run benchmarks
     dtype: str, default 'float32'
         Precision to use for benchmarks
-    warmup: int, default 10
+    warmup: int, default 25
         Number of times to run for warmup
-    runs: int, default 50
+    runs: int, default 100
         Number of runs to capture benchmark results
 
     Returns
@@ -65,7 +65,7 @@ def run_mx_binary_broadcast_operators_benchmarks(ctx=mx.cpu(), dtype='float32',
     return mx_binary_op_results
 
 
-def run_mx_binary_element_wise_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=10, runs=50):
+def run_mx_binary_element_wise_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=25, runs=100):
     """Runs benchmarks with the given context and precision (dtype)for all the binary
     element_wise operators in MXNet.
13 changes: 6 additions & 7 deletions benchmark/opperf/nd_operations/gemm_operators.py
@@ -16,10 +16,9 @@
 # under the License.
 
 import mxnet as mx
-from mxnet import nd
 from benchmark.opperf.utils.benchmark_utils import run_performance_test
 from benchmark.opperf.utils.common_utils import merge_map_list
-
+from benchmark.opperf.rules.default_params import MX_OP_MODULE
 """Performance benchmark tests for MXNet NDArray GEMM Operators.
 
 1. dot
@@ -35,7 +34,7 @@
 """
 
 
-def run_gemm_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=10, runs=50):
+def run_gemm_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=25, runs=100):
     """Runs benchmarks with the given context and precision (dtype)for all the GEMM
     operators (dot, batch_dot) in MXNet.
@@ -45,9 +44,9 @@ def run_gemm_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=10, runs
         Context to run benchmarks
     dtype: str, default 'float32'
         Precision to use for benchmarks
-    warmup: int, default 10
+    warmup: int, default 25
         Number of times to run for warmup
-    runs: int, default 50
+    runs: int, default 100
         Number of runs to capture benchmark results
 
     Returns
@@ -57,7 +56,7 @@ def run_gemm_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=10, runs
     """
     # Benchmark tests for dot and batch_dot operators
     dot_benchmark_res = run_performance_test(
-        [nd.dot], run_backward=True,
+        [getattr(MX_OP_MODULE, "dot")], run_backward=True,
         dtype=dtype, ctx=ctx,
         inputs=[{"lhs": (1024, 1024),
                  "rhs": (1024, 1024)},
@@ -71,7 +70,7 @@ def run_gemm_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=10, runs
         warmup=warmup, runs=runs)
 
     batch_dot_benchmark_res = run_performance_test(
-        [nd.batch_dot], run_backward=True,
+        [getattr(MX_OP_MODULE, "batch_dot")], run_backward=True,
         dtype=dtype, ctx=ctx,
         inputs=[{"lhs": (32, 1024, 1024),
                  "rhs": (32, 1024, 1024)},
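The recurring `nd.X` → `getattr(MX_OP_MODULE, "X")` change in this and the following files replaces hard-coded `mxnet.nd` attribute references with a by-name lookup on a shared operator module, so the operator under test can be selected from a string. A minimal sketch of the idea; the exact definition of `MX_OP_MODULE` in `benchmark/opperf/rules/default_params.py` is an assumption here:

```python
import sys

import mxnet as mx
import mxnet.ndarray.op  # generated NDArray operators are registered here

# Assumed to mirror benchmark/opperf/rules/default_params.py.
MX_OP_MODULE = sys.modules["mxnet.ndarray.op"]

# Resolve the operator by name; the string could equally come from an
# operator registry, which keeps the benchmark code data-driven.
dot_op = getattr(MX_OP_MODULE, "dot")
out = dot_op(mx.nd.ones((2, 3)), mx.nd.ones((3, 2)))  # same result as mx.nd.dot
```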
15 changes: 8 additions & 7 deletions benchmark/opperf/nd_operations/nn_activation_operators.py
@@ -16,9 +16,9 @@
 # under the License.
 
 import mxnet as mx
-from mxnet import nd
 from benchmark.opperf.utils.benchmark_utils import run_performance_test
 from benchmark.opperf.utils.common_utils import merge_map_list
+from benchmark.opperf.rules.default_params import MX_OP_MODULE
 
 """Performance benchmark tests for MXNet NDArray Activation Operators.
@@ -35,7 +35,7 @@
 """
 
 
-def run_activation_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=10, runs=50):
+def run_activation_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=25, runs=100):
     """Runs benchmarks with the given context and precision (dtype)for all the activation
     operators (relu, sigmoid, softmax) in MXNet.
@@ -45,9 +45,9 @@ def run_activation_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=10
         Context to run benchmarks
     dtype: str, default 'float32'
         Precision to use for benchmarks
-    warmup: int, default 10
+    warmup: int, default 25
         Number of times to run for warmup
-    runs: int, default 50
+    runs: int, default 100
         Number of runs to capture benchmark results
 
     Returns
@@ -56,7 +56,7 @@ def run_activation_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=10
     """
     # Relu and its variation
-    relu_benchmark_res = run_performance_test([nd.LeakyReLU],
+    relu_benchmark_res = run_performance_test([getattr(MX_OP_MODULE, "LeakyReLU")],
                                               run_backward=True,
                                               dtype=dtype,
                                               ctx=ctx,
@@ -78,7 +78,7 @@ def run_activation_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=10
 
     # Sigmoid => Covered as part of Unary ops
     # Hard_Sigmoid
-    hard_sigmoid_benchmark_res = run_performance_test([nd.hard_sigmoid],
+    hard_sigmoid_benchmark_res = run_performance_test([getattr(MX_OP_MODULE, "hard_sigmoid")],
                                                       run_backward=True,
                                                       dtype=dtype,
                                                       ctx=ctx,
@@ -90,7 +90,8 @@ def run_activation_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=10
                                                       runs=runs)
 
     # Softmax, LogSoftmax
-    softmax_benchmark_res = run_performance_test([nd.softmax, nd.log_softmax],
+    softmax_benchmark_res = run_performance_test([getattr(MX_OP_MODULE, "softmax"),
+                                                  getattr(MX_OP_MODULE, "log_softmax")],
                                                  run_backward=True,
                                                  dtype=dtype,
                                                  ctx=ctx,
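Each `run_performance_test` call above yields a list of per-operator result maps, and the imported `merge_map_list` helper folds those lists into one structure before the function returns. A small sketch of that flow; the illustrated result schema is an assumption, not the utility's documented format:

```python
from benchmark.opperf.utils.common_utils import merge_map_list

# Hypothetical per-operator results shaped like the benchmark output.
relu_res = [{"LeakyReLU": [{"avg_forward_time": 0.12}]}]
softmax_res = [{"softmax": [{"avg_forward_time": 0.08}]}]

# Fold the individual result lists into a single merged map.
merged = merge_map_list(relu_res + softmax_res)
```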
10 changes: 5 additions & 5 deletions benchmark/opperf/nd_operations/nn_basic_operators.py
@@ -16,9 +16,9 @@
 # under the License.
 
 import mxnet as mx
-from mxnet import nd
 from benchmark.opperf.utils.benchmark_utils import run_performance_test
 from benchmark.opperf.utils.common_utils import merge_map_list
+from benchmark.opperf.rules.default_params import MX_OP_MODULE
 
 """Performance benchmark tests for MXNet NDArray basic NN Operators.
@@ -29,9 +29,9 @@
 """
 
 
-def run_nn_basic_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=10, runs=50):
+def run_nn_basic_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=25, runs=100):
     # FullyConnnected operator benchmarks
-    fc_benchmark_res = run_performance_test([nd.FullyConnected],
+    fc_benchmark_res = run_performance_test([getattr(MX_OP_MODULE, "FullyConnected")],
                                             run_backward=True,
                                             dtype=dtype,
                                             ctx=ctx,
@@ -49,7 +49,7 @@ def run_nn_basic_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=10,
                                             runs=runs)
 
     # Dropout benchmarks
-    dropout_benchmark_res = run_performance_test([nd.Dropout],
+    dropout_benchmark_res = run_performance_test([getattr(MX_OP_MODULE, "Dropout")],
                                                  run_backward=True,
                                                  dtype=dtype,
                                                  ctx=ctx,
@@ -62,7 +62,7 @@ def run_nn_basic_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=10,
                                                  warmup=warmup,
                                                  runs=runs)
     # BatchNorm benchmarks
-    batchnorm_benchmark_res = run_performance_test([nd.BatchNorm],
+    batchnorm_benchmark_res = run_performance_test([getattr(MX_OP_MODULE, "BatchNorm")],
                                                    run_backward=True,
                                                    dtype=dtype,
                                                    ctx=ctx,
20 changes: 9 additions & 11 deletions benchmark/opperf/nd_operations/nn_conv_operators.py
@@ -16,9 +16,9 @@
 # under the License.
 
 import mxnet as mx
-from mxnet import nd
 from benchmark.opperf.utils.benchmark_utils import run_performance_test
 from benchmark.opperf.utils.common_utils import merge_map_list
+from benchmark.opperf.rules.default_params import MX_OP_MODULE
 
 """Performance benchmark tests for MXNet NDArray Convolution and Pooling Operators.
@@ -51,7 +51,7 @@
 """
 
 
-def run_pooling_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=10, runs=50):
+def run_pooling_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=25, runs=100):
     pool_types = ['avg', 'max', 'sum']
     global_pool_types = [0, 1]
 
@@ -61,7 +61,7 @@ def run_pooling_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=10, r
     for pool_type in pool_types:
         for global_pool in global_pool_types:
             for pool1d_data in [(32, 3, 256), (32, 3, 64)]:
-                pool1d_benchmark_res += run_performance_test([nd.Pooling],
+                pool1d_benchmark_res += run_performance_test([getattr(MX_OP_MODULE, "Pooling")],
                                                              run_backward=True,
                                                              dtype=dtype,
                                                              ctx=ctx,
@@ -70,13 +70,12 @@ def run_pooling_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=10, r
                                                               "pool_type": pool_type,
                                                               "global_pool": global_pool,
                                                               "stride": 1,
-                                                              "pad": 1,
-                                                              "layout": 'NCW'}
+                                                              "pad": 1}
                                                              ],
                                                              warmup=warmup,
                                                              runs=runs)
             for pool2d_data in [(32, 3, 256, 256), (32, 3, 64, 64)]:
-                pool2d_benchmark_res += run_performance_test([nd.Pooling],
+                pool2d_benchmark_res += run_performance_test([getattr(MX_OP_MODULE, "Pooling")],
                                                              run_backward=True,
                                                              dtype=dtype,
                                                              ctx=ctx,
@@ -85,8 +84,7 @@ def run_pooling_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=10, r
                                                               "pool_type": pool_type,
                                                               "global_pool": global_pool,
                                                               "stride": (1, 1),
-                                                              "pad": (0, 0),
-                                                              "layout": 'NCHW'}
+                                                              "pad": (0, 0)}
                                                              ],
                                                              warmup=warmup,
                                                              runs=runs)
@@ -95,11 +93,11 @@ def run_pooling_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=10, r
     return mx_pooling_op_results
 
 
-def run_convolution_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=10, runs=50):
+def run_convolution_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=25, runs=100):
     # Conv1D Benchmarks
     conv1d_benchmark_res = []
     for conv_data in [(32, 3, 256), (32, 3, 64)]:
-        conv1d_benchmark_res += run_performance_test([nd.Convolution],
+        conv1d_benchmark_res += run_performance_test([getattr(MX_OP_MODULE, "Convolution")],
                                                      run_backward=True,
                                                      dtype=dtype,
                                                      ctx=ctx,
@@ -118,7 +116,7 @@ def run_convolution_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=1
     # Conv2D Benchmarks
     conv2d_benchmark_res = []
     for conv_data in [(32, 3, 256, 256), (32, 3, 64, 64)]:
-        conv2d_benchmark_res += run_performance_test([nd.Convolution],
+        conv2d_benchmark_res += run_performance_test([getattr(MX_OP_MODULE, "Convolution")],
                                                      run_backward=True,
                                                      dtype=dtype,
                                                      ctx=ctx,
6 changes: 3 additions & 3 deletions benchmark/opperf/nd_operations/random_sampling_operators.py
@@ -34,7 +34,7 @@
 from benchmark.opperf.utils.op_registry_utils import get_all_random_sampling_operators
 
 
-def run_mx_random_sampling_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=10, runs=50):
+def run_mx_random_sampling_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=25, runs=100):
     """Runs benchmarks with the given context and precision (dtype)for all the random sampling
     operators in MXNet.
@@ -44,9 +44,9 @@ def run_mx_random_sampling_operators_benchmarks(ctx=mx.cpu(), dtype='float32', w
         Context to run benchmarks
     dtype: str, default 'float32'
         Precision to use for benchmarks
-    warmup: int, default 10
+    warmup: int, default 25
         Number of times to run for warmup
-    runs: int, default 50
+    runs: int, default 100
         Number of runs to capture benchmark results
 
     Returns
6 changes: 3 additions & 3 deletions benchmark/opperf/nd_operations/reduction_operators.py
@@ -31,7 +31,7 @@
 from benchmark.opperf.utils.benchmark_utils import run_op_benchmarks
 
 
-def run_mx_reduction_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=10, runs=50):
+def run_mx_reduction_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=25, runs=100):
     """Runs benchmarks with the given context and precision (dtype)for all the reduction
     operators in MXNet.
@@ -41,9 +41,9 @@ def run_mx_reduction_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=
         Context to run benchmarks
     dtype: str, default 'float32'
         Precision to use for benchmarks
-    warmup: int, default 10
+    warmup: int, default 25
         Number of times to run for warmup
-    runs: int, default 50
+    runs: int, default 100
         Number of runs to capture benchmark results
 
     Returns
6 changes: 3 additions & 3 deletions benchmark/opperf/nd_operations/unary_operators.py
@@ -35,7 +35,7 @@
 from benchmark.opperf.utils.benchmark_utils import run_op_benchmarks
 
 
-def run_mx_unary_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=10, runs=50):
+def run_mx_unary_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=25, runs=100):
     """Runs benchmarks with the given context and precision (dtype)for all the unary
     operators in MXNet.
@@ -45,9 +45,9 @@ def run_mx_unary_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=10,
         Context to run benchmarks
     dtype: str, default 'float32'
         Precision to use for benchmarks
-    warmup: int, default 10
+    warmup: int, default 25
         Number of times to run for warmup
-    runs: int, default 50
+    runs: int, default 100
         Number of runs to capture benchmark results
 
     Returns
(Diffs for the remaining 72 of the 84 changed files did not load.)