Big data op_test benchmark, for checking output consistency across different runs. #10646
Changes from 15 commits
@@ -72,6 +72,8 @@ def convert_np_dtype_to_dtype_(np_dtype):
         return core.VarDesc.VarType.INT64
     elif dtype == np.bool:
         return core.VarDesc.VarType.BOOL
+    elif dtype == np.uint16:
+        return core.VarDesc.VarType.INT16
     elif dtype == np.uint8:
         return core.VarDesc.VarType.UINT8
     else:

@@ -368,6 +370,13 @@ class Operator(object):
     Block. Users can use the build in instructions to describe their neural
     network.
     """
+    OP_WITHOUT_KERNEL_SET = {
+        'feed', 'fetch', 'save', 'load', 'recurrent', 'go',
+        'rnn_memory_helper_grad', 'conditional_block', 'while', 'send', 'recv',
+        'listen_and_serv', 'parallel_do', 'save_combine', 'load_combine',
+        'ncclInit', 'channel_create', 'channel_close', 'channel_send',
+        'channel_recv', 'select'
+    }

     def __init__(self,
                  block,

Review comment (on OP_WITHOUT_KERNEL_SET): This set is sad... Can you file an issue and assign it to me to clean it up? So that I don't forget.
Reply: done.

@@ -486,17 +495,13 @@ def find_name(var_list, name):
             self.desc.set_attr(attr_name, attrs[attr_name])

         self.desc.check_attrs()
-        no_kernel_op_set = {
-            'feed', 'fetch', 'save', 'load', 'recurrent', 'go',
-            'rnn_memory_helper_grad', 'conditional_block', 'while', 'send',
-            'recv', 'listen_and_serv', 'parallel_do', 'save_combine',
-            'load_combine', 'ncclInit', 'channel_create', 'channel_close',
-            'channel_send', 'channel_recv', 'select', 'gen_nccl_id'
-        }
-        if type not in no_kernel_op_set:
+        if self.has_kernel(type):
             self.desc.infer_var_type(self.block.desc)
             self.desc.infer_shape(self.block.desc)

+    def has_kernel(self, op_type):
+        return op_type not in self.OP_WITHOUT_KERNEL_SET
+
     def to_string(self, throw_on_error):
         """
         To debug string.

@@ -720,7 +725,9 @@ def idx(self):

     def var(self, name):
         if not isinstance(name, basestring):
-            raise TypeError()
+            raise TypeError(
+                "var require string as parameter, but get %s instead." %
+                (type(name)))
         v = self.vars.get(name, None)
         if v is None:
             raise ValueError("var %s not in this block" % name)
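For illustration, a minimal sketch of what the first hunk changes, assuming the fluid layout of this era where convert_np_dtype_to_dtype_ lives in paddle.fluid.framework: np.uint16 now maps onto the INT16 proto type instead of falling through to the unsupported-dtype branch (see the uint16/int16 discussion at the bottom of this page).

    import numpy as np
    import paddle.fluid.core as core
    from paddle.fluid.framework import convert_np_dtype_to_dtype_

    # With the hunk above applied, np.uint16 maps onto VarType.INT16 because
    # OpProto has no unsigned 16-bit type; np.uint8 keeps its UINT8 mapping.
    assert convert_np_dtype_to_dtype_(np.uint16) == core.VarDesc.VarType.INT16
    assert convert_np_dtype_to_dtype_(np.uint8) == core.VarDesc.VarType.UINT8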
@@ -0,0 +1,113 @@
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import numpy as np
import unittest
import time
import itertools

import paddle.fluid as fluid
import paddle.fluid.core as core
from paddle.fluid.op import Operator
from op_test import OpTest


class BenchmarkSuite(OpTest):
    def timeit_function(self, callback, iters, *args, **kwargs):
        assert iters != 0, "Iters should >= 1"
        start = time.time()
        for i in range(iters):
            callback(*args, **kwargs)
        elapse = time.time() - start
        return elapse / iters

Review comment (on the elapse line): +=? Should elapse be initialized beforehand?
Reply: done.

    def _assert_cpu_gpu_same(self, cpu_outs, gpu_outs, fetch_list, atol):
        for item_cpu_out, item_gpu_out, variable in zip(cpu_outs, gpu_outs,
                                                        fetch_list):
            # the cpu version is the baseline; the gpu version is expected
            # to match it.
            expect = item_cpu_out
            expect_t = np.array(item_cpu_out)
            actual = item_gpu_out
            actual_t = np.array(item_gpu_out)
            var_name = variable if isinstance(variable,
                                              basestring) else variable.name
            self.assertTrue(
                np.allclose(
                    actual_t, expect_t, atol=atol),
                "Output (" + var_name + ") has diff" + str(actual_t) + "\n" +
                str(expect_t))
            self.assertListEqual(actual.lod(), expect.lod(),
                                 "Output (" + var_name + ") has different lod")

    def _get_input_names(self):
        inputs = []
        for name, value in self.inputs.iteritems():
            if isinstance(value, list):
                inputs.extend([sub_name for sub_name, _ in value])
            inputs.append(name)
        return inputs

    def _get_output_names(self):
        outputs = []
        for var_name, var in self.outputs.iteritems():
            if isinstance(var, list):
                for v in var:
                    outputs.append(v)
            else:
                outputs.append(var)
        if len(outputs) == 0:
            for out_name, out_dup in Operator.get_op_outputs(self.op_type):
                outputs.append(str(out_name))
        return outputs

    def check_output_stability(self, atol=1e-8):
        places = self._get_places()
        if len(places) < 2:
            return
        cpu_outs, fetch_list = self._calc_output(places[0])
        gpu_outs, _ = self._calc_output(places[1])
        self._assert_cpu_gpu_same(cpu_outs, gpu_outs, fetch_list, atol)

    def timeit_output_with_place(self, place, iters):
        return self.timeit_function(self.calc_output, iters, place)

    def timeit_output(self, iters=100):
        places = self._get_places()
        elapses = []
        for place in places:
            elapses.append(self.timeit_output_with_place(place, iters))
        for place, elapse in zip(places, elapses):
            print("One pass of ({2}_op) at {0} cost {1}".format(
                str(place), elapse, self.op_type))

    def timeit_grad_with_place(self, place, iters=100):
        inputs_to_check = self._get_input_names()
        output_names = self._get_output_names()
        return self.timeit_function(
            self._get_gradient,
            iters,
            inputs_to_check,
            place,
            output_names,
            no_grad_set=None)

    def timeit_grad(self, iters=100):
        places = self._get_places()
        elapses = []
        for place in places:
            elapses.append(self.timeit_grad_with_place(place, iters))
        for place, elapse in zip(places, elapses):
            print("One pass of ({2}_grad_op) at {0} cost {1}".format(
                str(place), elapse, self.op_type))
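As a side note, the measure-N-calls-then-divide pattern that timeit_function implements can be reproduced standalone. The sketch below is plain Python; average_seconds_per_call and the matrix-product workload are hypothetical stand-ins, not part of the PR.

    import time
    import numpy as np

    def average_seconds_per_call(callback, iters, *args, **kwargs):
        # Run the callback `iters` times and report the mean wall-clock cost
        # per call, the same strategy BenchmarkSuite.timeit_function uses.
        assert iters >= 1, "need at least one iteration"
        start = time.time()
        for _ in range(iters):
            callback(*args, **kwargs)
        return (time.time() - start) / iters

    # Hypothetical workload: a 300x400 float32 matrix product, sized like
    # the demo inputs in the test case below.
    a = np.random.random((300, 400)).astype('float32')
    cost = average_seconds_per_call(np.dot, 100, a, a.T)
    print("One pass of (np.dot) cost {0}".format(cost))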
@@ -0,0 +1,80 @@
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import unittest
import numpy as np

import paddle.fluid as fluid
from benchmark import BenchmarkSuite
from op_test import OpTest

# This is a demo op test case for operator benchmarking and high-resolution
# numerical stability alignment.


class TestSumOp(BenchmarkSuite):
    def setUp(self):
        self.op_type = "sum"
        self.customize_testcase()
        self.customize_fetch_list()

    def customize_fetch_list(self):
        """
        customize the fetch list, configure the wanted variables.
        >>> self.fetch_list = ["Out"]
        """
        pass

Review comment (on the fetch_list docstring line): Is this a tab?
Reply: The output of the operator is automatically inserted into the fetch list; it is the same here.

    def customize_testcase(self):
        # a test case
        x0 = np.random.random((300, 400)).astype('float32')
        x1 = np.random.random((300, 400)).astype('float32')
        x2 = np.random.random((300, 400)).astype('float32')

        self.inputs = {"X": [("x0", x0), ("x1", x1), ("x2", x2)]}
        # NOTE: if the output is empty, it will be auto-filled by BenchmarkSuite.
        # self.outputs = {"Out": x0 + x1 + x2}

    def test_check_output(self):
        """
        compare the output with the customized output. In this case,
        you should set the correct output by hand.
        >>> self.outputs = {"Out": x0 + x1 + x2}
        """
        self.check_output(atol=1e-8)

    def test_output_stability(self):
        # compare the cpu and gpu output in high resolution.
        self.check_output_stability()

    def test_timeit_output(self):
        """
        perf the op; the time cost is averaged over iters.
        output example
        >>> One pass of (sum_op) at CPUPlace cost 0.000461330413818
        >>> One pass of (sum_op) at CUDAPlace(0) cost 0.000556070804596
        """
        self.timeit_output(iters=100)

    def test_timeit_grad(self):
        """
        perf the op gradient; the time cost is averaged over iters.
        output example
        >>> One pass of (sum_grad_op) at CPUPlace cost 0.00279935121536
        >>> One pass of (sum_grad_op) at CUDAPlace(0) cost 0.00500632047653
        """
        self.timeit_grad(iters=100)


if __name__ == "__main__":
    unittest.main()
Review comment: uint16 and int16 are different?
Reply: You are obviously right, but we have to use INT16 here because OpProto has no unsigned data types at all — none for uint16, uint32, or uint64.
https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/fluid/framework/framework.proto#L97
The uint16 in this PR is only used for float16 in op_test; it seems a little confusing, but that is the fact.
https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/fluid/tests/unittests/op_test.py#L473
The person who did the fp16 work explained the reason: pybind has no built-in float16 support, so he chose INT16 to allocate the same amount of memory.
Considering our messy data types in Python, this definitely needs to be cleaned up.
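To make the reinterpretation trick concrete, here is a minimal numpy-only sketch (no Paddle involved): float16 and uint16 are both two bytes per element, so a float16 buffer can be relabeled as uint16 without copying, which is why a 16-bit integer slot is enough to carry fp16 data through pybind.

    import numpy as np

    x_fp16 = np.array([1.0, 0.5, -2.0], dtype=np.float16)
    x_u16 = x_fp16.view(np.uint16)  # same memory, reinterpreted as uint16

    assert x_fp16.nbytes == x_u16.nbytes                    # identical storage size
    assert np.array_equal(x_u16.view(np.float16), x_fp16)   # lossless round trip
    print(x_u16)  # raw IEEE-754 half-precision bit patterns, shown as integers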