Add BF16 uniform random initializer #32468

Merged: 4 commits, Apr 29, 2021
3 changes: 3 additions & 0 deletions paddle/fluid/operators/fill_constant_op.h
@@ -117,6 +117,9 @@ class FillConstantKernel : public framework::OpKernel<T> {
}

if (actual_place == 0) {
VLOG(4) << "[CPU] FillConstantKernel"
<< ((data_type == framework::proto::VarType::BF16) ? "<bfloat16>"
: "<T>");
tensor->mutable_data(platform::CPUPlace(), data_type);
math::SetConstant<platform::CPUDeviceContext, T> functor;
functor(reinterpret_cast<const platform::CPUDeviceContext &>(dev_ctx),
58 changes: 43 additions & 15 deletions paddle/fluid/operators/uniform_random_op.cc
@@ -18,10 +18,41 @@ limitations under the License. */
#include "paddle/fluid/framework/generator.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/platform/bfloat16.h"

namespace paddle {
namespace operators {

namespace {
template <typename T>
inline void UniformRealDistribution(T *data, const int64_t &size,
const float &min, const float &max,
const unsigned int &seed) {
VLOG(4) << "[CPU] UniformRandomKernel<T>";
std::uniform_real_distribution<T> dist(static_cast<T>(min),
static_cast<T>(max));
auto engine = paddle::framework::GetCPURandomEngine(seed);

for (int64_t i = 0; i < size; ++i) {
data[i] = dist(*engine);
}
}

template <>
inline void UniformRealDistribution(paddle::platform::bfloat16 *data,
const int64_t &size, const float &min,
const float &max,
const unsigned int &seed) {
VLOG(4) << "[CPU] UniformRandomKernel<bfloat16>";
std::uniform_real_distribution<float> dist(min, max);
auto engine = paddle::framework::GetCPURandomEngine(seed);

for (int64_t i = 0; i < size; ++i) {
data[i] = static_cast<paddle::platform::bfloat16>(dist(*engine));
}
}
} // namespace

// It seems that Eigen::Tensor::random in GPU will SEGFAULT.
// Use std::random and thrust::random(thrust is a std library in CUDA) to
// implement uniform random.
@@ -61,17 +92,11 @@ class CPUUniformRandomKernel : public framework::OpKernel<T> {
framework::ToTypeName(out_var->Type())));
}
T *data = tensor->mutable_data<T>(ctx.GetPlace());

int64_t size = tensor->numel();
std::uniform_real_distribution<T> dist(
static_cast<T>(ctx.Attr<float>("min")),
static_cast<T>(ctx.Attr<float>("max")));
unsigned int seed = static_cast<unsigned int>(ctx.Attr<int>("seed"));
auto engine = framework::GetCPURandomEngine(seed);

for (int64_t i = 0; i < size; ++i) {
data[i] = dist(*engine);
}
UniformRealDistribution<T>(
data, size, ctx.Attr<float>("min"), ctx.Attr<float>("max"),
static_cast<unsigned int>(ctx.Attr<int>("seed")));

unsigned int diag_num =
static_cast<unsigned int>(ctx.Attr<int>("diag_num"));
@@ -257,9 +282,12 @@ REGISTER_OPERATOR(
paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>,
paddle::operators::UniformRandomOpVarTypeInference);

REGISTER_OP_CPU_KERNEL(uniform_random,
paddle::operators::CPUUniformRandomKernel<float>,
paddle::operators::CPUUniformRandomKernel<double>);
REGISTER_OP_CPU_KERNEL(uniform_random_batch_size_like,
paddle::operators::CPUUniformRandomKernel<float>,
paddle::operators::CPUUniformRandomKernel<double>);
REGISTER_OP_CPU_KERNEL(
uniform_random, paddle::operators::CPUUniformRandomKernel<float>,
paddle::operators::CPUUniformRandomKernel<double>,
paddle::operators::CPUUniformRandomKernel<paddle::platform::bfloat16>);
REGISTER_OP_CPU_KERNEL(
uniform_random_batch_size_like,
paddle::operators::CPUUniformRandomKernel<float>,
paddle::operators::CPUUniformRandomKernel<double>,
paddle::operators::CPUUniformRandomKernel<paddle::platform::bfloat16>);
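
The bfloat16 specialization above draws its samples from a float distribution and only narrows them afterwards, because std::uniform_real_distribution is specified only for the built-in floating-point types. A rough NumPy sketch of the same sample-then-narrow idea (illustration only; it assumes plain bit truncation, whereas Paddle's float-to-bfloat16 cast may round differently):

import numpy as np

def uniform_bfloat16_bits(low, high, size, seed=0):
    # Draw uniform samples in float32 first, then narrow them to bfloat16
    # bit patterns stored as uint16 (the carrier dtype fluid uses).
    rng = np.random.RandomState(seed)
    samples = rng.uniform(low, high, size).astype(np.float32)
    # bfloat16 keeps the upper 16 bits of the float32 representation.
    return (samples.view(np.uint32) >> 16).astype(np.uint16)

print(uniform_bfloat16_bits(-1.0, 1.0, (2, 3)))
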
9 changes: 4 additions & 5 deletions paddle/fluid/operators/uniform_random_op.h
@@ -24,9 +24,9 @@ namespace operators {
using Tensor = framework::Tensor;

inline std::vector<int64_t> GetNewDataFromShapeTensor(
const Tensor *new_data_tensor) {
const Tensor* new_data_tensor) {
if (new_data_tensor->type() == framework::proto::VarType::INT64) {
auto *new_data = new_data_tensor->data<int64_t>();
auto* new_data = new_data_tensor->data<int64_t>();
framework::Tensor cpu_starts_tensor;
if (platform::is_gpu_place(new_data_tensor->place())) {
TensorCopySync(*new_data_tensor, platform::CPUPlace(),
@@ -37,7 +37,7 @@ inline std::vector<int64_t> GetNewDataFromShapeTensor(
new_data + new_data_tensor->numel());
return vec_new_data;
} else if (new_data_tensor->type() == framework::proto::VarType::INT32) {
auto *new_data = new_data_tensor->data<int32_t>();
auto* new_data = new_data_tensor->data<int32_t>();
std::vector<int64_t> vec_new_data;
framework::Tensor cpu_starts_tensor;
if (platform::is_gpu_place(new_data_tensor->place())) {
@@ -58,7 +58,7 @@ inline std::vector<int64_t> GetNewDataFromShapeTensor(
}

inline std::vector<int64_t> GetNewDataFromShapeTensorList(
const std::vector<const Tensor *> &list_new_shape_tensor) {
const std::vector<const Tensor*>& list_new_shape_tensor) {
std::vector<int64_t> vec_new_shape;
vec_new_shape.reserve(list_new_shape_tensor.size());
for (size_t i = 0; i < list_new_shape_tensor.size(); ++i) {
@@ -97,6 +97,5 @@ inline std::vector<int64_t> GetNewDataFromShapeTensorList(

return vec_new_shape;
}

} // namespace operators
} // namespace paddle
16 changes: 10 additions & 6 deletions python/paddle/fluid/initializer.py
@@ -247,7 +247,7 @@ def __call__(self, var, block=None):
self._seed = block.program.random_seed

# to be compatible of fp16 initializers
if var.dtype in [VarDesc.VarType.FP16, VarDesc.VarType.BF16]:
if var.dtype == VarDesc.VarType.FP16:
out_dtype = VarDesc.VarType.FP32
out_var = block.create_var(
name=unique_name.generate(".".join(
@@ -276,7 +276,7 @@ def __call__(self, var, block=None):
},
stop_gradient=True)

if var.dtype in [VarDesc.VarType.FP16, VarDesc.VarType.BF16]:
if var.dtype == VarDesc.VarType.FP16:
block.append_op(
type="cast",
inputs={"X": out_var},
@@ -542,7 +542,8 @@ def __call__(self, var, block=None):
self._seed = block.program.random_seed

# to be compatible of fp16 initalizers
if var.dtype in [VarDesc.VarType.FP16, VarDesc.VarType.BF16]:
if var.dtype == VarDesc.VarType.FP16 or (
var.dtype == VarDesc.VarType.BF16 and not self._uniform):
out_dtype = VarDesc.VarType.FP32
out_var = block.create_var(
name=unique_name.generate(".".join(
@@ -584,7 +585,8 @@ def __call__(self, var, block=None):
},
stop_gradient=True)

if var.dtype in [VarDesc.VarType.FP16, VarDesc.VarType.BF16]:
if var.dtype == VarDesc.VarType.FP16 or (
var.dtype == VarDesc.VarType.BF16 and not self._uniform):
block.append_op(
type="cast",
inputs={"X": out_var},
@@ -673,7 +675,8 @@ def __call__(self, var, block=None):
self._seed = block.program.random_seed

# to be compatible of fp16 initalizers
if var.dtype in [VarDesc.VarType.FP16, VarDesc.VarType.BF16]:
if var.dtype == VarDesc.VarType.FP16 or (
var.dtype == VarDesc.VarType.BF16 and not self._uniform):
out_dtype = VarDesc.VarType.FP32
out_var = block.create_var(
name=unique_name.generate(".".join(
@@ -715,7 +718,8 @@ def __call__(self, var, block=None):
},
stop_gradient=True)

if var.dtype in [VarDesc.VarType.FP16, VarDesc.VarType.BF16]:
if var.dtype == VarDesc.VarType.FP16 or (
var.dtype == VarDesc.VarType.BF16 and not self._uniform):
block.append_op(
type="cast",
inputs={"X": out_var},
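
With the conditions above, a bfloat16 (uint16) parameter that takes the uniform path is filled directly by a single uniform_random op; the FP32 intermediate plus cast survives only for FP16 and for the non-uniform (Gaussian) bfloat16 paths. A minimal static-graph sketch of the uniform case, mirroring the unit-test pattern further down and assuming a build that includes the new CPU kernel:

import paddle.fluid.framework as framework
import paddle.fluid.initializer as initializer

# Build a bare program and let the initializer append its ops.
prog = framework.Program()
block = prog.global_block()
block.create_parameter(
    dtype="uint16",  # bfloat16 is carried as uint16 at the fluid level
    shape=[5, 10],
    lod_level=0,
    name="param",
    initializer=initializer.UniformInitializer(-0.5, 0.5, 123))
# Expected: a single uniform_random op and no trailing cast op.
print([op.type for op in block.ops])
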
7 changes: 4 additions & 3 deletions python/paddle/fluid/layers/nn.py
@@ -10523,10 +10523,10 @@ def uniform_random_batch_size_like(input,


"""
check_variable_and_dtype(input, 'Input', ("float32", 'float64'),
check_variable_and_dtype(input, 'Input', ("float32", 'float64', "uint16"),
'uniform_random_batch_size_like')
check_type(shape, 'shape', (list, tuple), 'uniform_random_batch_size_like')
check_dtype(dtype, 'dtype', ('float32', 'float64'),
check_dtype(dtype, 'dtype', ('float32', 'float64', "uint16"),
'uniform_random_batch_size_like')

helper = LayerHelper('uniform_random_batch_size_like', **locals())
@@ -15120,7 +15120,8 @@ def uniform_random(shape, dtype='float32', min=-1.0, max=1.0, seed=0,
float(max), 'seed', seed, 'dtype', dtype)

check_type(shape, 'shape', (list, tuple, Variable), 'uniform_random/rand')
check_dtype(dtype, 'dtype', ('float32', 'float64'), 'uniform_random/rand')
check_dtype(dtype, 'dtype', ('float32', 'float64', 'uint16'),
'uniform_random/rand')

inputs = dict()
attrs = {'seed': seed, 'min': min, 'max': max, 'dtype': dtype}
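
Since the dtype checks now accept 'uint16', the static-graph uniform_random and uniform_random_batch_size_like layers can produce bfloat16 output without tripping type validation. A short sketch of the former, again assuming a build with this change applied:

import paddle
import paddle.fluid as fluid

paddle.enable_static()  # take the static-graph branch of fluid.layers.uniform_random

main, startup = fluid.Program(), fluid.Program()
with fluid.program_guard(main, startup):
    # 'uint16' is the carrier dtype for bfloat16; check_dtype rejected it
    # before this change.
    x = fluid.layers.uniform_random(
        shape=[2, 4], dtype='uint16', min=-1.0, max=1.0, seed=1)
print(x.dtype)
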
45 changes: 26 additions & 19 deletions python/paddle/fluid/tests/unittests/test_initializer.py
@@ -53,7 +53,7 @@ def test_constant_initializer_default_value(self, dtype="float32"):
lod_level=0,
name="param",
initializer=initializer.ConstantInitializer())
num_ops = 2 if dtype in ["float16"] else 1
num_ops = 2 if dtype == "float16" else 1
self.assertEqual(len(block.ops), num_ops)
init_op = block.ops[0]
self.assertEqual(init_op.type, 'fill_constant')
@@ -72,7 +72,7 @@ def test_constant_initializer(self, dtype="float32"):
lod_level=0,
name="param",
initializer=initializer.ConstantInitializer(2.3))
num_ops = 2 if dtype in ["float16"] else 1
num_ops = 2 if dtype == "float16" else 1
self.assertEqual(len(block.ops), num_ops)
init_op = block.ops[0]
self.assertEqual(init_op.type, 'fill_constant')
@@ -108,7 +108,7 @@ def test_uniform_initializer_default_value(self, dtype="float32"):
lod_level=0,
name="param",
initializer=initializer.UniformInitializer())
num_ops = 2 if dtype in ["float16", "uint16"] else 1
num_ops = 2 if dtype == "float16" else 1
self.assertEqual(len(block.ops), num_ops)
init_op = block.ops[0]
self.assertEqual(init_op.type, 'uniform_random')
@@ -153,7 +153,7 @@ def test_uniform_initializer(self, dtype="float32"):
lod_level=0,
name="param",
initializer=initializer.UniformInitializer(-4.2, 3.1, 123))
num_ops = 2 if dtype in ["float16", "uint16"] else 1
num_ops = 2 if dtype == "float16" else 1
self.assertEqual(len(block.ops), num_ops)
init_op = block.ops[0]
self.assertEqual(init_op.type, 'uniform_random')
@@ -174,7 +174,7 @@ def test_uniform_initializer_two_op(self, dtype="float32"):
lod_level=0,
name="param",
initializer=initializer.UniformInitializer(-4.2, float(i), 123))
num_ops = 2 if dtype in ["float16", "uint16"] else 1
num_ops = 2 if dtype == "float16" else 1
self.assertEqual(len(block.ops), num_ops)
init_op0 = block.ops[0]
self.assertEqual(init_op0.type, 'uniform_random')
@@ -195,13 +195,11 @@ def test_uniform_initializer_fp16(self):

def test_uniform_initializer_bf16(self):
"""Test uniform initializer with bfloat16
No cast operator has been added here
"""
block = self.test_uniform_initializer_default_value("uint16")
self.assertTrue(check_cast_op(block.ops[1]))
block = self.test_uniform_initializer(dtype="uint16")
self.assertTrue(check_cast_op(block.ops[1]))
block = self.test_uniform_initializer_two_op("uint16")
self.assertTrue(check_cast_op(block.ops[1]))


class TestNormalInitializer(unittest.TestCase):
@@ -347,7 +345,9 @@ def test_normal_xavier_initializer_conv(self):
self.assertAlmostEqual(init_op.attr('std'), std, delta=DELTA)
self.assertEqual(init_op.attr('seed'), 0)

def test_xavier_initializer_supplied_arguments(self, dtype="float32"):
def test_xavier_initializer_supplied_arguments(self,
dtype="float32",
uniform=True):
"""Test the Xavier initializer with supplied arguments
"""
program = framework.Program()
@@ -359,14 +359,18 @@ def test_xavier_initializer_supplied_arguments(self, dtype="float32"):
lod_level=0,
name="param",
initializer=initializer.XavierInitializer(
fan_in=12, fan_out=23, seed=134))
num_ops = 2 if dtype in ["float16", "uint16"] else 1
uniform=uniform, fan_in=12, fan_out=23, seed=134))
num_ops = 2 if (dtype == "float16" or (dtype == "uint16" and
not uniform)) else 1
self.assertEqual(len(block.ops), num_ops)
init_op = block.ops[0]
self.assertEqual(init_op.type, 'uniform_random')
limit = np.sqrt(6.0 / (12 + 23))
self.assertAlmostEqual(init_op.attr('min'), -limit, delta=DELTA)
self.assertAlmostEqual(init_op.attr('max'), limit, delta=DELTA)
if uniform:
self.assertEqual(init_op.type, 'uniform_random')
limit = np.sqrt(6.0 / (12 + 23))
self.assertAlmostEqual(init_op.attr('min'), -limit, delta=DELTA)
self.assertAlmostEqual(init_op.attr('max'), limit, delta=DELTA)
else:
self.assertEqual(init_op.type, 'gaussian_random')
self.assertEqual(init_op.attr('seed'), 134)
return block

@@ -379,8 +383,12 @@ def test_xavier_initializer_fp16(self):
def test_xavier_initializer_bf16(self):
"""Test the Xavier initializer with bfloat16
"""
block = self.test_xavier_initializer_supplied_arguments("uint16")
self.assertTrue(check_cast_op(block.ops[1]))
block_uniform = self.test_xavier_initializer_supplied_arguments(
"uint16")
self.assertEqual(len(block_uniform.ops), 1)
block_gaussian = self.test_xavier_initializer_supplied_arguments(
"uint16", False)
self.assertTrue(check_cast_op(block_gaussian.ops[1]))


class TestMSRAInitializer(unittest.TestCase):
Expand Down Expand Up @@ -483,7 +491,7 @@ def test_msra_initializer_supplied_arguments(self, dtype="float32"):
name="param",
initializer=initializer.MSRAInitializer(
fan_in=12, seed=134))
num_ops = 2 if dtype in ["float16", "uint16"] else 1
num_ops = 2 if dtype == "float16" else 1
self.assertEqual(len(block.ops), num_ops)
init_op = block.ops[0]
self.assertEqual(init_op.type, 'uniform_random')
@@ -503,7 +511,6 @@ def test_msra_initializer_bf16(self):
"""Test the MSRA initializer with bfloat16
"""
block = self.test_msra_initializer_supplied_arguments("uint16")
self.assertTrue(check_cast_op(block.ops[1]))


class TestBilinearInitializer(unittest.TestCase):
11 changes: 4 additions & 7 deletions python/paddle/fluid/tests/unittests/test_initializer_nn.py
@@ -225,7 +225,7 @@ def test_uniform_common(self, dtype="float32", seed=0):
lod_level=0,
name="param",
initializer=initializer.Uniform())
num_ops = 2 if dtype in ["float16", "uint16"] else 1
num_ops = 2 if dtype == "float16" else 1
self.assertEqual(len(block.ops), num_ops)
init_op = block.ops[0]
self.assertEqual(init_op.type, 'uniform_random')
@@ -256,7 +256,7 @@ def test_uniform_initializer_default_value(self,
lod_level=0,
name="param",
initializer=initializer.Uniform())
num_ops = 2 if dtype in ["float16", "uint16"] else 1
num_ops = 2 if dtype == "float16" else 1
self.assertEqual(len(block.ops), num_ops)
init_op = block.ops[0]
self.assertEqual(init_op.type, 'uniform_random')
@@ -287,7 +287,7 @@ def test_uniform_initializer(self,
lod_level=0,
name="param",
initializer=initializer.Uniform(min_value, max_vlaue))
num_ops = 2 if dtype in ["float16", "uint16"] else 1
num_ops = 2 if dtype == "float16" else 1
self.assertEqual(len(block.ops), num_ops)
init_op = block.ops[0]
self.assertEqual(init_op.type, 'uniform_random')
@@ -317,7 +317,7 @@ def test_uniform_initializer_two_op(self,
lod_level=0,
name="param",
initializer=initializer.Uniform(min_value, float(i)))
num_ops = 2 if dtype in ["float16", "uint16"] else 1
num_ops = 2 if dtype == "float16" else 1
self.assertEqual(len(block.ops), num_ops)
init_op0 = block.ops[0]
self.assertEqual(init_op0.type, 'uniform_random')
@@ -343,11 +343,8 @@ def test_uniform_initializer_bf16(self):
"""Test uniform initializer with bfloat16
"""
block = self.test_uniform_initializer_default_value("uint16") #bfloat16
self.assertTrue(check_cast_op(block.ops[1]))
block = self.test_uniform_initializer(dtype="uint16") #bfloat16
self.assertTrue(check_cast_op(block.ops[1]))
block = self.test_uniform_initializer_two_op("uint16") #bfloat16
self.assertTrue(check_cast_op(block.ops[1]))

def test_uniform_initializer_dygraph(self):
"""Test uniform initializer in dygraph model.