fix optimizer dtype (#29917)
zhiqiu authored Dec 28, 2020
1 parent 9602a18 commit a4b9daf
Showing 5 changed files with 53 additions and 8 deletions.
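
In short, the change makes the optimizer's working dtype come from its parameters instead of from the loss: __init__ now seeds _dtype from the first parameter in the parameter list, backward() only falls back to loss.dtype when nothing has been inferred yet, and the per-call reset of _dtype in step() is removed. Below is a minimal dygraph sketch of the resulting behavior, mirroring the new TestOptimizerDtype test added in this commit; paddle.nn.Linear and its default float32 parameters are illustrative assumptions, and _dtype is a private attribute inspected purely for demonstration.

    import paddle

    with paddle.fluid.dygraph.guard():
        linear = paddle.nn.Linear(3, 1)  # parameters are created as float32 by default
        adam = paddle.optimizer.Adam(parameters=linear.parameters())
        # With this change, _dtype is already inferred from the parameters here,
        # before any forward/backward pass; previously it was only set from
        # loss.dtype inside backward().
        print(adam._dtype)  # internal dtype enum corresponding to float32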
11 changes: 9 additions & 2 deletions python/paddle/fluid/optimizer.py
@@ -108,8 +108,12 @@ def __init__(self,
         self.regularization = regularization
         self._grad_clip = grad_clip
         self._learning_rate = learning_rate
-        # the learning rate type should be inferenced from loss
+
         self._dtype = None
+        # Infer the dtype from parameter
+        if self._parameter_list:
+            self._dtype = self._parameter_list[0].dtype
+
         # each program should have a independent learning rate
         # program -> Variable(learning_rate)
         self._learning_rate_map = dict()
@@ -768,7 +772,10 @@ def backward(self,
         else:
             act_no_grad_set = self._get_no_grad_set(loss, no_grad_set)

-        self._dtype = loss.dtype
+        # Infer dtype by loss if None
+        if self._dtype is None:
+            self._dtype = loss.dtype
+
         if framework.in_dygraph_mode():
             parameter_list = parameter_list if parameter_list \
                 else self._parameter_list
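
The backward() hunk above keeps loss.dtype as a fallback: when no parameter list is passed at construction (the usual static-graph pattern), _dtype is still None and is only inferred once a loss is available. A rough static-graph sketch of that fallback path, assuming the fluid 1.x-style API used in this file; the toy network is illustrative only.

    import paddle
    import paddle.fluid as fluid

    paddle.enable_static()
    main_prog, startup_prog = fluid.Program(), fluid.Program()
    with fluid.program_guard(main_prog, startup_prog):
        x = fluid.data(name='x', shape=[None, 13], dtype='float32')
        y = fluid.data(name='y', shape=[None, 1], dtype='float32')
        pred = fluid.layers.fc(input=x, size=1)
        loss = fluid.layers.reduce_mean(fluid.layers.square(pred - y))
        sgd = fluid.optimizer.SGD(learning_rate=0.01)
        # No parameter_list was given, so sgd._dtype is still None at this point;
        # minimize() -> backward() then infers it from loss.dtype (the kept fallback).
        sgd.minimize(loss)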
35 changes: 34 additions & 1 deletion python/paddle/fluid/tests/unittests/test_optimizer.py
@@ -23,7 +23,8 @@
 import paddle.compat as cpt
 import numpy as np
 from paddle.fluid.backward import append_backward
-from paddle.fluid.framework import Program, program_guard
+from paddle.fluid.framework import Program, program_guard, convert_np_dtype_to_dtype_
+import paddle


 class TestOptimizer(unittest.TestCase):
@@ -1042,5 +1043,37 @@ def test_program_desc(self, ):
                          ['sgd', 'sgd'])


+class TestOptimizerDtype(unittest.TestCase):
+    '''
+    The dtype of the optimizer should be inferred from the parameters, and the
+    learning rate is created with the same dtype.
+    '''
+
+    def check_with_dtype(self, dtype):
+        class MyLayer(paddle.nn.Layer):
+            def __init__(self, dtype):
+                super(MyLayer, self).__init__()
+                self._w = self.create_parameter([2, 3], dtype=dtype)
+                self._b = self.create_parameter([2, 3], dtype=dtype)
+
+            def forward(self, x):
+                return x * self._w + self._b
+
+        with paddle.fluid.dygraph.guard():
+            model = MyLayer(dtype)
+            x = paddle.rand([10, 2, 3], dtype=dtype)
+            loss = model(x)
+            adam = paddle.optimizer.Adam(parameters=model.parameters())
+            loss.backward()
+            adam.step()
+            self.assertEqual(adam._dtype, convert_np_dtype_to_dtype_(dtype))
+
+    def test_float64(self):
+        self.check_with_dtype('float64')
+
+    def test_float32(self):
+        self.check_with_dtype('float32')
+
+
 if __name__ == '__main__':
     unittest.main()
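
For reference, one way to run just the new test case, assuming a Paddle development checkout and that this is executed from the python/paddle/fluid/tests/unittests directory so the module is importable:

    import unittest

    # test_optimizer is the module modified above; the import assumes this runs
    # from the unittests directory of a source checkout.
    from test_optimizer import TestOptimizerDtype

    suite = unittest.defaultTestLoader.loadTestsFromTestCase(TestOptimizerDtype)
    unittest.TextTestRunner(verbosity=2).run(suite)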
1 change: 0 additions & 1 deletion python/paddle/optimizer/adam.py
@@ -270,7 +270,6 @@ def step(self):
                adam.step()
                adam.clear_grad()
        """
-        self._dtype = None
        params_grads = []
        for param in self._parameter_list:
            if not param.trainable:
1 change: 0 additions & 1 deletion python/paddle/optimizer/adamw.py
@@ -210,7 +210,6 @@ def minimize(self,
     @framework.dygraph_only
     @imperative_base.no_grad
     def step(self):
-        self._dtype = None
         params_grads = []
         for param in self._parameter_list:
             if not param.trainable:
13 changes: 10 additions & 3 deletions python/paddle/optimizer/optimizer.py
@@ -132,8 +132,12 @@ def __init__(self,
         self.regularization = weight_decay
         self._grad_clip = grad_clip
         self._learning_rate = learning_rate
-        # the learning rate type should be inferenced from loss
+
         self._dtype = None
+        # Infer the dtype from parameter
+        if self._parameter_list:
+            self._dtype = self._parameter_list[0].dtype
+
         # each program should have a independent learning rate
         # program -> tensor(learning_rate)
         self._learning_rate_map = dict()
@@ -675,7 +679,10 @@ def backward(self,
         else:
             act_no_grad_set = self._get_no_grad_set(loss, no_grad_set)

-        self._dtype = loss.dtype
+        # Infer dtype by loss if None
+        if self._dtype is None:
+            self._dtype = loss.dtype
+
         if framework.in_dygraph_mode():
             parameter_list = parameters if parameters \
                 else self._parameter_list
@@ -885,6 +892,7 @@ def minimize(self,

         return optimize_ops, params_grads

+    @imperative_base.no_grad
     @framework.dygraph_only
     def step(self):
         """
@@ -910,7 +918,6 @@ def step(self):
                adam.step()
                adam.clear_grad()
        """
-        self._dtype = None
        params_grads = []
        for param in self._parameter_list:
            if not param.trainable:
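
Because __init__ now seeds _dtype from the parameters, the self._dtype = None reset that step() used to perform (removed here and in adam.py and adamw.py above) is no longer needed in dygraph, and the value stays stable across steps. A small sketch of that invariant; the layer shape and step count are illustrative assumptions.

    import paddle

    with paddle.fluid.dygraph.guard():
        linear = paddle.nn.Linear(2, 2)
        opt = paddle.optimizer.Adam(parameters=linear.parameters())
        dtype_at_init = opt._dtype
        for _ in range(2):
            loss = paddle.mean(linear(paddle.rand([4, 2])))
            loss.backward()
            opt.step()
            opt.clear_grad()
        assert opt._dtype == dtype_at_init  # step() no longer resets it to None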
