From f07ab3cd8eb31e9168b1f6a14745c84893807d73 Mon Sep 17 00:00:00 2001
From: co63oc
Date: Sun, 4 Feb 2024 13:05:16 +0800
Subject: [PATCH] Fix VarType

---
 .../distributed/passes/auto_parallel_fp16.py | 24 +++++++------------
 .../passes/auto_parallel_master_grad.py      |  7 +++---
 python/paddle/hapi/model.py                  |  5 +---
 .../distributed/models/moe/grad_clip.py      |  6 ++---
 .../optimizer/distributed_fused_lamb.py      |  6 ++---
 5 files changed, 19 insertions(+), 29 deletions(-)

diff --git a/python/paddle/distributed/passes/auto_parallel_fp16.py b/python/paddle/distributed/passes/auto_parallel_fp16.py
index dc8ec93d36ff0..2cf08793bddf9 100644
--- a/python/paddle/distributed/passes/auto_parallel_fp16.py
+++ b/python/paddle/distributed/passes/auto_parallel_fp16.py
@@ -56,17 +56,11 @@ def set_op_dtype_to_fp16(op):
-    if (
-        op.has_attr('in_dtype')
-        and op.attr('in_dtype') == core.VarDesc.VarType.FP32
-    ):
+    if op.has_attr('in_dtype') and op.attr('in_dtype') == paddle.float32:
         op._set_attr('in_dtype', __target_dtype__)
-    if (
-        op.has_attr('out_dtype')
-        and op.attr('out_dtype') == core.VarDesc.VarType.FP32
-    ):
+    if op.has_attr('out_dtype') and op.attr('out_dtype') == paddle.float32:
         op._set_attr('out_dtype', __target_dtype__)
-    if op.has_attr('dtype') and op.attr('dtype') == core.VarDesc.VarType.FP32:
+    if op.has_attr('dtype') and op.attr('dtype') == paddle.float32:
         op._set_attr('dtype', __target_dtype__)
@@ -297,7 +291,7 @@ def set_var_to_fp16(self, var_name, block):
         ):
             return
 
-        if var.dtype == core.VarDesc.VarType.FP32:
+        if var.dtype == paddle.float32:
             var.desc.set_dtype(__target_dtype__)
 
     def resolute_cast_op(self, block):
@@ -445,9 +439,7 @@ def _insert_forward_cast_ops(
         num_cast_ops = 0
 
         for in_name in op.input_names:
-            if src_dtype == core.VarDesc.VarType.FP32 and _keep_fp32_input(
-                op, in_name
-            ):
+            if src_dtype == paddle.float32 and _keep_fp32_input(op, in_name):
                 continue
 
             consume_op_attr = dist_context.get_op_dist_attr_for_program(op)
@@ -692,7 +684,7 @@ def _check_and_update_gradient(grads, loss_scaling, name, dist_context):
 def _split_grads(params_grads):
     grads = [g for _, g in params_grads]
-    fp32_grads = [g for g in grads if g.dtype == core.VarDesc.VarType.FP32]
+    fp32_grads = [g for g in grads if g.dtype == paddle.float32]
     fp16_grads = [g for g in grads if g.dtype == __target_dtype__]
     assert len(fp32_grads) + len(fp16_grads) == len(
         grads
     )
@@ -809,9 +801,9 @@ def is_initialization_op(op):
                     'dtype'
                 ), f"initialization op is supported to has dtype attribute but got {str(op)}."
                 out_var = startup_program.global_block().var(output_name)
-                if out_var.dtype == core.VarDesc.VarType.FP32:
+                if out_var.dtype == paddle.float32:
                     out_var.desc.set_dtype(__target_dtype__)
-                if op.attr('dtype') == core.VarDesc.VarType.FP32:
+                if op.attr('dtype') == paddle.float32:
                     op._set_attr('dtype', __target_dtype__)
diff --git a/python/paddle/distributed/passes/auto_parallel_master_grad.py b/python/paddle/distributed/passes/auto_parallel_master_grad.py
index 6435cee8259cb..f5271616ddf74 100644
--- a/python/paddle/distributed/passes/auto_parallel_master_grad.py
+++ b/python/paddle/distributed/passes/auto_parallel_master_grad.py
@@ -17,6 +17,7 @@
 from collections import OrderedDict
 from typing import List, Tuple
 
+import paddle
 from paddle.base import Variable
 from paddle.distributed.auto_parallel.static.utils import (
     is_backward_op,
@@ -118,10 +119,10 @@ def _add_cast_op(self, cur_block, grad_names: List[str], dist_context):
         for grad_name, idx in reversed(grad_first_ids.items()):
             grad_var = cur_block.var(grad_name)
             if (
-                grad_var.dtype == core.VarDesc.VarType.FP16
-                or grad_var.dtype == core.VarDesc.VarType.BF16
+                grad_var.dtype == paddle.float16
+                or grad_var.dtype == paddle.bfloat16
             ):
-                is_fp16 = grad_var.dtype == core.VarDesc.VarType.FP16
+                is_fp16 = grad_var.dtype == paddle.float16
                 producer_op = cur_block.ops[idx]
                 producer_op_dist_attr = (
                     dist_context.get_op_dist_attr_for_program(producer_op)
diff --git a/python/paddle/hapi/model.py b/python/paddle/hapi/model.py
index 866901b840a31..17e471bd9727c 100644
--- a/python/paddle/hapi/model.py
+++ b/python/paddle/hapi/model.py
@@ -542,10 +542,7 @@ def _run(self, inputs, labels=None):
             # train and test may take different arguments
             if inputs[idx] is not None:
                 feed[n] = inputs[idx]
-            if (
-                self._amp_level == 'O2'
-                and input_dtypes[idx] == core.VarDesc.VarType.FP16
-            ):
+            if self._amp_level == 'O2' and input_dtypes[idx] == paddle.float16:
                 if isinstance(feed[n], core.LoDTensor):
                     feed[n] = feed[n]._as_type(core.VarDesc.VarType.FP16)
                 elif isinstance(feed[n], np.array):
diff --git a/python/paddle/incubate/distributed/models/moe/grad_clip.py b/python/paddle/incubate/distributed/models/moe/grad_clip.py
index 2a0dd89b77ff9..edfea5acf8729 100644
--- a/python/paddle/incubate/distributed/models/moe/grad_clip.py
+++ b/python/paddle/incubate/distributed/models/moe/grad_clip.py
@@ -117,9 +117,9 @@ def get_l2_norm_pow(params_grads, sum_dtype=None):
             merge_grad = clip.merge_selected_rows(g)
             merge_grad = clip.get_tensor_from_selected_rows(merge_grad)
         sum_square = _squared_l2_norm(merge_grad)
-        if sum_square.dtype == core.VarDesc.VarType.FP16:
+        if sum_square.dtype == paddle.float16:
             sum_square_list_fp16.append(sum_square)
-        elif sum_square.dtype == core.VarDesc.VarType.FP32:
+        elif sum_square.dtype == paddle.float32:
             sum_square_list_fp32.append(sum_square)
         else:
             sum_square_list.append(sum_square)
@@ -222,7 +222,7 @@ def _dygraph_clip(self, params_grads):
             # TODO(wangxi): use inplace elementwise_mul
             clip_input = (
                 clip_var.astype('float16')
-                if g.dtype == core.VarDesc.VarType.FP16
+                if g.dtype == paddle.float16
                 else clip_var
             )
             new_grad = paddle.multiply(x=g, y=clip_input)
diff --git a/python/paddle/incubate/optimizer/distributed_fused_lamb.py b/python/paddle/incubate/optimizer/distributed_fused_lamb.py
index 210cffdcb606b..905271d71876b 100644
--- a/python/paddle/incubate/optimizer/distributed_fused_lamb.py
+++ b/python/paddle/incubate/optimizer/distributed_fused_lamb.py
@@ -247,14 +247,14 @@ def _get_parameter(self, name, scope=None):
         assert master_param is not None
         master_param_t = scope.find_var(master_param).get_tensor()
-        assert master_param_t._dtype() == core.VarDesc.VarType.FP32
+        assert master_param_t._dtype() == paddle.float32
 
         param_t = scope.find_var(name).get_tensor()
-        if param_t._dtype() == core.VarDesc.VarType.FP32:
+        if param_t._dtype() == paddle.float32:
             assert param_t._ptr() == master_param_t._ptr()
             return param_t, None
         else:
-            assert param_t._dtype() == core.VarDesc.VarType.FP16
+            assert param_t._dtype() == paddle.float16
             assert param_t.shape() == master_param_t.shape()
             return param_t, master_param_t
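
Note (reviewer sketch, not part of the patch): the rewrite assumes that the public
dtype aliases paddle.float16 / paddle.bfloat16 / paddle.float32 compare equal to the
legacy core.VarDesc.VarType enums returned for static-graph variables and op
attributes, so every rewritten check keeps its old truth value. A minimal,
non-authoritative self-check of that assumption (module paths as in Paddle 2.6+,
where paddle.base is available):

    import paddle
    from paddle.base import core

    paddle.enable_static()

    main = paddle.static.Program()
    startup = paddle.static.Program()
    with paddle.static.program_guard(main, startup):
        # A float32 static variable; its .dtype is what the patched checks compare.
        x = paddle.static.data(name='x', shape=[2, 3], dtype='float32')

    # The alias and the legacy enum must be interchangeable for the patch to be a no-op.
    assert paddle.float32 == core.VarDesc.VarType.FP32
    assert x.dtype == core.VarDesc.VarType.FP32
    assert x.dtype == paddle.float32
    print("paddle.float32 matches core.VarDesc.VarType.FP32 for static variables")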