
Commit

attach empty grad to its param to ensure it's copied after reduction (m…
jeffra authored Aug 13, 2020
1 parent 6855ba1 commit e1bea67
Showing 1 changed file with 4 additions and 4 deletions.
8 changes: 4 additions & 4 deletions deepspeed/pt/deepspeed_light.py
@@ -1013,10 +1013,10 @@ def buffered_allreduce_fallback(self, grads=None, elements_per_buffer=500000000)
                     # rank is reducing the same size. In some cases it may make
                     # sense in the future to support the ability to average not
                     # w.r.t. world size but with a different value.
-                    grads.append(
-                        torch.zeros(param.size(),
-                                    dtype=param.dtype,
-                                    device=param.device))
+                    param.grad = torch.zeros(param.size(),
+                                             dtype=param.dtype,
+                                             device=param.device)
+                    grads.append(param.grad.data)
                 else:
                     grad_data = param.grad.data
                     if self.sparse_gradients_enabled(
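
The fix matters because the freshly allocated zero tensor was previously appended to grads without being referenced by the parameter, so whatever was written into it during the buffered allreduce never became visible through param.grad. Below is a minimal sketch of the pattern under simplified assumptions (hypothetical helper names, a flat gradient list, torch.distributed already initialized); it is not the DeepSpeed implementation, which reduces gradients in buffered chunks.

import torch
import torch.distributed as dist

def collect_grads(params):
    # For parameters that received no gradient this step (e.g. unused in the
    # forward pass), attach a zero tensor as param.grad before reduction.
    # Because the tensor handed to the allreduce is the same storage that
    # param.grad points to, the averaged gradient remains visible to the
    # optimizer after reduction.
    grads = []
    for param in params:
        if param.grad is None:
            param.grad = torch.zeros(param.size(),
                                     dtype=param.dtype,
                                     device=param.device)
        grads.append(param.grad.data)
    return grads

def allreduce_and_average(grads):
    # Sum each gradient across ranks in place, then divide by world size so
    # every rank ends up holding the average.
    world_size = dist.get_world_size()
    for g in grads:
        dist.all_reduce(g)
        g.div_(world_size)

With the old code, the standalone torch.zeros(...) tensor was still reduced (so every rank reduced the same number of elements), but nothing pointed back to it from the parameter: param.grad stayed None on ranks where that parameter produced no gradient, and the averaged value was effectively dropped.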

