Description

There have been some major changes in PyTorch 2.0.x that break some of the reversible operations.
I should investigate how to fix these.

What I Did

Run the CI tests for PyTorch 2.0.x.
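The failure below looks like the standard autograd version-counter check: a tensor that autograd saved for the backward pass (the output of a ReLU) is modified in place before backward() runs, presumably related to how the reversible operations free and restore activations to save memory. For reference, a minimal sketch of the same class of error (my own illustration, not memcnn code):

import torch

x = torch.randn(8, requires_grad=True)
y = torch.relu(x)        # ReluBackward0 saves its output y for the backward pass
loss = (y * 2.0).sum()   # the graph also gets a MulBackward0 node, as in the traceback
y.add_(1.0)              # the in-place update bumps y's version counter from 0 to 1
loss.backward()          # RuntimeError: ... output 0 of ReluBackward0, is at version 1; expected version 0

The traceback from the failing revnet38 / CPU run: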
tmp_path = PosixPath('/tmp/pytest-of-circleci/pytest-0/test_train_networks_False_revn0')
network = 'revnet38', use_cuda = False
@pytest.mark.parametrize("network", [
    pytest.param(network,
                 marks=pytest.mark.skipif(
                     condition=("FULL_NETWORK_TESTS" not in os.environ) and ("revnet38" != network),
                     reason="Too memory intensive for CI so these tests are disabled by default. "
                            "Set FULL_NETWORK_TESTS environment variable to enable the tests.")
                 )
    for network in ["resnet32", "resnet110", "resnet164", "revnet38", "revnet110", "revnet164"]
])
@pytest.mark.parametrize("use_cuda", [
    False,
    pytest.param(True, marks=pytest.mark.skipif(condition=not torch.cuda.is_available(), reason="No GPU available"))
])
def test_train_networks(tmp_path, network, use_cuda):
    exptags = ["cifar10", network, "epoch5"]
    exp_file = str(Path(__file__).parent / "resources" / "experiments.json")
    data_dir = str(tmp_path / "tmpdata")
    results_dir = str(tmp_path / "resdir")
    os.makedirs(data_dir)
    os.makedirs(results_dir)
>   run_experiment(experiment_tags=exptags, data_dir=data_dir, results_dir=results_dir,
                   start_fresh=True, use_cuda=use_cuda, workers=None, experiments_file=exp_file,
                   disp_iter=1,
                   save_iter=5,
                   valid_iter=5,)
memcnn/trainers/tests/test_train.py:89:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
memcnn/train.py:48: in run_experiment
    trainer(manager, start_iter=last_iter, use_cuda=use_cuda, *args, **trainer_params)
memcnn/trainers/classification.py:111: in train
    loss.backward()
.tox/py38-torchlatest/lib/python3.8/site-packages/torch/_tensor.py:487: in backward
    torch.autograd.backward(
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
tensors = (tensor(2.3590, grad_fn=<MulBackward0>),), grad_tensors = None
retain_graph = False, create_graph = False, grad_variables = None, inputs = ()
def backward(
    tensors: _TensorOrTensors,
    grad_tensors: Optional[_TensorOrTensors] = None,
    retain_graph: Optional[bool] = None,
    create_graph: bool = False,
    grad_variables: Optional[_TensorOrTensors] = None,
    inputs: Optional[_TensorOrTensors] = None,
) -> None:
    r"""Computes the sum of gradients of given tensors with respect to graph
    leaves.
    The graph is differentiated using the chain rule. If any of ``tensors``
    are non-scalar (i.e. their data has more than one element) and require
    gradient, then the Jacobian-vector product would be computed, in this
    case the function additionally requires specifying ``grad_tensors``.
    It should be a sequence of matching length, that contains the "vector"
    in the Jacobian-vector product, usually the gradient of the differentiated
    function w.r.t. corresponding tensors (``None`` is an acceptable value for
    all tensors that don't need gradient tensors).
    This function accumulates gradients in the leaves - you might need to zero
    ``.grad`` attributes or set them to ``None`` before calling it.
    See :ref:`Default gradient layouts<default-grad-layouts>`
    for details on the memory layout of accumulated gradients.
    .. note::
        Using this method with ``create_graph=True`` will create a reference cycle
        between the parameter and its gradient which can cause a memory leak.
        We recommend using ``autograd.grad`` when creating the graph to avoid this.
        If you have to use this function, make sure to reset the ``.grad`` fields of your
        parameters to ``None`` after use to break the cycle and avoid the leak.
    .. note::
        If you run any forward ops, create ``grad_tensors``, and/or call ``backward``
        in a user-specified CUDA stream context, see
        :ref:`Stream semantics of backward passes<bwd-cuda-stream-semantics>`.
    .. note::
        When ``inputs`` are provided and a given input is not a leaf,
        the current implementation will call its grad_fn (even though it is not strictly needed to get this gradients).
        It is an implementation detail on which the user should not rely.
        See https://github.com/pytorch/pytorch/pull/60521#issuecomment-867061780 for more details.
    Args:
        tensors (Sequence[Tensor] or Tensor): Tensors of which the derivative will be
            computed.
        grad_tensors (Sequence[Tensor or None] or Tensor, optional): The "vector" in
            the Jacobian-vector product, usually gradients w.r.t. each element of
            corresponding tensors. None values can be specified for scalar Tensors or
            ones that don't require grad. If a None value would be acceptable for all
            grad_tensors, then this argument is optional.
        retain_graph (bool, optional): If ``False``, the graph used to compute the grad
            will be freed. Note that in nearly all cases setting this option to ``True``
            is not needed and often can be worked around in a much more efficient
            way. Defaults to the value of ``create_graph``.
        create_graph (bool, optional): If ``True``, graph of the derivative will
            be constructed, allowing to compute higher order derivative products.
            Defaults to ``False``.
        inputs (Sequence[Tensor] or Tensor, optional): Inputs w.r.t. which the gradient
            be will accumulated into ``.grad``. All other Tensors will be ignored. If
            not provided, the gradient is accumulated into all the leaf Tensors that
            were used to compute the attr::tensors.
    """
    if torch._C._are_functorch_transforms_active():
        raise RuntimeError(
            "backward() called inside a functorch transform. This is not "
            "supported, please use functorch.grad or functorch.vjp instead "
            "or call backward() outside of functorch transforms.")
    if grad_variables is not None:
        warnings.warn("'grad_variables' is deprecated. Use 'grad_tensors' instead.")
        if grad_tensors is None:
            grad_tensors = grad_variables
        else:
            raise RuntimeError("'grad_tensors' and 'grad_variables' (deprecated) "
                               "arguments both passed to backward(). Please only "
                               "use 'grad_tensors'.")
    if inputs is not None and len(inputs) == 0:
        raise RuntimeError("'inputs' argument to backward() cannot be empty.")
    tensors = (tensors,) if isinstance(tensors, torch.Tensor) else tuple(tensors)
    inputs = (inputs,) if isinstance(inputs, torch.Tensor) else \
        tuple(inputs) if inputs is not None else tuple()
    grad_tensors_ = _tensor_or_tensors_to_tuple(grad_tensors, len(tensors))
    grad_tensors_ = _make_grads(tensors, grad_tensors_, is_grads_batched=False)
    if retain_graph is None:
        retain_graph = create_graph
    # The reason we repeat same the comment below is that
    # some Python versions print out the first line of a multi-line function
    # calls in the traceback and some print out the last line
>   Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
        tensors, grad_tensors_, retain_graph, create_graph, inputs,
        allow_unreachable=True, accumulate_grad=True)  # Calls into the C++ engine to run the backward pass
E RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation: [torch.FloatTensor [100, 32, 32, 32]], which is output 0 of ReluBackward0, is at version 1; expected version 0 instead. Hint: enable anomaly detection to find the operation that failed to compute its gradient, with torch.autograd.set_detect_anomaly(True).
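As the hint in the error message suggests, the quickest way to locate the offending forward operation is probably to enable anomaly detection around the failing backward call (the loss.backward() at memcnn/trainers/classification.py:111). A minimal sketch with a stand-in loss tensor:

import torch

# Global switch; records forward-pass stack traces, so debugging use only.
torch.autograd.set_detect_anomaly(True)

# Or scoped around a single backward call; loss here is just a placeholder
# for the training loss computed in memcnn/trainers/classification.py.
loss = (torch.randn(3, requires_grad=True) ** 2).sum()
with torch.autograd.detect_anomaly():
    loss.backward()

With anomaly detection enabled, the RuntimeError above should come with a second traceback pointing at the forward operation that modified the saved ReLU output in place.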