You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
I am using the LOTSA dataset along with my own dataset (around 3GB) for pre-training. After training for a certain number of epochs, I encounter the following error.
File "/share/home/defaultTenant/caiyx/.conda/envs/uni2ts/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/share/home/defaultTenant/caiyx/.conda/envs/uni2ts/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl
return forward_call(*args, **kwargs)
File "/share/home/defaultTenant/caiyx/python_workspace/uni2ts/src/uni2ts/model/moirai/module.py", line 177, in forward
distr = self.distr_output.distribution(distr_param, loc=loc, scale=scale)
File "/share/home/defaultTenant/caiyx/python_workspace/uni2ts/src/uni2ts/distribution/_base.py", line 171, in distribution
distr = self._distribution(distr_params, validate_args=validate_args)
File "/share/home/defaultTenant/caiyx/python_workspace/uni2ts/src/uni2ts/distribution/mixture.py", line 182, in _distribution
weights=Categorical(
File "/share/home/defaultTenant/caiyx/.conda/envs/uni2ts/lib/python3.10/site-packages/torch/distributions/categorical.py", line 72, in __init__
super().__init__(batch_shape, validate_args=validate_args)
File "/share/home/defaultTenant/caiyx/.conda/envs/uni2ts/lib/python3.10/site-packages/torch/distributions/distribution.py", line 71, in __init__
raise ValueError(
File "/share/home/defaultTenant/caiyx/.conda/envs/uni2ts/lib/python3.10/site-packages/lightning/pytorch/trainer/trainer.py", line 574, in _fit_impl
self._run(model, ckpt_path=ckpt_path)
File "/share/home/defaultTenant/caiyx/.conda/envs/uni2ts/lib/python3.10/site-packages/lightning/pytorch/core/optimizer.py", line 153, in step
step_output = self._strategy.optimizer_step(self._optimizer, closure, **kwargs)
File "/share/home/defaultTenant/caiyx/.conda/envs/uni2ts/lib/python3.10/site-packages/lightning/pytorch/strategies/ddp.py", line 270, in optimizer_step
optimizer_output = super().optimizer_step(optimizer, closure, model, **kwargs)
File "/share/home/defaultTenant/caiyx/.conda/envs/uni2ts/lib/python3.10/site-packages/lightning/pytorch/strategies/strategy.py", line 238, in optimizer_step
return self.precision_plugin.optimizer_step(optimizer, model=model, closure=closure, **kwargs)
File "/share/home/defaultTenant/caiyx/.conda/envs/uni2ts/lib/python3.10/site-packages/lightning/pytorch/plugins/precision/precision.py", line 122, in optimizer_step
return optimizer.step(closure=closure, **kwargs)
File "/share/home/defaultTenant/caiyx/.conda/envs/uni2ts/lib/python3.10/site-packages/torch/optim/lr_scheduler.py", line 137, in wrapper
return func.__get__(opt, opt.__class__)(*args, **kwargs)
File "/share/home/defaultTenant/caiyx/.conda/envs/uni2ts/lib/python3.10/site-packages/torch/optim/optimizer.py", line 487, in wrapper
out = func(*args, **kwargs)
File "/share/home/defaultTenant/caiyx/.conda/envs/uni2ts/lib/python3.10/site-packages/torch/optim/optimizer.py", line 91, in _use_grad
ret = func(self, *args, **kwargs)
File "/share/home/defaultTenant/caiyx/.conda/envs/uni2ts/lib/python3.10/site-packages/torch/optim/adamw.py", line 197, in step
loss = closure()
File "/share/home/defaultTenant/caiyx/.conda/envs/uni2ts/lib/python3.10/site-packages/lightning/pytorch/plugins/precision/precision.py", line 108, in _wrap_closure
closure_result = closure()
File "/share/home/defaultTenant/caiyx/.conda/envs/uni2ts/lib/python3.10/site-packages/lightning/pytorch/plugins/precision/precision.py", line 122, in optimizer_step
return optimizer.step(closure=closure, **kwargs)
File "/share/home/defaultTenant/caiyx/.conda/envs/uni2ts/lib/python3.10/site-packages/torch/optim/lr_scheduler.py", line 137, in wrapper
return func.__get__(opt, opt.__class__)(*args, **kwargs)
File "/share/home/defaultTenant/caiyx/.conda/envs/uni2ts/lib/python3.10/site-packages/torch/optim/optimizer.py", line 487, in wrapper
out = func(*args, **kwargs)
ValueError: Expected parameter logits (Tensor of shape (32, 512, 128, 4)) of distribution Categorical(logits: torch.Size([32, 512, 128, 4])) to satisfy the constraint IndependentConstraint(Real(), 1), but found invalid values:
tensor([[[[nan, nan, nan, nan],
[nan, nan, nan, nan],
[nan, nan, nan, nan],
...,
I think the issue is with my own dataset, but I'm not sure what the specific problem is. Could you help me identify the exact issue with the dataset?
You can check discussion thread #19. I suggest adding the +trainer.detect_anomaly=True flag during pre-training; the resulting stack trace will help locate the root cause.
I am using the LOTSA dataset along with my own dataset (around 3GB) for pre-training. After training for a certain number of epochs, I encounter the following error.
File "/share/home/defaultTenant/caiyx/.conda/envs/uni2ts/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/share/home/defaultTenant/caiyx/.conda/envs/uni2ts/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl
return forward_call(*args, **kwargs)
File "/share/home/defaultTenant/caiyx/python_workspace/uni2ts/src/uni2ts/model/moirai/module.py", line 177, in forward
distr = self.distr_output.distribution(distr_param, loc=loc, scale=scale)
File "/share/home/defaultTenant/caiyx/python_workspace/uni2ts/src/uni2ts/distribution/_base.py", line 171, in distribution
distr = self._distribution(distr_params, validate_args=validate_args)
File "/share/home/defaultTenant/caiyx/python_workspace/uni2ts/src/uni2ts/distribution/mixture.py", line 182, in _distribution
weights=Categorical(
File "/share/home/defaultTenant/caiyx/.conda/envs/uni2ts/lib/python3.10/site-packages/torch/distributions/categorical.py", line 72, in __init__
super().__init__(batch_shape, validate_args=validate_args)
File "/share/home/defaultTenant/caiyx/.conda/envs/uni2ts/lib/python3.10/site-packages/torch/distributions/distribution.py", line 71, in __init__
raise ValueError(
File "/share/home/defaultTenant/caiyx/.conda/envs/uni2ts/lib/python3.10/site-packages/lightning/pytorch/trainer/trainer.py", line 574, in _fit_impl
self._run(model, ckpt_path=ckpt_path)
File "/share/home/defaultTenant/caiyx/.conda/envs/uni2ts/lib/python3.10/site-packages/lightning/pytorch/core/optimizer.py", line 153, in step
step_output = self._strategy.optimizer_step(self._optimizer, closure, **kwargs)
File "/share/home/defaultTenant/caiyx/.conda/envs/uni2ts/lib/python3.10/site-packages/lightning/pytorch/strategies/ddp.py", line 270, in optimizer_step
optimizer_output = super().optimizer_step(optimizer, closure, model, **kwargs)
File "/share/home/defaultTenant/caiyx/.conda/envs/uni2ts/lib/python3.10/site-packages/lightning/pytorch/strategies/strategy.py", line 238, in optimizer_step
return self.precision_plugin.optimizer_step(optimizer, model=model, closure=closure, **kwargs)
File "/share/home/defaultTenant/caiyx/.conda/envs/uni2ts/lib/python3.10/site-packages/lightning/pytorch/plugins/precision/precision.py", line 122, in optimizer_step
return optimizer.step(closure=closure, **kwargs)
File "/share/home/defaultTenant/caiyx/.conda/envs/uni2ts/lib/python3.10/site-packages/torch/optim/lr_scheduler.py", line 137, in wrapper
return func.__get__(opt, opt.__class__)(*args, **kwargs)
File "/share/home/defaultTenant/caiyx/.conda/envs/uni2ts/lib/python3.10/site-packages/torch/optim/optimizer.py", line 487, in wrapper
out = func(*args, **kwargs)
File "/share/home/defaultTenant/caiyx/.conda/envs/uni2ts/lib/python3.10/site-packages/torch/optim/optimizer.py", line 91, in _use_grad
ret = func(self, *args, **kwargs)
File "/share/home/defaultTenant/caiyx/.conda/envs/uni2ts/lib/python3.10/site-packages/torch/optim/adamw.py", line 197, in step
loss = closure()
File "/share/home/defaultTenant/caiyx/.conda/envs/uni2ts/lib/python3.10/site-packages/lightning/pytorch/plugins/precision/precision.py", line 108, in _wrap_closure
closure_result = closure()
File "/share/home/defaultTenant/caiyx/.conda/envs/uni2ts/lib/python3.10/site-packages/lightning/pytorch/plugins/precision/precision.py", line 122, in optimizer_step
return optimizer.step(closure=closure, **kwargs)
File "/share/home/defaultTenant/caiyx/.conda/envs/uni2ts/lib/python3.10/site-packages/torch/optim/lr_scheduler.py", line 137, in wrapper
return func.__get__(opt, opt.__class__)(*args, **kwargs)
File "/share/home/defaultTenant/caiyx/.conda/envs/uni2ts/lib/python3.10/site-packages/torch/optim/optimizer.py", line 487, in wrapper
out = func(*args, **kwargs)
ValueError: Expected parameter logits (Tensor of shape (32, 512, 128, 4)) of distribution Categorical(logits: torch.Size([32, 512, 128, 4])) to satisfy the constraint IndependentConstraint(Real(), 1), but found invalid values:
tensor([[[[nan, nan, nan, nan],
[nan, nan, nan, nan],
[nan, nan, nan, nan],
...,
I think the issue is with my own dataset, but I'm not sure what the specific problem is. Could you help me identify the exact issue with the dataset?
Below is the code I used to build the dataset.
The text was updated successfully, but these errors were encountered: