yoloV10 training error: Conv2d MUDNN failed in: RunFusion #58

Open
SChaoZh opened this issue Aug 30, 2024 · 2 comments

Comments

@SChaoZh

SChaoZh commented Aug 30, 2024

In yoloV10, I used musify-text to replace cuda with musa, then replaced torch.musa with torch_musa. Training then fails with: Conv2d MUDNN failed in: RunFusion
(screenshot: 2024-08-30 22-43-33)

RuntimeError Traceback (most recent call last)
Cell In[18], line 6
3 model = YOLOv10('./model/yolov10s.pt')
5 # yolo detect train data=coco.yaml model=yolov10n/s/m/b/l/x.yaml epochs=500 batch=256 imgsz=640 device=0,1,2,3,4,5,6,7
----> 6 model.train(model='./ultralytics/cfg/models/v10/yolov10s_mk.yaml',
7 data='./ultralytics/cfg/datasets/MK_persion.yaml',
8 epochs=128, batch=32, imgsz=320,
9 device='musa')

File /musaAI/yolov10/ultralytics/engine/model.py:657, in Model.train(self, trainer, **kwargs)
654 pass
656 self.trainer.hub_session = self.session # attach optional HUB session
--> 657 self.trainer.train()
658 # Update model and cfg after training
659 if RANK in (-1, 0):

File /musaAI/yolov10/ultralytics/engine/trainer.py:214, in BaseTrainer.train(self)
211 ddp_cleanup(self, str(file))
213 else:
--> 214 self._do_train(world_size)

File /musaAI/yolov10/ultralytics/engine/trainer.py:328, in BaseTrainer._do_train(self, world_size)
326 if world_size > 1:
327 self._setup_ddp(world_size)
--> 328 self._setup_train(world_size)
330 nb = len(self.train_loader) # number of batches
331 nw = max(round(self.args.warmup_epochs * nb), 100) if self.args.warmup_epochs > 0 else -1 # warmup iterations

File /musaAI/yolov10/ultralytics/engine/trainer.py:272, in BaseTrainer._setup_train(self, world_size)
270 if self.amp and RANK in (-1, 0): # Single-GPU and DDP
271 callbacks_backup = callbacks.default_callbacks.copy() # backup callbacks as check_amp() resets them
--> 272 self.amp = torch.tensor(check_amp(self.model), device=self.device)
273 callbacks.default_callbacks = callbacks_backup # restore callbacks
274 if RANK > -1 and world_size > 1: # DDP

File /musaAI/yolov10/ultralytics/utils/checks.py:654, in check_amp(model)
651 try:
652 from ultralytics import YOLO
--> 654 assert amp_allclose(YOLO("yolov8n.pt"), im)
655 LOGGER.info(f"{prefix}checks passed ✅")
656 except ConnectionError:

File /musaAI/yolov10/ultralytics/utils/checks.py:643, in check_amp.<locals>.amp_allclose(m, im)
641 a = m(im, device=device, verbose=False)[0].boxes.data # FP32 inference
642 with torch_musa.amp.autocast(True):
--> 643 b = m(im, device=device, verbose=False)[0].boxes.data # AMP inference
644 del m
645 return a.shape == b.shape and torch.allclose(a, b.float(), atol=0.5)

File /musaAI/yolov10/ultralytics/engine/model.py:166, in Model.__call__(self, source, stream, **kwargs)
143 def __call__(
144 self,
145 source: Union[str, Path, int, list, tuple, np.ndarray, torch.Tensor] = None,
146 stream: bool = False,
147 **kwargs,
148 ) -> list:
149 """
150 An alias for the predict method, enabling the model instance to be callable.
151
(...)
164 (List[ultralytics.engine.results.Results]): A list of prediction results, encapsulated in the Results class.
165 """
--> 166 return self.predict(source, stream, **kwargs)

File /musaAI/yolov10/ultralytics/engine/model.py:441, in Model.predict(self, source, stream, predictor, **kwargs)
439 if prompts and hasattr(self.predictor, "set_prompts"): # for SAM-type models
440 self.predictor.set_prompts(prompts)
--> 441 return self.predictor.predict_cli(source=source) if is_cli else self.predictor(source=source, stream=stream)

File /musaAI/yolov10/ultralytics/engine/predictor.py:168, in BasePredictor.__call__(self, source, model, stream, *args, **kwargs)
166 return self.stream_inference(source, model, *args, **kwargs)
167 else:
--> 168 return list(self.stream_inference(source, model, *args, **kwargs))

File /opt/conda/envs/py39/lib/python3.9/site-packages/torch/utils/_contextlib.py:35, in _wrap_generator.<locals>.generator_context(*args, **kwargs)
32 try:
33 # Issuing None to a generator fires it up
34 with ctx_factory():
---> 35 response = gen.send(None)
37 while True:
38 try:
39 # Forward the response to our caller and get its next request

File /musaAI/yolov10/ultralytics/engine/predictor.py:248, in BasePredictor.stream_inference(self, source, model, *args, **kwargs)
246 # Inference
247 with profilers[1]:
--> 248 preds = self.inference(im, *args, **kwargs)
249 if self.args.embed:
250 yield from [preds] if isinstance(preds, torch.Tensor) else preds # yield embedding tensors

File /musaAI/yolov10/ultralytics/engine/predictor.py:142, in BasePredictor.inference(self, im, *args, **kwargs)
136 """Runs inference on a given image using the specified model and arguments."""
137 visualize = (
138 increment_path(self.save_dir / Path(self.batch[0][0]).stem, mkdir=True)
139 if self.args.visualize and (not self.source_type.tensor)
140 else False
141 )
--> 142 return self.model(im, augment=self.args.augment, visualize=visualize, embed=self.args.embed, *args, **kwargs)

File /opt/conda/envs/py39/lib/python3.9/site-packages/torch/nn/modules/module.py:1501, in Module._call_impl(self, *args, **kwargs)
1496 # If we don't have any hooks, we want to skip the rest of the logic in
1497 # this function, and just call forward.
1498 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
1499 or _global_backward_pre_hooks or _global_backward_hooks
1500 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1501 return forward_call(*args, **kwargs)
1502 # Do not call functions when jit is used
1503 full_backward_hooks, non_full_backward_hooks = [], []

File /musaAI/yolov10/ultralytics/nn/autobackend.py:423, in AutoBackend.forward(self, im, augment, visualize, embed)
421 # PyTorch
422 if self.pt or self.nn_module:
--> 423 y = self.model(im, augment=augment, visualize=visualize, embed=embed)
425 # TorchScript
426 elif self.jit:

File /opt/conda/envs/py39/lib/python3.9/site-packages/torch/nn/modules/module.py:1501, in Module._call_impl(self, *args, **kwargs)
1496 # If we don't have any hooks, we want to skip the rest of the logic in
1497 # this function, and just call forward.
1498 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
1499 or _global_backward_pre_hooks or _global_backward_hooks
1500 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1501 return forward_call(*args, **kwargs)
1502 # Do not call functions when jit is used
1503 full_backward_hooks, non_full_backward_hooks = [], []

File /musaAI/yolov10/ultralytics/nn/tasks.py:94, in BaseModel.forward(self, x, *args, **kwargs)
92 if isinstance(x, dict): # for cases of training and validating while training.
93 return self.loss(x, *args, **kwargs)
---> 94 return self.predict(x, *args, **kwargs)

File /musaAI/yolov10/ultralytics/nn/tasks.py:112, in BaseModel.predict(self, x, profile, visualize, augment, embed)
110 if augment:
111 return self._predict_augment(x)
--> 112 return self._predict_once(x, profile, visualize, embed)

File /musaAI/yolov10/ultralytics/nn/tasks.py:133, in BaseModel._predict_once(self, x, profile, visualize, embed)
131 if profile:
132 self._profile_one_layer(m, x, dt)
--> 133 x = m(x) # run
134 y.append(x if m.i in self.save else None) # save output
135 if visualize:

File /opt/conda/envs/py39/lib/python3.9/site-packages/torch/nn/modules/module.py:1501, in Module._call_impl(self, *args, **kwargs)
1496 # If we don't have any hooks, we want to skip the rest of the logic in
1497 # this function, and just call forward.
1498 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
1499 or _global_backward_pre_hooks or _global_backward_hooks
1500 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1501 return forward_call(*args, **kwargs)
1502 # Do not call functions when jit is used
1503 full_backward_hooks, non_full_backward_hooks = [], []

File /musaAI/yolov10/ultralytics/nn/modules/conv.py:54, in Conv.forward_fuse(self, x)
52 def forward_fuse(self, x):
53 """Perform transposed convolution of 2D data."""
---> 54 return self.act(self.conv(x))

File /opt/conda/envs/py39/lib/python3.9/site-packages/torch/nn/modules/module.py:1501, in Module._call_impl(self, *args, **kwargs)
1496 # If we don't have any hooks, we want to skip the rest of the logic in
1497 # this function, and just call forward.
1498 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
1499 or _global_backward_pre_hooks or _global_backward_hooks
1500 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1501 return forward_call(*args, **kwargs)
1502 # Do not call functions when jit is used
1503 full_backward_hooks, non_full_backward_hooks = [], []

File /opt/conda/envs/py39/lib/python3.9/site-packages/torch/nn/modules/conv.py:463, in Conv2d.forward(self, input)
462 def forward(self, input: Tensor) -> Tensor:
--> 463 return self._conv_forward(input, self.weight, self.bias)

File /opt/conda/envs/py39/lib/python3.9/site-packages/torch/nn/modules/conv.py:459, in Conv2d._conv_forward(self, input, weight, bias)
455 if self.padding_mode != 'zeros':
456 return F.conv2d(F.pad(input, self._reversed_padding_repeated_twice, mode=self.padding_mode),
457 weight, bias, self.stride,
458 _pair(0), self.dilation, self.groups)
--> 459 return F.conv2d(input, weight, bias, self.stride,
460 self.padding, self.dilation, self.groups)

RuntimeError: Conv2d MUDNN failed in: RunFusion
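For what it's worth, the FP32 pass in amp_allclose (checks.py line 641) completes and only the AMP pass under torch_musa.amp.autocast fails. Below is a minimal sketch, not taken from the repo and with illustrative tensor shapes, that tries to reproduce the failing op outside Ultralytics, assuming torch_musa is installed and exposes the "musa" device:

# Minimal repro sketch -- not from the YOLOv10 repo; shapes are illustrative.
import torch
import torch_musa  # registers the "musa" device with PyTorch

device = torch.device("musa")
conv = torch.nn.Conv2d(3, 16, kernel_size=3, stride=2, padding=1).to(device)
x = torch.randn(1, 3, 320, 320, device=device)

y_fp32 = conv(x)  # FP32 path, analogous to checks.py line 641 (succeeds above)

with torch_musa.amp.autocast(True):  # same context manager as checks.py line 642
    y_amp = conv(x)  # AMP path, where "Conv2d MUDNN failed in: RunFusion" is raised

print(y_fp32.shape, y_amp.shape)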

@SChaoZh
Author

SChaoZh commented Aug 30, 2024

musa_version_query output
musa_toolkits:
(screenshot: 2024-08-30 22-48-53)

{
"version": "2.0.0",
"git branch": "HEAD",
"git tag": "No tag",
"commit id": "35e9fd8519c162c704ee473093d53fe23fdaf224",
"commit date": "2023-12-25 17:27:09 +0800"
}
mcc:
{
"version": "2.0.0",
"git branch": "HEAD",
"git tag": "20231225_master",
"commit id": "228d4651d8fcb8511ca196a5740eef83326ce1cb",
"commit date": "2023-12-21 14:59:02 +0800"
}
mccl:
{
"version": "2.11.4",
"build archs": "--cuda-gpu-arch=mp_21",
"git branch": "HEAD",
"git tag": "No tag",
"commit id": "75a3be1470912dc394a17d0353de402d5393816a",
"commit date": "2023-12-06 19:19:46 +0800"
}
muAlg_dev:
{
"version": "0.3.0",
"git branch": "HEAD",
"commit id": "6690c26e73d430782e6b7148c57d1a0774819195",
"commit date": "2024-01-16 20:53:00 +0800"
}
muPP:
{
"version": "1.4.0",
"build archs": "21",
"git branch": "HEAD",
"git tag": "No tag",
"commit id": "1eacf78a1806cb989bb6972887cc27118bb1ffa2",
"commit date": "2023-12-25 10:15:25 +0800"
}
muThrust_dev:
{
"version": "0.3.0",
"git branch": "HEAD",
"commit id": "c6feacf2b4730028f109e059dbe26a8fb1d63763",
"commit date": "2024-01-16 20:53:06 +0800"
}
mublas:
{
"version": "1.3.0",
"build archs": "21",
"git branch": "HEAD",
"git tag": "20231225_develop",
"commit id": "cbbfe9445e6e8c5686c7ab6678b0b6a15a6b0fab",
"commit date": "2023-12-22 11:49:45 +0800"
}
mudnn:
{
"version": "2.4.0",
"git branch": "HEAD",
"git tag": "No tag",
"commit id": "6958e9e891d46f09bb1fd6c62f30a079286b0416",
"commit date": "2024-01-02 15:19:12 +0800"
}
mufft:
{
"version": "1.2.0",
"build archs": "21",
"git branch": "HEAD",
"git tag": "No tag",
"commit id": "c7397544276d036e32b89a1819b14cac9214b71d",
"commit date": "2023-11-08 11:54:16 +0800"
}
murand:
{
"version": "1.0.0",
"build archs": "21",
"git branch": "HEAD",
"git tag": "20231226_develop",
"commit id": "d2d60a4706c8e03840da689c4f1c5c99ec413966",
"commit date": "2023-11-09 12:45:33 +0800"
}
musify:
{
"version": "0.2.0",
"git branch": "HEAD",
"commit id": "564a5bcaa337a822e25b92678d43d8e37d268938",
"commit date": "2023-08-23 20:42:57 +0800"
}
musparse:
{
"version": "0.4.0",
"build archs": "21",
"git branch": "develop",
"git tag": "No tag",
"commit id": "2bf7506b3f36eca4d6160310773599febd44a179",
"commit date": "2024-01-11 12:14:57 +0800"
}
musa_runtime:
{
"version": "1.5.1",
"git branch": "HEAD",
"git tag": "No tag",
"commit id": "4a0bc1adf2b7feb5854754904febf3c62c3b010a",
"commit date": "2023-12-25 12:01:39 +0800"
}
driver_dependency:
{
"git branch": "HEAD",
"git tag": "20231219_develop",
"commit id": "4ee484c1e76a6894112f68421bb043990d166857",
"commit date": "2023-12-19 14:54:26 +0800"
}
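For completeness, the same environment as seen from Python (assuming torch_musa mirrors the torch.cuda-style helpers under torch.musa, as in its examples):

import torch
import torch_musa  # extends torch with the "musa" backend

print("torch:", torch.__version__)
print("musa available:", torch.musa.is_available())    # assumed torch.cuda-style helper
print("musa device count:", torch.musa.device_count())
print("musa device 0:", torch.musa.get_device_name(0))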

@1823616178

mudnn may not support this neural network.
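If the fused/AMP path is indeed the problem, one untested workaround sketch is to disable AMP so that check_amp() is never reached (trainer.py line 270 only calls it when self.amp is truthy). Paths and arguments below are copied from the original report; only amp=False is new:

from ultralytics import YOLOv10  # import as used in the reporter's notebook

model = YOLOv10('./model/yolov10s.pt')
model.train(
    model='./ultralytics/cfg/models/v10/yolov10s_mk.yaml',
    data='./ultralytics/cfg/datasets/MK_persion.yaml',
    epochs=128, batch=32, imgsz=320,
    device='musa',
    amp=False,  # skips the check_amp() call and mixed-precision autocast during training
)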
