When tests are run on a CPU-only machine, `tests/test_integration.py::test_denovo` fails with `torch.multiprocessing.spawn.ProcessRaisedException`. The trace is below:
========================================= FAILURES ==========================================
________________________________________ test_denovo ________________________________________
mgf_small = PosixPath('/tmp/pytest-of-melih/pytest-0/test_denovo0/small.mgf')
tmp_path = PosixPath('/tmp/pytest-of-melih/pytest-0/test_denovo0')
monkeypatch = <_pytest.monkeypatch.MonkeyPatch object at 0x7f8515818940>
def test_denovo(mgf_small, tmp_path, monkeypatch):
# We can use this to explicitly test different versions.
monkeypatch.setattr(casanovo, "__version__", "3.0.1")
# Predict on a small MGF file and verify that the output file exists.
output_filename = tmp_path / "test.mztab"
> casanovo.main(
[
"--mode",
"denovo",
"--peak_path",
str(mgf_small),
"--output",
str(output_filename),
],
standalone_mode=False,
)
tests/test_integration.py:13:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
../../../anaconda3/envs/casanovo_dev/lib/python3.8/site-packages/click/core.py:1130: in __call__
return self.main(*args, **kwargs)
../../../anaconda3/envs/casanovo_dev/lib/python3.8/site-packages/click/core.py:1055: in main
rv = self.invoke(ctx)
../../../anaconda3/envs/casanovo_dev/lib/python3.8/site-packages/click/core.py:1404: in invoke
return ctx.invoke(self.callback, **ctx.params)
../../../anaconda3/envs/casanovo_dev/lib/python3.8/site-packages/click/core.py:760: in invoke
return __callback(*args, **kwargs)
casanovo/casanovo.py:222: in main
model_runner.predict(peak_path, model, config, writer)
casanovo/denovo/model_runner.py:46: in predict
_execute_existing(peak_path, model_filename, config, False, out_writer)
casanovo/denovo/model_runner.py:166: in _execute_existing
run_trainer(model, loaders.test_dataloader())
../../../anaconda3/envs/casanovo_dev/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py:949: in predict
return self._call_and_handle_interrupt(
../../../anaconda3/envs/casanovo_dev/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py:648: in _call_and_handle_interrupt
return self.strategy.launcher.launch(trainer_fn, *args, trainer=self, **kwargs)
../../../anaconda3/envs/casanovo_dev/lib/python3.8/site-packages/pytorch_lightning/strategies/launchers/multiprocessing.py:107: in launch
mp.start_processes(
../../../anaconda3/envs/casanovo_dev/lib/python3.8/site-packages/torch/multiprocessing/spawn.py:198: in start_processes
while not context.join():
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <torch.multiprocessing.spawn.ProcessContext object at 0x7f85157a25e0>, timeout = None
def join(self, timeout=None):
r"""
Tries to join one or more processes in this spawn context.
If one of them exited with a non-zero exit status, this function
kills the remaining processes and raises an exception with the cause
of the first process exiting.
Returns ``True`` if all processes have been joined successfully,
``False`` if there are more processes that need to be joined.
Args:
timeout (float): Wait this long before giving up on waiting.
"""
# Ensure this function can be called even when we're done.
if len(self.sentinels) == 0:
return True
# Wait for any process to fail or all of them to succeed.
ready = multiprocessing.connection.wait(
self.sentinels.keys(),
timeout=timeout,
)
error_index = None
for sentinel in ready:
index = self.sentinels.pop(sentinel)
process = self.processes[index]
process.join()
if process.exitcode != 0:
error_index = index
break
# Return if there was no error.
if error_index is None:
# Return whether or not all processes have been joined.
return len(self.sentinels) == 0
# Assume failure. Terminate processes that are still alive.
for process in self.processes:
if process.is_alive():
process.terminate()
process.join()
# There won't be an error on the queue if the process crashed.
failed_process = self.processes[error_index]
if self.error_queues[error_index].empty():
exitcode = self.processes[error_index].exitcode
if exitcode < 0:
name = signal.Signals(-exitcode).name
raise ProcessExitedException(
"process %d terminated with signal %s" %
(error_index, name),
error_index=error_index,
error_pid=failed_process.pid,
exit_code=exitcode,
signal_name=name
)
else:
raise ProcessExitedException(
"process %d terminated with exit code %d" %
(error_index, exitcode),
error_index=error_index,
error_pid=failed_process.pid,
exit_code=exitcode
)
original_trace = self.error_queues[error_index].get()
msg = "\n\n-- Process %d terminated with the following error:\n" % error_index
msg += original_trace
> raise ProcessRaisedException(msg, error_index, failed_process.pid)
E torch.multiprocessing.spawn.ProcessRaisedException:
E
E -- Process 2 terminated with the following error:
E Traceback (most recent call last):
E File "/net/noble/vol1/home/melih/anaconda3/envs/casanovo_dev/lib/python3.8/site-packages/torch/multiprocessing/spawn.py", line 69, in _wrap
E fn(i, *args)
E File "/net/noble/vol1/home/melih/anaconda3/envs/casanovo_dev/lib/python3.8/site-packages/pytorch_lightning/strategies/launchers/multiprocessing.py", line 133, in _wrapping_function
E results = function(*args, **kwargs)
E File "/net/noble/vol1/home/melih/anaconda3/envs/casanovo_dev/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py", line 996, in _predict_impl
E results = self._run(model, ckpt_path=self.ckpt_path)
E File "/net/noble/vol1/home/melih/anaconda3/envs/casanovo_dev/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py", line 1166, in _run
E results = self._run_stage()
E File "/net/noble/vol1/home/melih/anaconda3/envs/casanovo_dev/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py", line 1251, in _run_stage
E return self._run_predict()
E File "/net/noble/vol1/home/melih/anaconda3/envs/casanovo_dev/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py", line 1307, in _run_predict
E self.reset_predict_dataloader(self.lightning_module)
E File "/net/noble/vol1/home/melih/anaconda3/envs/casanovo_dev/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py", line 1961, in reset_predict_dataloader
E self.num_predict_batches, self.predict_dataloaders = self._data_connector._reset_eval_dataloader(
E File "/net/noble/vol1/home/melih/anaconda3/envs/casanovo_dev/lib/python3.8/site-packages/pytorch_lightning/trainer/connectors/data_connector.py", line 385, in _reset_eval_dataloader
E dataloaders = [self._prepare_dataloader(dl, mode=mode) for dl in dataloaders if dl is not None]
E File "/net/noble/vol1/home/melih/anaconda3/envs/casanovo_dev/lib/python3.8/site-packages/pytorch_lightning/trainer/connectors/data_connector.py", line 385, in <listcomp>
E dataloaders = [self._prepare_dataloader(dl, mode=mode) for dl in dataloaders if dl is not None]
E File "/net/noble/vol1/home/melih/anaconda3/envs/casanovo_dev/lib/python3.8/site-packages/pytorch_lightning/trainer/connectors/data_connector.py", line 295, in _prepare_dataloader
E sampler = self._resolve_sampler(dataloader, shuffle=shuffle, mode=mode)
E File "/net/noble/vol1/home/melih/anaconda3/envs/casanovo_dev/lib/python3.8/site-packages/pytorch_lightning/trainer/connectors/data_connector.py", line 308, in _resolve_sampler
E sampler = self._get_distributed_sampler(
E File "/net/noble/vol1/home/melih/anaconda3/envs/casanovo_dev/lib/python3.8/site-packages/pytorch_lightning/trainer/connectors/data_connector.py", line 347, in _get_distributed_sampler
E sampler = cls(dataloader.sampler, **kwargs)
E File "/net/noble/vol1/home/melih/anaconda3/envs/casanovo_dev/lib/python3.8/site-packages/pytorch_lightning/overrides/distributed.py", line 165, in __init__
E super().__init__(_DatasetSamplerWrapper(sampler), *args, **kwargs)
E File "/net/noble/vol1/home/melih/anaconda3/envs/casanovo_dev/lib/python3.8/site-packages/pytorch_lightning/overrides/distributed.py", line 85, in __init__
E assert self.num_samples >= 1 or self.total_size == 0
E AssertionError
../../../anaconda3/envs/casanovo_dev/lib/python3.8/site-packages/torch/multiprocessing/spawn.py:160: ProcessRaisedException