Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix/stop core on components startup exception #7021

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions src/tribler/core/components/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ def __init__(self, config: TriblerConfig = None, components: List[Component] = (
shutdown_event: Event = None, notifier: Notifier = None, failfast: bool = True):
# deepcode ignore unguarded~next~call: not necessary to catch StopIteration on infinite iterator
self.id = next(Session._next_session_id)
self.exit_code = 0
self.failfast = failfast
self.logger = logging.getLogger(self.__class__.__name__)
self.config: TriblerConfig = config or TriblerConfig()
Expand Down Expand Up @@ -162,6 +163,11 @@ def start(self):

def _reraise_startup_exception_in_separate_task(self):
async def exception_reraiser():
e = self._startup_exception
if isinstance(e, ComponentStartupException) and e.component.tribler_should_stop_on_component_error:
self.exit_code = 1
self.shutdown_event.set()

# the exception should be intercepted by event loop exception handler
raise self._startup_exception

Expand Down
2 changes: 1 addition & 1 deletion src/tribler/core/sentry_reporter/sentry_reporter.py
Original file line number Diff line number Diff line change
Expand Up @@ -280,7 +280,7 @@ def event_from_exception(self, exception) -> Dict:
Returns:
the event that has been saved in `_before_send` method
"""
self._logger.info(f"Event from exception: {exception}")
self._logger.debug(f"Event from exception: {exception}")
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To avoid spamming logs with multiple copies of the same error traceback

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: as an alternative, you could use `SentryReporter.ignore_logger()`.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As I understand it, SentryReporter.ignore_logger() affects what we send to Sentry, but does not affect our log files.


if not exception:
return {}
Expand Down
23 changes: 19 additions & 4 deletions src/tribler/core/start_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import os
import signal
import sys
from pathlib import Path
from typing import List

from tribler.core import notifications
Expand Down Expand Up @@ -92,7 +93,12 @@ def components_gen(config: TriblerConfig):
yield GigachannelManagerComponent()


async def core_session(config: TriblerConfig, components: List[Component]):
async def core_session(config: TriblerConfig, components: List[Component]) -> int:
"""
Async task for running a new Tribler session.

Returns an exit code, which is non-zero if the Tribler session finished with an error.
"""
session = Session(config, components, failfast=False)
signal.signal(signal.SIGTERM, lambda signum, stack: session.shutdown_event.set)
async with session.start() as session:
Expand All @@ -108,12 +114,16 @@ async def core_session(config: TriblerConfig, components: List[Component]):
session.notifier[notifications.tribler_shutdown_state]("Saving configuration...")
config.write()

return session.exit_code


def run_tribler_core_session(api_port, api_key, state_dir, gui_test_mode=False):
def run_tribler_core_session(api_port: str, api_key: str, state_dir: Path, gui_test_mode: bool = False) -> int:
"""
This method will start a new Tribler session.
Note that there is no direct communication between the GUI process and the core: all communication is performed
through the HTTP API.

Returns an exit code value, which is non-zero if the Tribler session finished with an error.
"""
logger.info(f'Start tribler core. API port: "{api_port}". '
f'API key: "{api_key}". State dir: "{state_dir}". '
Expand Down Expand Up @@ -154,7 +164,7 @@ def run_tribler_core_session(api_port, api_key, state_dir, gui_test_mode=False):
loop.set_exception_handler(exception_handler.unhandled_error_observer)

try:
loop.run_until_complete(core_session(config, components=list(components_gen(config))))
exit_code = loop.run_until_complete(core_session(config, components=list(components_gen(config))))
finally:
if trace_logger:
trace_logger.close()
Expand All @@ -163,6 +173,8 @@ def run_tribler_core_session(api_port, api_key, state_dir, gui_test_mode=False):
for handler in logging.getLogger().handlers:
handler.flush()

return exit_code


def run_core(api_port, api_key, root_state_dir, parsed_args):
logger.info('Running Core' + ' in gui_test_mode' if parsed_args.gui_test_mode else '')
Expand All @@ -171,4 +183,7 @@ def run_core(api_port, api_key, root_state_dir, parsed_args):
with single_tribler_instance(root_state_dir):
version_history = VersionHistory(root_state_dir)
state_dir = version_history.code_version.directory
run_tribler_core_session(api_port, api_key, state_dir, gui_test_mode=parsed_args.gui_test_mode)
exit_code = run_tribler_core_session(api_port, api_key, state_dir, gui_test_mode=parsed_args.gui_test_mode)

if exit_code:
sys.exit(exit_code)
45 changes: 28 additions & 17 deletions src/tribler/gui/core_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import os
import re
import sys
from collections import deque
from pathlib import Path
from typing import Optional

Expand All @@ -16,6 +17,9 @@
from tribler.gui.utilities import connect


CORE_OUTPUT_DEQUE_LENGTH = 10


class CoreManager(QObject):
"""
The CoreManager is responsible for managing the Tribler core (starting/stopping). When we are running the GUI tests,
Expand Down Expand Up @@ -47,8 +51,8 @@ def __init__(self, root_state_dir: Path, api_port: int, api_key: str, app_manage
self.should_quit_app_on_core_finished = False

self.use_existing_core = True
self.last_core_stdout_output: str = ''
self.last_core_stderr_output: str = ''
self.last_core_stdout_output: deque = deque(maxlen=CORE_OUTPUT_DEQUE_LENGTH)
self.last_core_stderr_output: deque = deque(maxlen=CORE_OUTPUT_DEQUE_LENGTH)
Copy link
Contributor Author

@kozlovsky kozlovsky Aug 29, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The last write to stderr does not necessarily contain the actual exception traceback, so we store several writes.


connect(self.events_manager.core_connected, self.on_core_connected)

Expand Down Expand Up @@ -125,10 +129,11 @@ def on_core_stdout_read_ready(self):
return

raw_output = bytes(self.core_process.readAllStandardOutput())
self.last_core_stdout_output = self.decode_raw_core_output(raw_output).strip()
output = self.decode_raw_core_output(raw_output).strip()
self.last_core_stdout_output.append(output)

try:
print(self.last_core_stdout_output) # print core output # noqa: T001
print(output) # print core output # noqa: T001
except OSError:
# Possible reason - cannot write to stdout as it was already closed during the application shutdown
pass
Expand All @@ -139,10 +144,11 @@ def on_core_stderr_read_ready(self):
return

raw_output = bytes(self.core_process.readAllStandardError())
self.last_core_stderr_output = self.decode_raw_core_output(raw_output).strip()
output = self.decode_raw_core_output(raw_output).strip()
self.last_core_stderr_output.append(output)

try:
print(self.last_core_stderr_output, file=sys.stderr) # print core output # noqa: T001
print(output, file=sys.stderr) # print core output # noqa: T001
except OSError:
# Possible reason - cannot write to stderr as it was already closed during the application shutdown
pass
Expand Down Expand Up @@ -196,18 +202,24 @@ def kill_core_process_and_remove_the_lock_file(self):
process_checker = ProcessChecker(self.root_state_dir)
process_checker.remove_lock()

def get_last_core_output(self, quoted=True):
    """
    Return the most recent Core output kept by the manager.

    The stored stderr chunks are used when they contain any text; otherwise the stored stdout chunks are used.
    Chunks are joined with a newline: each chunk is stripped before it is stored, so joining with an empty
    string would glue the last line of one chunk to the first line of the next one.

    If `quoted` is true, every line of the result is prefixed with '> '.
    """
    def join_chunks(chunks):
        # Skip empty chunks so that whitespace-only reads do not produce blank lines
        return '\n'.join(chunk for chunk in chunks if chunk)

    output = join_chunks(self.last_core_stderr_output) or join_chunks(self.last_core_stdout_output)
    if quoted:
        output = re.sub(r'^', '> ', output, flags=re.MULTILINE)
    return output

@staticmethod
def format_error_message(exit_code: int, exit_status: int, last_core_output: str) -> str:
def format_error_message(exit_code: int, exit_status: int) -> str:
message = f"The Tribler core has unexpectedly finished with exit code {exit_code} and status: {exit_status}."
try:
string_error = os.strerror(exit_code)
except ValueError:
# On platforms where strerror() returns NULL when given an unknown error number, ValueError is raised.
string_error = 'unknown error number'
if exit_code == 1:
string_error = "Application error"
else:
try:
string_error = os.strerror(exit_code)
except ValueError:
# On platforms where strerror() returns NULL when given an unknown error number, ValueError is raised.
string_error = 'unknown error number'
message += f'\n\nError message: {string_error}'

quoted_output = re.sub(r'^', '> ', last_core_output, flags=re.MULTILINE)
message += f"\n\nLast core output:\n{quoted_output}"
Comment on lines -209 to -210
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This PR moves the rendering of the last core output to gui_error, as we need it not only for CoreCrashedError but for other subclasses of CoreError as well, for example, for CoreConnectTimeoutError.

return message

def on_core_finished(self, exit_code, exit_status):
Expand All @@ -218,8 +230,7 @@ def on_core_finished(self, exit_code, exit_status):
if self.should_quit_app_on_core_finished:
self.app_manager.quit_application()
else:
output = self.last_core_stderr_output or self.last_core_stdout_output
error_message = self.format_error_message(exit_code, exit_status, output)
error_message = self.format_error_message(exit_code, exit_status)
self._logger.warning(error_message)

if not self.app_manager.quitting_app:
Expand Down
2 changes: 1 addition & 1 deletion src/tribler/gui/error_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ def gui_error(self, *exc_info):

is_core_exception = issubclass(info_type, CoreError)
if is_core_exception:
text = text + self.tribler_window.core_manager.last_core_stderr_output
text = f'{text}\n\nLast Core output:\n{self.tribler_window.core_manager.get_last_core_output()}'
self._stop_tribler(text)

reported_error = ReportedError(
Expand Down