diff --git a/python/ray/log_monitor.py b/python/ray/log_monitor.py index 421231686ec14..8b6b3547e744d 100644 --- a/python/ray/log_monitor.py +++ b/python/ray/log_monitor.py @@ -90,10 +90,12 @@ def close_all_files(self): def update_log_filenames(self): """Update the list of log files to monitor.""" - # we only monior worker log files + # output of user code is written here log_file_paths = glob.glob("{}/worker*[.out|.err]".format( self.logs_dir)) - for file_path in log_file_paths: + # segfaults and other serious errors are logged here + raylet_err_paths = glob.glob("{}/raylet*.err".format(self.logs_dir)) + for file_path in log_file_paths + raylet_err_paths: if os.path.isfile( file_path) and file_path not in self.log_filenames: self.log_filenames.add(file_path) @@ -195,6 +197,8 @@ def check_log_files_and_publish_updates(self): file_info.worker_pid = int( lines_to_publish[0].split(" ")[-1]) lines_to_publish = lines_to_publish[1:] + elif "/raylet" in file_info.filename: + file_info.worker_pid = "raylet" # Record the current position in the file. file_info.file_position = file_info.file_handle.tell() diff --git a/python/ray/worker.py b/python/ray/worker.py index 91117aca1c97b..00fad481bec11 100644 --- a/python/ray/worker.py +++ b/python/ray/worker.py @@ -1594,15 +1594,22 @@ def print_logs(redis_client, threads_stopped): num_consecutive_messages_received += 1 data = json.loads(ray.utils.decode(msg["data"])) + + def color_for(data): + if data["pid"] == "raylet": + return colorama.Fore.YELLOW + else: + return colorama.Fore.CYAN + if data["ip"] == localhost: for line in data["lines"]: print("{}{}(pid={}){} {}".format( - colorama.Style.DIM, colorama.Fore.CYAN, data["pid"], + colorama.Style.DIM, color_for(data), data["pid"], colorama.Style.RESET_ALL, line)) else: for line in data["lines"]: print("{}{}(pid={}, ip={}){} {}".format( - colorama.Style.DIM, colorama.Fore.CYAN, data["pid"], + colorama.Style.DIM, color_for(data), data["pid"], data["ip"], colorama.Style.RESET_ALL, line)) if (num_consecutive_messages_received % 100 == 0 diff --git a/src/ray/util/logging.cc b/src/ray/util/logging.cc index 7311181c15f3e..e57712d844639 100644 --- a/src/ray/util/logging.cc +++ b/src/ray/util/logging.cc @@ -11,11 +11,28 @@ #include #ifdef RAY_USE_GLOG +#include #include "glog/logging.h" #endif namespace ray { +#ifdef RAY_USE_GLOG +struct StdoutLogger : public google::base::Logger { + virtual void Write(bool /* should flush */, time_t /* timestamp */, const char *message, + int length) { + // note: always flush otherwise it never shows up in raylet.out + std::cout << std::string(message, length) << std::flush; + } + + virtual void Flush() { std::cout.flush(); } + + virtual google::uint32 LogSize() { return 0; } +}; + +static StdoutLogger stdout_logger_singleton; +#endif + // This is the default implementation of ray log, // which is independent of any libs. class CerrLog { @@ -122,7 +139,12 @@ void RayLog::StartRayLog(const std::string &app_name, RayLogLevel severity_thres #ifdef RAY_USE_GLOG google::InitGoogleLogging(app_name_.c_str()); int mapped_severity_threshold = GetMappedSeverity(severity_threshold_); - google::SetStderrLogging(mapped_severity_threshold); + google::SetStderrLogging(GetMappedSeverity(RayLogLevel::ERROR)); + for (int i = static_cast(severity_threshold_); + i <= static_cast(RayLogLevel::FATAL); ++i) { + int level = GetMappedSeverity(static_cast(i)); + google::base::SetLogger(level, &stdout_logger_singleton); + } // Enable log file if log_dir_ is not empty. if (!log_dir_.empty()) { auto dir_ends_with_slash = log_dir_;