Skip to content

Commit

Permalink
Add an error when a node fails when using dora run (#719)
Browse files Browse the repository at this point in the history
Currently, when using `dora run`, a node failure does not raise any
warning or error.

This makes errors silent.

This PR makes the error explicit.
  • Loading branch information
haixuanTao authored Nov 27, 2024
2 parents 84ec354 + 5f76ea3 commit 206be36
Showing 1 changed file with 32 additions and 10 deletions.
42 changes: 32 additions & 10 deletions binaries/daemon/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -358,11 +358,40 @@ impl Daemon {
})?;
socket_stream_send(connection, &msg)
.await
.wrap_err("failed to send watchdog message to dora-coordinator")?;
.wrap_err("failed to send log message to dora-coordinator")?;

if self.last_coordinator_heartbeat.elapsed() > Duration::from_secs(20) {
bail!("lost connection to coordinator")
}
} else {
match message.level {
LogLevel::Error => {
if let Some(node_id) = message.node_id {
tracing::error!("{}/{} errored:", message.dataflow_id.to_string(), node_id);
}
for line in message.message.lines() {
tracing::error!(" {}", line);
}
}
LogLevel::Warn => {
if let Some(node_id) = message.node_id {
tracing::warn!("{}/{} warned:", message.dataflow_id.to_string(), node_id);
}
for line in message.message.lines() {
tracing::warn!(" {}", line);
}
}
LogLevel::Info => {
if let Some(node_id) = message.node_id {
tracing::info!("{}/{} info:", message.dataflow_id.to_string(), node_id);
}

for line in message.message.lines() {
tracing::info!(" {}", line);
}
}
_ => {}
}
}
Ok(())
}
Expand Down Expand Up @@ -1237,10 +1266,7 @@ impl Daemon {
exit_status,
} => {
let node_result = match exit_status {
NodeExitStatus::Success => {
tracing::info!("node {dataflow_id}/{node_id} finished successfully");
Ok(())
}
NodeExitStatus::Success => Ok(()),
exit_status => {
let dataflow = self.running.get(&dataflow_id);
let caused_by_node = dataflow
Expand Down Expand Up @@ -1274,10 +1300,6 @@ impl Daemon {
})
.unwrap_or_default();

tracing::error!("node {dataflow_id}/{node_id} failed with:");
for line in cause.lines() {
tracing::error!(" {}", line);
}
NodeErrorCause::Other { stderr: cause }
}
};
Expand All @@ -1302,7 +1324,7 @@ impl Daemon {
file: None,
line: None,
message: match &node_result {
Ok(()) => "node finished successfully".to_string(),
Ok(()) => format!("{node_id} finished successfully"),
Err(err) => format!("{err}"),
},
})
Expand Down

0 comments on commit 206be36

Please sign in to comment.