Skip to content

Commit

Permalink
Add log file for important notifications (#1982)
Browse files Browse the repository at this point in the history
Add a new log file for important notifications that require an action from
the node operator.

Using a separate log file makes it easier than grepping specific messages
from the standard logs, and lets us use a different style of messaging,
where we provide more information about what steps to take to resolve
the issue.

We rely on an event sent to the event stream so that plugins can also pick
it up and connect with notification systems (push, messages, mails, etc).
  • Loading branch information
t-bast authored Oct 25, 2021
1 parent 494e346 commit 765a0c5
Show file tree
Hide file tree
Showing 12 changed files with 149 additions and 28 deletions.
8 changes: 7 additions & 1 deletion docs/release-notes/eclair-vnext.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,13 @@

## Major changes

<insert changes>
### Separate log for important notifications

Eclair added a new log file (`notifications.log`) for important notifications that require an action from the node operator.
Node operators should watch this file very regularly.

An event is also sent to the event stream for every such notification.
This lets plugins notify the node operator via external systems (push notifications, email, etc).

### API changes

Expand Down
49 changes: 44 additions & 5 deletions eclair-core/src/main/scala/fr/acinq/eclair/Logs.scala
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@

package fr.acinq.eclair

import akka.actor.typed.Behavior
import akka.actor.typed.eventstream.EventStream
import akka.actor.typed.scaladsl.Behaviors
import akka.event.DiagnosticLoggingAdapter
import akka.io.Tcp
import fr.acinq.bitcoin.ByteVector32
Expand All @@ -28,16 +31,17 @@ import fr.acinq.eclair.io.{Peer, PeerConnection}
import fr.acinq.eclair.router.Router._
import fr.acinq.eclair.router._
import fr.acinq.eclair.wire.protocol._
import grizzled.slf4j.Logger

import java.util.UUID

object Logs {

/**
* @param parentPaymentId_opt depending on the context, this may be:
* - for a send : the parent payment id
* - for a channel-relay : the relay id
* - for a trampoline-relay : the relay id and the parent payment id of the outgoing payment
* - for a send: the parent payment id
* - for a channel-relay: the relay id
* - for a trampoline-relay: the relay id and the parent payment id of the outgoing payment
*/
def mdc(category_opt: Option[LogCategory] = None, remoteNodeId_opt: Option[PublicKey] = None, channelId_opt: Option[ByteVector32] = None, parentPaymentId_opt: Option[UUID] = None, paymentId_opt: Option[UUID] = None, paymentHash_opt: Option[ByteVector32] = None): Map[String, String] =
Seq(
Expand Down Expand Up @@ -77,7 +81,6 @@ object Logs {
}
}

// @formatter: off
sealed trait LogCategory {
def category: String
}
Expand Down Expand Up @@ -138,8 +141,44 @@ object Logs {
}
}

// @formatter: on
}

// we use a dedicated class so that the logging can be independently adjusted
case class Diagnostics()

object NotificationsLogger {

// @formatter:off
sealed trait Severity
case object Info extends Severity
case object Warning extends Severity
case object Error extends Severity
// @formatter:on

/** This event should be used to send important notifications for the node operator. */
case class NotifyNodeOperator(severity: Severity, message: String)

/** This logger should be used to log important notifications for the node operator. */
private val log = Logger("notifications")

/**
* Use this function instead of the [[NotifyNodeOperator]] event when a fatal error leads to stopping eclair immediately.
* Otherwise the actor wouldn't have time to handle the notification and log it before the actor system is shutdown.
*/
def logFatalError(message: String, t: Throwable): Unit = log.error(message, t)

def apply(): Behavior[NotifyNodeOperator] =
Behaviors.setup { context =>
context.system.eventStream ! EventStream.Subscribe(context.self)
Behaviors.receiveMessage {
case NotifyNodeOperator(severity, message) =>
severity match {
case Info => log.info(message)
case Warning => log.warn(message)
case Error => log.error(message)
}
Behaviors.same
}
}

}
3 changes: 3 additions & 0 deletions eclair-core/src/main/scala/fr/acinq/eclair/Setup.scala
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,9 @@ class Setup(val datadir: File,
randomGen.init()
system.spawn(Behaviors.supervise(WeakEntropyPool(randomGen)).onFailure(typed.SupervisorStrategy.restart), "entropy-pool")

// start a system-wide actor to collect and log important notifications for the node operator
system.spawn(Behaviors.supervise(NotificationsLogger()).onFailure(typed.SupervisorStrategy.restart), "notifications-logger")

datadir.mkdirs()
val config = system.settings.config.getConfig("eclair")
val Seeds(nodeSeed, channelSeed) = seeds_opt.getOrElse(NodeParams.getSeeds(datadir))
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
package fr.acinq.eclair.balance

import akka.actor.typed.eventstream.EventStream
import akka.actor.typed.scaladsl.{ActorContext, Behaviors}
import akka.actor.typed.{ActorRef, Behavior}
import fr.acinq.bitcoin.ByteVector32
import fr.acinq.bitcoin.{ByteVector32, MilliBtcDouble}
import fr.acinq.eclair.NotificationsLogger
import fr.acinq.eclair.NotificationsLogger.NotifyNodeOperator
import fr.acinq.eclair.balance.BalanceActor._
import fr.acinq.eclair.balance.CheckBalance.GlobalBalance
import fr.acinq.eclair.balance.Monitoring.{Metrics, Tags}
Expand Down Expand Up @@ -83,6 +86,9 @@ private class BalanceActor(context: ActorContext[Command],
case Success(result) =>
log.info("current balance: total={} onchain.confirmed={} onchain.unconfirmed={} offchain={}", result.total.toDouble, result.onChain.confirmed.toDouble, result.onChain.unconfirmed.toDouble, result.offChain.total.toDouble)
log.debug("current balance details : {}", result)
if (result.onChain.confirmed < 1.millibtc) {
context.system.eventStream ! EventStream.Publish(NotifyNodeOperator(NotificationsLogger.Warning, s"on-chain confirmed balance is low (${result.onChain.confirmed.toMilliBtc}), eclair may not be able to guarantee funds safety in case channels force-close: you should add some utxos to your bitcoin wallet"))
}
Metrics.GlobalBalance.withoutTags().update(result.total.toMilliBtc.toDouble)
Metrics.GlobalBalanceDetailed.withTag(Tags.BalanceType, Tags.BalanceTypes.OnchainConfirmed).update(result.onChain.confirmed.toMilliBtc.toDouble)
Metrics.GlobalBalanceDetailed.withTag(Tags.BalanceType, Tags.BalanceTypes.OnchainUnconfirmed).update(result.onChain.unconfirmed.toMilliBtc.toDouble)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,16 @@ package fr.acinq.eclair.blockchain.watchdogs
import akka.actor.typed.Behavior
import akka.actor.typed.eventstream.EventStream
import akka.actor.typed.scaladsl.Behaviors
import com.softwaremill.sttp.SttpBackend
import fr.acinq.bitcoin.BlockHeader
import fr.acinq.eclair.NodeParams
import fr.acinq.eclair.NotificationsLogger.NotifyNodeOperator
import fr.acinq.eclair.blockchain.CurrentBlockCount
import fr.acinq.eclair.blockchain.watchdogs.Monitoring.{Metrics, Tags}
import fr.acinq.eclair.tor.Socks5ProxyParams
import fr.acinq.eclair.{NodeParams, NotificationsLogger}

import java.util.UUID
import scala.concurrent.Future
import scala.concurrent.duration.{DurationInt, FiniteDuration}
import scala.util.Random

Expand Down Expand Up @@ -69,22 +72,21 @@ object BlockchainWatchdog {
def apply(nodeParams: NodeParams, maxRandomDelay: FiniteDuration, blockTimeout: FiniteDuration = 15 minutes): Behavior[Command] = {
Behaviors.setup { context =>
val socksProxy_opt = nodeParams.socksProxy_opt.flatMap(params => if (params.useForWatchdogs) Some(params) else None)
implicit val sttpBackend = ExplorerApi.createSttpBackend(socksProxy_opt)
val chainHash = nodeParams.chainHash
val sources = nodeParams.blockchainWatchdogSources
implicit val sttpBackend: SttpBackend[Future, Nothing] = ExplorerApi.createSttpBackend(socksProxy_opt)

val explorers = Seq(
ExplorerApi.BlockstreamExplorer(socksProxy_opt),
ExplorerApi.BlockcypherExplorer(socksProxy_opt),
ExplorerApi.MempoolSpaceExplorer(socksProxy_opt)).filter { e =>
val enabled = sources.contains(e.name)
ExplorerApi.MempoolSpaceExplorer(socksProxy_opt)
).filter { e =>
val enabled = nodeParams.blockchainWatchdogSources.contains(e.name)
if (!enabled) {
context.log.warn(s"blockchain watchdog ${e.name} is disabled")
}
enabled
}

val headersOverDnsEnabled = socksProxy_opt.isEmpty && sources.contains(HeadersOverDns.Source)
val headersOverDnsEnabled = socksProxy_opt.isEmpty && nodeParams.blockchainWatchdogSources.contains(HeadersOverDns.Source)
if (!headersOverDnsEnabled) {
context.log.warn(s"blockchain watchdog ${HeadersOverDns.Source} is disabled")
}
Expand All @@ -107,10 +109,10 @@ object BlockchainWatchdog {
case CheckLatestHeaders(blockCount) =>
val id = UUID.randomUUID()
if (headersOverDnsEnabled) {
context.spawn(HeadersOverDns(chainHash, blockCount), s"${HeadersOverDns.Source}-$blockCount-$id") ! HeadersOverDns.CheckLatestHeaders(context.self)
context.spawn(HeadersOverDns(nodeParams.chainHash, blockCount), s"${HeadersOverDns.Source}-$blockCount-$id") ! HeadersOverDns.CheckLatestHeaders(context.self)
}
explorers.foreach { explorer =>
context.spawn(ExplorerApi(chainHash, blockCount, explorer), s"${explorer.name}-$blockCount-$id") ! ExplorerApi.CheckLatestHeaders(context.self)
context.spawn(ExplorerApi(nodeParams.chainHash, blockCount, explorer), s"${explorer.name}-$blockCount-$id") ! ExplorerApi.CheckLatestHeaders(context.self)
}
Behaviors.same
case headers@LatestHeaders(blockCount, blockHeaders, source) =>
Expand All @@ -121,6 +123,7 @@ object BlockchainWatchdog {
if (missingBlocks >= 6) {
context.log.warn("{}: we are {} blocks late: we may be eclipsed from the bitcoin network", source, missingBlocks)
context.system.eventStream ! EventStream.Publish(DangerousBlocksSkew(headers))
context.system.eventStream ! EventStream.Publish(NotifyNodeOperator(NotificationsLogger.Warning, s"we are $missingBlocks late according to $source: we may be eclipsed from the bitcoin network, check your bitcoind node."))
} else if (missingBlocks > 0) {
context.log.info("{}: we are {} blocks late", source, missingBlocks)
} else {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import akka.pattern.pipe
import fr.acinq.bitcoin.Crypto.{PrivateKey, PublicKey}
import fr.acinq.bitcoin.{ByteVector32, OutPoint, Satoshi, SatoshiLong, Script, ScriptFlags, Transaction}
import fr.acinq.eclair.Logs.LogCategory
import fr.acinq.eclair.NotificationsLogger.NotifyNodeOperator
import fr.acinq.eclair._
import fr.acinq.eclair.blockchain.OnChainWallet.MakeFundingTxResponse
import fr.acinq.eclair.blockchain._
Expand Down Expand Up @@ -2498,6 +2499,7 @@ class Channel(val nodeParams: NodeParams, val wallet: OnChainChannelFunder, remo
case None =>
// the published tx was neither their current commitment nor a revoked one
log.error(s"couldn't identify txid=${tx.txid}, something very bad is going on!!!")
context.system.eventStream.publish(NotifyNodeOperator(NotificationsLogger.Error, s"funding tx ${d.commitments.commitInput.outPoint.txid} of channel ${d.channelId} was spent by an unknown transaction, indicating that your DB has lost data or your node has been breached: please contact the dev team."))
goto(ERR_INFORMATION_LEAK)
}
}
Expand All @@ -2521,7 +2523,8 @@ class Channel(val nodeParams: NodeParams, val wallet: OnChainChannelFunder, remo

private def handleInformationLeak(tx: Transaction, d: HasCommitments) = {
// this is never supposed to happen !!
log.error(s"our funding tx ${d.commitments.commitInput.outPoint.txid} was spent by txid=${tx.txid} !!")
log.error(s"our funding tx ${d.commitments.commitInput.outPoint.txid} was spent by txid=${tx.txid}!!")
context.system.eventStream.publish(NotifyNodeOperator(NotificationsLogger.Error, s"funding tx ${d.commitments.commitInput.outPoint.txid} of channel ${d.channelId} was spent by an unknown transaction, indicating that your DB has lost data or your node has been breached: please contact the dev team."))
val exc = FundingTxSpent(d.channelId, tx)
val error = Error(d.channelId, exc.getMessage)

Expand Down Expand Up @@ -2617,6 +2620,7 @@ class Channel(val nodeParams: NodeParams, val wallet: OnChainChannelFunder, remo
} catch {
case t: SQLException =>
log.error(t, "fatal database error\n")
NotificationsLogger.logFatalError("eclair is shutting down because of a fatal database error", t)
sys.exit(-2)
case t: Throwable => handleLocalError(t, event.stateData, None)
}
Expand Down
2 changes: 2 additions & 0 deletions eclair-core/src/main/scala/fr/acinq/eclair/io/Peer.scala
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ import fr.acinq.bitcoin.Crypto.PublicKey
import fr.acinq.bitcoin.{ByteVector32, Satoshi, SatoshiLong, Script}
import fr.acinq.eclair.Features.Wumbo
import fr.acinq.eclair.Logs.LogCategory
import fr.acinq.eclair.NotificationsLogger.NotifyNodeOperator
import fr.acinq.eclair._
import fr.acinq.eclair.blockchain.bitcoind.ZmqWatcher
import fr.acinq.eclair.blockchain.fee.FeeratePerKw
Expand Down Expand Up @@ -114,6 +115,7 @@ class Peer(val nodeParams: NodeParams, remoteNodeId: PublicKey, wallet: OnChainA

case Event(err@Error(channelId, reason, _), d: ConnectedData) if channelId == CHANNELID_ZERO =>
log.error(s"connection-level error, failing all channels! reason=${new String(reason.toArray)}")
context.system.eventStream.publish(NotifyNodeOperator(NotificationsLogger.Info, s"$remoteNodeId sent us a connection-level error, closing all channels (reason=${new String(reason.toArray)})"))
d.channels.values.toSet[ActorRef].foreach(_ forward err) // we deduplicate with toSet because there might be two entries per channel (tmp id and final id)
d.peerConnection ! PeerConnection.Kill(KillReason.AllChannelsFail)
stay()
Expand Down
19 changes: 15 additions & 4 deletions eclair-core/src/test/resources/logback-test.xml
Original file line number Diff line number Diff line change
Expand Up @@ -24,29 +24,40 @@
</encoder>
</appender>

<appender name="CONSOLE_NOTIFICATIONS" class="ch.qos.logback.core.ConsoleAppender">
<target>System.out</target>
<encoder>
<pattern>%d %highlight(%-5level)- %msg%ex{12}%n</pattern>
</encoder>
</appender>

<!--
Logging from tests are silenced by this appender. When there is a test failure
the captured logging events are flushed to the appenders defined for the
akka.actor.testkit.typed.internal.CapturingAppenderDelegate logger.
-->
<appender name="CapturingAppender" class="akka.actor.testkit.typed.internal.CapturingAppender" />
<appender name="CapturingAppender" class="akka.actor.testkit.typed.internal.CapturingAppender"/>

<!--
The appenders defined for this CapturingAppenderDelegate logger are used
when there is a test failure and all logging events from the test are
flushed to these appenders.
-->
<logger name="akka.actor.testkit.typed.internal.CapturingAppenderDelegate" >
<logger name="akka.actor.testkit.typed.internal.CapturingAppenderDelegate">
<appender-ref ref="CONSOLE"/>
</logger>

<!--logger name="fr.acinq.eclair.channel" level="DEBUG"/-->
<logger level="INFO" name="notifications">
<!--<appender-ref ref="CONSOLE_NOTIFICATIONS"/>-->
</logger>

<logger name="fr.acinq.eclair.router" level="WARN"/>
<logger name="fr.acinq.eclair.channel" level="WARN"/>
<logger name="fr.acinq.eclair.Diagnostics" level="OFF"/>
<logger name="fr.acinq.eclair.blockchain.bitcoind.ZmqWatcher" level="OFF"/>
<logger name="fr.acinq.eclair.db.FileBackupHandler" level="OFF"/>
<logger name="fr.acinq.eclair.payment.relay" level="DEBUG" />
<!--<logger name="fr.acinq.eclair.payment.relay" level="DEBUG" />-->
<!--<logger name="fr.acinq.eclair.channel" level="DEBUG" />-->

<root level="INFO">
<appender-ref ref="CapturingAppender"/>
Expand Down
19 changes: 19 additions & 0 deletions eclair-front/src/main/resources/logback.xml
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,20 @@
</encoder>
</appender>

<appender name="ROLLING_NOTIFICATIONS" class="ch.qos.logback.core.rolling.RollingFileAppender">
<file>${eclair.datadir:-${user.home}/.eclair}/notifications.log</file>
<rollingPolicy class="ch.qos.logback.core.rolling.TimeBasedRollingPolicy">
<!-- daily rollover -->
<fileNamePattern>${eclair.datadir:-${user.home}/.eclair}/notifications.%d{yyyy-MM-dd}.log</fileNamePattern>
<!-- keep 30 days' worth of history capped at 1 GB total size -->
<maxHistory>30</maxHistory>
<totalSizeCap>1GB</totalSizeCap>
</rollingPolicy>
<encoder>
<pattern>%d %-5level- %msg%ex{24}%n</pattern>
</encoder>
</appender>

<if condition='isDefined("eclair.printToConsole")'>
<then>
<appender name="CONSOLE" class="ch.qos.logback.core.ConsoleAppender">
Expand All @@ -52,6 +66,11 @@
</then>
</if>

<!-- This logger contains important notifications for node operators. -->
<logger level="INFO" name="notifications" additivity="false">
<appender-ref ref="ROLLING_NOTIFICATIONS"/>
</logger>

<root level="INFO">
<appender-ref ref="ROLLING"/>
</root>
Expand Down
19 changes: 19 additions & 0 deletions eclair-node/src/main/resources/logback.xml
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,20 @@
</encoder>
</appender>

<appender name="ROLLING_NOTIFICATIONS" class="ch.qos.logback.core.rolling.RollingFileAppender">
<file>${eclair.datadir:-${user.home}/.eclair}/notifications.log</file>
<rollingPolicy class="ch.qos.logback.core.rolling.TimeBasedRollingPolicy">
<!-- daily rollover -->
<fileNamePattern>${eclair.datadir:-${user.home}/.eclair}/notifications.%d{yyyy-MM-dd}.log</fileNamePattern>
<!-- keep 30 days' worth of history capped at 1 GB total size -->
<maxHistory>30</maxHistory>
<totalSizeCap>1GB</totalSizeCap>
</rollingPolicy>
<encoder>
<pattern>%d %-5level- %msg%ex{24}%n</pattern>
</encoder>
</appender>

<if condition='isDefined("eclair.printToConsole")'>
<then>
<appender name="CONSOLE" class="ch.qos.logback.core.ConsoleAppender">
Expand All @@ -52,6 +66,11 @@
</then>
</if>

<!-- This logger contains important notifications for node operators. -->
<logger level="INFO" name="notifications" additivity="false">
<appender-ref ref="ROLLING_NOTIFICATIONS"/>
</logger>

<root level="INFO">
<appender-ref ref="ROLLING"/>
</root>
Expand Down
Loading

0 comments on commit 765a0c5

Please sign in to comment.