From 765a0c5436a8e2ec1f469f1b066910f1205c4c99 Mon Sep 17 00:00:00 2001 From: Bastien Teinturier <31281497+t-bast@users.noreply.github.com> Date: Mon, 25 Oct 2021 11:21:39 +0200 Subject: [PATCH] Add log file for important notifications (#1982) Add a new log file for important notifications that require an action from the node operator. Using a separate log file makes it easier than grepping specific messages from the standard logs, and lets us use a different style of messaging, where we provide more information about what steps to take to resolve the issue. We rely on an event sent to the event stream so that plugins can also pick it up and connect with notification systems (push, messages, mails, etc). --- docs/release-notes/eclair-vnext.md | 8 ++- .../src/main/scala/fr/acinq/eclair/Logs.scala | 49 +++++++++++++++++-- .../main/scala/fr/acinq/eclair/Setup.scala | 3 ++ .../acinq/eclair/balance/BalanceActor.scala | 8 ++- .../watchdogs/BlockchainWatchdog.scala | 21 ++++---- .../fr/acinq/eclair/channel/Channel.scala | 6 ++- .../main/scala/fr/acinq/eclair/io/Peer.scala | 2 + .../src/test/resources/logback-test.xml | 19 +++++-- eclair-front/src/main/resources/logback.xml | 19 +++++++ eclair-node/src/main/resources/logback.xml | 19 +++++++ .../src/main/scala/fr/acinq/eclair/Boot.scala | 10 ++-- .../src/test/resources/logback-test.xml | 13 ++++- 12 files changed, 149 insertions(+), 28 deletions(-) diff --git a/docs/release-notes/eclair-vnext.md b/docs/release-notes/eclair-vnext.md index b3578349d9..d3fa7ac134 100644 --- a/docs/release-notes/eclair-vnext.md +++ b/docs/release-notes/eclair-vnext.md @@ -4,7 +4,13 @@ ## Major changes - +### Separate log for important notifications + +Eclair added a new log file (`notifications.log`) for important notifications that require an action from the node operator. +Node operators should watch this file very regularly. + +An event is also sent to the event stream for every such notification. +This lets plugins notify the node operator via external systems (push notifications, email, etc). ### API changes diff --git a/eclair-core/src/main/scala/fr/acinq/eclair/Logs.scala b/eclair-core/src/main/scala/fr/acinq/eclair/Logs.scala index 5f83f372c4..32d4882fd2 100644 --- a/eclair-core/src/main/scala/fr/acinq/eclair/Logs.scala +++ b/eclair-core/src/main/scala/fr/acinq/eclair/Logs.scala @@ -16,6 +16,9 @@ package fr.acinq.eclair +import akka.actor.typed.Behavior +import akka.actor.typed.eventstream.EventStream +import akka.actor.typed.scaladsl.Behaviors import akka.event.DiagnosticLoggingAdapter import akka.io.Tcp import fr.acinq.bitcoin.ByteVector32 @@ -28,6 +31,7 @@ import fr.acinq.eclair.io.{Peer, PeerConnection} import fr.acinq.eclair.router.Router._ import fr.acinq.eclair.router._ import fr.acinq.eclair.wire.protocol._ +import grizzled.slf4j.Logger import java.util.UUID @@ -35,9 +39,9 @@ object Logs { /** * @param parentPaymentId_opt depending on the context, this may be: - * - for a send : the parent payment id - * - for a channel-relay : the relay id - * - for a trampoline-relay : the relay id and the parent payment id of the outgoing payment + * - for a send: the parent payment id + * - for a channel-relay: the relay id + * - for a trampoline-relay: the relay id and the parent payment id of the outgoing payment */ def mdc(category_opt: Option[LogCategory] = None, remoteNodeId_opt: Option[PublicKey] = None, channelId_opt: Option[ByteVector32] = None, parentPaymentId_opt: Option[UUID] = None, paymentId_opt: Option[UUID] = None, paymentHash_opt: Option[ByteVector32] = None): Map[String, String] = Seq( @@ -77,7 +81,6 @@ object Logs { } } - // @formatter: off sealed trait LogCategory { def category: String } @@ -138,8 +141,44 @@ object Logs { } } - // @formatter: on } // we use a dedicated class so that the logging can be independently adjusted case class Diagnostics() + +object NotificationsLogger { + + // @formatter:off + sealed trait Severity + case object Info extends Severity + case object Warning extends Severity + case object Error extends Severity + // @formatter:on + + /** This event should be used to send important notifications for the node operator. */ + case class NotifyNodeOperator(severity: Severity, message: String) + + /** This logger should be used to log important notifications for the node operator. */ + private val log = Logger("notifications") + + /** + * Use this function instead of the [[NotifyNodeOperator]] event when a fatal error leads to stopping eclair immediately. + * Otherwise the actor wouldn't have time to handle the notification and log it before the actor system is shutdown. + */ + def logFatalError(message: String, t: Throwable): Unit = log.error(message, t) + + def apply(): Behavior[NotifyNodeOperator] = + Behaviors.setup { context => + context.system.eventStream ! EventStream.Subscribe(context.self) + Behaviors.receiveMessage { + case NotifyNodeOperator(severity, message) => + severity match { + case Info => log.info(message) + case Warning => log.warn(message) + case Error => log.error(message) + } + Behaviors.same + } + } + +} \ No newline at end of file diff --git a/eclair-core/src/main/scala/fr/acinq/eclair/Setup.scala b/eclair-core/src/main/scala/fr/acinq/eclair/Setup.scala index 62ac023704..066c157831 100644 --- a/eclair-core/src/main/scala/fr/acinq/eclair/Setup.scala +++ b/eclair-core/src/main/scala/fr/acinq/eclair/Setup.scala @@ -87,6 +87,9 @@ class Setup(val datadir: File, randomGen.init() system.spawn(Behaviors.supervise(WeakEntropyPool(randomGen)).onFailure(typed.SupervisorStrategy.restart), "entropy-pool") + // start a system-wide actor to collect and log important notifications for the node operator + system.spawn(Behaviors.supervise(NotificationsLogger()).onFailure(typed.SupervisorStrategy.restart), "notifications-logger") + datadir.mkdirs() val config = system.settings.config.getConfig("eclair") val Seeds(nodeSeed, channelSeed) = seeds_opt.getOrElse(NodeParams.getSeeds(datadir)) diff --git a/eclair-core/src/main/scala/fr/acinq/eclair/balance/BalanceActor.scala b/eclair-core/src/main/scala/fr/acinq/eclair/balance/BalanceActor.scala index 923d04a6ef..24aa1d06dc 100644 --- a/eclair-core/src/main/scala/fr/acinq/eclair/balance/BalanceActor.scala +++ b/eclair-core/src/main/scala/fr/acinq/eclair/balance/BalanceActor.scala @@ -1,8 +1,11 @@ package fr.acinq.eclair.balance +import akka.actor.typed.eventstream.EventStream import akka.actor.typed.scaladsl.{ActorContext, Behaviors} import akka.actor.typed.{ActorRef, Behavior} -import fr.acinq.bitcoin.ByteVector32 +import fr.acinq.bitcoin.{ByteVector32, MilliBtcDouble} +import fr.acinq.eclair.NotificationsLogger +import fr.acinq.eclair.NotificationsLogger.NotifyNodeOperator import fr.acinq.eclair.balance.BalanceActor._ import fr.acinq.eclair.balance.CheckBalance.GlobalBalance import fr.acinq.eclair.balance.Monitoring.{Metrics, Tags} @@ -83,6 +86,9 @@ private class BalanceActor(context: ActorContext[Command], case Success(result) => log.info("current balance: total={} onchain.confirmed={} onchain.unconfirmed={} offchain={}", result.total.toDouble, result.onChain.confirmed.toDouble, result.onChain.unconfirmed.toDouble, result.offChain.total.toDouble) log.debug("current balance details : {}", result) + if (result.onChain.confirmed < 1.millibtc) { + context.system.eventStream ! EventStream.Publish(NotifyNodeOperator(NotificationsLogger.Warning, s"on-chain confirmed balance is low (${result.onChain.confirmed.toMilliBtc}), eclair may not be able to guarantee funds safety in case channels force-close: you should add some utxos to your bitcoin wallet")) + } Metrics.GlobalBalance.withoutTags().update(result.total.toMilliBtc.toDouble) Metrics.GlobalBalanceDetailed.withTag(Tags.BalanceType, Tags.BalanceTypes.OnchainConfirmed).update(result.onChain.confirmed.toMilliBtc.toDouble) Metrics.GlobalBalanceDetailed.withTag(Tags.BalanceType, Tags.BalanceTypes.OnchainUnconfirmed).update(result.onChain.unconfirmed.toMilliBtc.toDouble) diff --git a/eclair-core/src/main/scala/fr/acinq/eclair/blockchain/watchdogs/BlockchainWatchdog.scala b/eclair-core/src/main/scala/fr/acinq/eclair/blockchain/watchdogs/BlockchainWatchdog.scala index 501504ff1a..fc31faba94 100644 --- a/eclair-core/src/main/scala/fr/acinq/eclair/blockchain/watchdogs/BlockchainWatchdog.scala +++ b/eclair-core/src/main/scala/fr/acinq/eclair/blockchain/watchdogs/BlockchainWatchdog.scala @@ -19,13 +19,16 @@ package fr.acinq.eclair.blockchain.watchdogs import akka.actor.typed.Behavior import akka.actor.typed.eventstream.EventStream import akka.actor.typed.scaladsl.Behaviors +import com.softwaremill.sttp.SttpBackend import fr.acinq.bitcoin.BlockHeader -import fr.acinq.eclair.NodeParams +import fr.acinq.eclair.NotificationsLogger.NotifyNodeOperator import fr.acinq.eclair.blockchain.CurrentBlockCount import fr.acinq.eclair.blockchain.watchdogs.Monitoring.{Metrics, Tags} import fr.acinq.eclair.tor.Socks5ProxyParams +import fr.acinq.eclair.{NodeParams, NotificationsLogger} import java.util.UUID +import scala.concurrent.Future import scala.concurrent.duration.{DurationInt, FiniteDuration} import scala.util.Random @@ -69,22 +72,21 @@ object BlockchainWatchdog { def apply(nodeParams: NodeParams, maxRandomDelay: FiniteDuration, blockTimeout: FiniteDuration = 15 minutes): Behavior[Command] = { Behaviors.setup { context => val socksProxy_opt = nodeParams.socksProxy_opt.flatMap(params => if (params.useForWatchdogs) Some(params) else None) - implicit val sttpBackend = ExplorerApi.createSttpBackend(socksProxy_opt) - val chainHash = nodeParams.chainHash - val sources = nodeParams.blockchainWatchdogSources + implicit val sttpBackend: SttpBackend[Future, Nothing] = ExplorerApi.createSttpBackend(socksProxy_opt) val explorers = Seq( ExplorerApi.BlockstreamExplorer(socksProxy_opt), ExplorerApi.BlockcypherExplorer(socksProxy_opt), - ExplorerApi.MempoolSpaceExplorer(socksProxy_opt)).filter { e => - val enabled = sources.contains(e.name) + ExplorerApi.MempoolSpaceExplorer(socksProxy_opt) + ).filter { e => + val enabled = nodeParams.blockchainWatchdogSources.contains(e.name) if (!enabled) { context.log.warn(s"blockchain watchdog ${e.name} is disabled") } enabled } - val headersOverDnsEnabled = socksProxy_opt.isEmpty && sources.contains(HeadersOverDns.Source) + val headersOverDnsEnabled = socksProxy_opt.isEmpty && nodeParams.blockchainWatchdogSources.contains(HeadersOverDns.Source) if (!headersOverDnsEnabled) { context.log.warn(s"blockchain watchdog ${HeadersOverDns.Source} is disabled") } @@ -107,10 +109,10 @@ object BlockchainWatchdog { case CheckLatestHeaders(blockCount) => val id = UUID.randomUUID() if (headersOverDnsEnabled) { - context.spawn(HeadersOverDns(chainHash, blockCount), s"${HeadersOverDns.Source}-$blockCount-$id") ! HeadersOverDns.CheckLatestHeaders(context.self) + context.spawn(HeadersOverDns(nodeParams.chainHash, blockCount), s"${HeadersOverDns.Source}-$blockCount-$id") ! HeadersOverDns.CheckLatestHeaders(context.self) } explorers.foreach { explorer => - context.spawn(ExplorerApi(chainHash, blockCount, explorer), s"${explorer.name}-$blockCount-$id") ! ExplorerApi.CheckLatestHeaders(context.self) + context.spawn(ExplorerApi(nodeParams.chainHash, blockCount, explorer), s"${explorer.name}-$blockCount-$id") ! ExplorerApi.CheckLatestHeaders(context.self) } Behaviors.same case headers@LatestHeaders(blockCount, blockHeaders, source) => @@ -121,6 +123,7 @@ object BlockchainWatchdog { if (missingBlocks >= 6) { context.log.warn("{}: we are {} blocks late: we may be eclipsed from the bitcoin network", source, missingBlocks) context.system.eventStream ! EventStream.Publish(DangerousBlocksSkew(headers)) + context.system.eventStream ! EventStream.Publish(NotifyNodeOperator(NotificationsLogger.Warning, s"we are $missingBlocks late according to $source: we may be eclipsed from the bitcoin network, check your bitcoind node.")) } else if (missingBlocks > 0) { context.log.info("{}: we are {} blocks late", source, missingBlocks) } else { diff --git a/eclair-core/src/main/scala/fr/acinq/eclair/channel/Channel.scala b/eclair-core/src/main/scala/fr/acinq/eclair/channel/Channel.scala index 1eff6a1bf2..dc41f1f8ec 100644 --- a/eclair-core/src/main/scala/fr/acinq/eclair/channel/Channel.scala +++ b/eclair-core/src/main/scala/fr/acinq/eclair/channel/Channel.scala @@ -24,6 +24,7 @@ import akka.pattern.pipe import fr.acinq.bitcoin.Crypto.{PrivateKey, PublicKey} import fr.acinq.bitcoin.{ByteVector32, OutPoint, Satoshi, SatoshiLong, Script, ScriptFlags, Transaction} import fr.acinq.eclair.Logs.LogCategory +import fr.acinq.eclair.NotificationsLogger.NotifyNodeOperator import fr.acinq.eclair._ import fr.acinq.eclair.blockchain.OnChainWallet.MakeFundingTxResponse import fr.acinq.eclair.blockchain._ @@ -2498,6 +2499,7 @@ class Channel(val nodeParams: NodeParams, val wallet: OnChainChannelFunder, remo case None => // the published tx was neither their current commitment nor a revoked one log.error(s"couldn't identify txid=${tx.txid}, something very bad is going on!!!") + context.system.eventStream.publish(NotifyNodeOperator(NotificationsLogger.Error, s"funding tx ${d.commitments.commitInput.outPoint.txid} of channel ${d.channelId} was spent by an unknown transaction, indicating that your DB has lost data or your node has been breached: please contact the dev team.")) goto(ERR_INFORMATION_LEAK) } } @@ -2521,7 +2523,8 @@ class Channel(val nodeParams: NodeParams, val wallet: OnChainChannelFunder, remo private def handleInformationLeak(tx: Transaction, d: HasCommitments) = { // this is never supposed to happen !! - log.error(s"our funding tx ${d.commitments.commitInput.outPoint.txid} was spent by txid=${tx.txid} !!") + log.error(s"our funding tx ${d.commitments.commitInput.outPoint.txid} was spent by txid=${tx.txid}!!") + context.system.eventStream.publish(NotifyNodeOperator(NotificationsLogger.Error, s"funding tx ${d.commitments.commitInput.outPoint.txid} of channel ${d.channelId} was spent by an unknown transaction, indicating that your DB has lost data or your node has been breached: please contact the dev team.")) val exc = FundingTxSpent(d.channelId, tx) val error = Error(d.channelId, exc.getMessage) @@ -2617,6 +2620,7 @@ class Channel(val nodeParams: NodeParams, val wallet: OnChainChannelFunder, remo } catch { case t: SQLException => log.error(t, "fatal database error\n") + NotificationsLogger.logFatalError("eclair is shutting down because of a fatal database error", t) sys.exit(-2) case t: Throwable => handleLocalError(t, event.stateData, None) } diff --git a/eclair-core/src/main/scala/fr/acinq/eclair/io/Peer.scala b/eclair-core/src/main/scala/fr/acinq/eclair/io/Peer.scala index a8e0866d16..65d9e98e35 100644 --- a/eclair-core/src/main/scala/fr/acinq/eclair/io/Peer.scala +++ b/eclair-core/src/main/scala/fr/acinq/eclair/io/Peer.scala @@ -25,6 +25,7 @@ import fr.acinq.bitcoin.Crypto.PublicKey import fr.acinq.bitcoin.{ByteVector32, Satoshi, SatoshiLong, Script} import fr.acinq.eclair.Features.Wumbo import fr.acinq.eclair.Logs.LogCategory +import fr.acinq.eclair.NotificationsLogger.NotifyNodeOperator import fr.acinq.eclair._ import fr.acinq.eclair.blockchain.bitcoind.ZmqWatcher import fr.acinq.eclair.blockchain.fee.FeeratePerKw @@ -114,6 +115,7 @@ class Peer(val nodeParams: NodeParams, remoteNodeId: PublicKey, wallet: OnChainA case Event(err@Error(channelId, reason, _), d: ConnectedData) if channelId == CHANNELID_ZERO => log.error(s"connection-level error, failing all channels! reason=${new String(reason.toArray)}") + context.system.eventStream.publish(NotifyNodeOperator(NotificationsLogger.Info, s"$remoteNodeId sent us a connection-level error, closing all channels (reason=${new String(reason.toArray)})")) d.channels.values.toSet[ActorRef].foreach(_ forward err) // we deduplicate with toSet because there might be two entries per channel (tmp id and final id) d.peerConnection ! PeerConnection.Kill(KillReason.AllChannelsFail) stay() diff --git a/eclair-core/src/test/resources/logback-test.xml b/eclair-core/src/test/resources/logback-test.xml index 61d69722c5..02005a0eea 100644 --- a/eclair-core/src/test/resources/logback-test.xml +++ b/eclair-core/src/test/resources/logback-test.xml @@ -24,29 +24,40 @@ + + System.out + + %d %highlight(%-5level)- %msg%ex{12}%n + + + - + - + - + + + + - + + diff --git a/eclair-front/src/main/resources/logback.xml b/eclair-front/src/main/resources/logback.xml index 395f1dad75..f7667a5d39 100644 --- a/eclair-front/src/main/resources/logback.xml +++ b/eclair-front/src/main/resources/logback.xml @@ -39,6 +39,20 @@ + + ${eclair.datadir:-${user.home}/.eclair}/notifications.log + + + ${eclair.datadir:-${user.home}/.eclair}/notifications.%d{yyyy-MM-dd}.log + + 30 + 1GB + + + %d %-5level- %msg%ex{24}%n + + + @@ -52,6 +66,11 @@ + + + + + diff --git a/eclair-node/src/main/resources/logback.xml b/eclair-node/src/main/resources/logback.xml index f3f71bdc5c..24b37b6544 100644 --- a/eclair-node/src/main/resources/logback.xml +++ b/eclair-node/src/main/resources/logback.xml @@ -39,6 +39,20 @@ + + ${eclair.datadir:-${user.home}/.eclair}/notifications.log + + + ${eclair.datadir:-${user.home}/.eclair}/notifications.%d{yyyy-MM-dd}.log + + 30 + 1GB + + + %d %-5level- %msg%ex{24}%n + + + @@ -52,6 +66,11 @@ + + + + + diff --git a/eclair-node/src/main/scala/fr/acinq/eclair/Boot.scala b/eclair-node/src/main/scala/fr/acinq/eclair/Boot.scala index 2e63b178cf..d6f54c1f9e 100644 --- a/eclair-node/src/main/scala/fr/acinq/eclair/Boot.scala +++ b/eclair-node/src/main/scala/fr/acinq/eclair/Boot.scala @@ -16,15 +16,15 @@ package fr.acinq.eclair -import java.io.File - import akka.actor.ActorSystem import akka.http.scaladsl.Http import akka.stream.BindFailedException +import fr.acinq.eclair.NotificationsLogger.NotifyNodeOperator import fr.acinq.eclair.api.Service import grizzled.slf4j.Logging import kamon.Kamon +import java.io.File import scala.concurrent.ExecutionContext import scala.util.{Failure, Success} @@ -53,6 +53,7 @@ object Boot extends App with Logging { plugins.foreach(_.onKit(kit)) val routeProviderPlugins = plugins.collect { case plugin: RouteProvider => plugin } startApiServiceIfEnabled(kit, routeProviderPlugins) + kit.system.eventStream.publish(NotifyNodeOperator(NotificationsLogger.Info, s"eclair successfully started (version=${Kit.getVersion} commit=${Kit.getCommit})")) case Failure(t) => onError(t) } } catch { @@ -61,10 +62,6 @@ object Boot extends App with Logging { /** * Starts the http APIs service if enabled in the configuration - * - * @param kit - * @param system - * @param ec */ def startApiServiceIfEnabled(kit: Kit, providers: Seq[RouteProvider] = Nil)(implicit system: ActorSystem, ec: ExecutionContext) = { val config = system.settings.config.getConfig("eclair") @@ -91,6 +88,7 @@ object Boot extends App with Logging { val errorMsg = if (t.getMessage != null) t.getMessage else t.getClass.getSimpleName System.err.println(s"fatal error: $errorMsg") logger.error(s"fatal error: $errorMsg", t) + NotificationsLogger.logFatalError("could not start eclair", t) System.exit(1) } } diff --git a/eclair-node/src/test/resources/logback-test.xml b/eclair-node/src/test/resources/logback-test.xml index 25719eb7b7..3a851bd91d 100644 --- a/eclair-node/src/test/resources/logback-test.xml +++ b/eclair-node/src/test/resources/logback-test.xml @@ -24,8 +24,19 @@ + + System.out + + %d %highlight(%-5level)- %msg%ex{12}%n + + + + + + + - + \ No newline at end of file