Skip to content

Commit

Permalink
fix: alerts duplication bug, LOGFLARE_ALERTS_MIN_CLUSTER_SIZE (#1994)
Browse files Browse the repository at this point in the history
* fix: alerts duplication bug, LOGFLARE_ALERTS_MIN_CLUSTER_SIZE

* chore: tweak LOGFLARE_ALERTS_MIN_CLUSTER_SIZE to 4 for prod

* chore: formatting
  • Loading branch information
Ziinc authored Mar 5, 2024
1 parent 135b691 commit 45b32be
Show file tree
Hide file tree
Showing 6 changed files with 58 additions and 2 deletions.
2 changes: 1 addition & 1 deletion cloudbuild/prod/pre-deploy.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ steps:
- --container-image=${_CONTAINER_IMAGE}
- --container-privileged
- --container-restart-policy=always
- --container-env=LOGFLARE_GRPC_PORT=4001,LOGFLARE_MIN_CLUSTER_SIZE=2,RELEASE_COOKIE=${_COOKIE},LOGFLARE_PUBSUB_POOL_SIZE=56,LOGFLARE_LOGGER_METADATA_CLUSTER=${_CLUSTER}
- --container-env=LOGFLARE_GRPC_PORT=4001,LOGFLARE_MIN_CLUSTER_SIZE=2,RELEASE_COOKIE=${_COOKIE},LOGFLARE_PUBSUB_POOL_SIZE=56,LOGFLARE_LOGGER_METADATA_CLUSTER=${_CLUSTER},LOGFLARE_ALERTS_MIN_CLUSTER_SIZE=4
- --no-shielded-secure-boot
- --shielded-vtpm
- --shielded-integrity-monitoring
Expand Down
2 changes: 2 additions & 0 deletions config/config.exs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ config :logflare,
# normal instances can be more than 90 seconds
sigterm_shutdown_grace_period_ms: 15_000

# Compile-time defaults for query alerting: alerts are enabled and a
# single-node cluster is sufficient for them to run.
config :logflare, Logflare.Alerting,
  min_cluster_size: 1,
  enabled: true

config :logflare, Logflare.Google, dataset_id_append: "_default"

# Configures the endpoint
Expand Down
12 changes: 12 additions & 0 deletions config/runtime.exs
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,18 @@ config :logflare,
]
|> filter_nil_kv_pairs.()

# Runtime overrides for query alerting.
# - LOGFLARE_ALERTS_MIN_CLUSTER_SIZE: parsed as an integer when set; when unset
#   the key is nil and gets dropped by filter_nil_kv_pairs.
# - LOGFLARE_ALERTS_ENABLED: alerts are enabled only when the value is exactly
#   "true" (which is also the fallback when the variable is unset).
config :logflare,
       Logflare.Alerting,
       [
         min_cluster_size:
           case System.get_env("LOGFLARE_ALERTS_MIN_CLUSTER_SIZE") do
             nil -> nil
             raw_size -> String.to_integer(raw_size)
           end,
         enabled: System.get_env("LOGFLARE_ALERTS_ENABLED", "true") == "true"
       ]
       |> filter_nil_kv_pairs.()

config :logflare,
LogflareWeb.Endpoint,
filter_nil_kv_pairs.(
Expand Down
2 changes: 2 additions & 0 deletions docs/docs.logflare.com/docs/self-hosting/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@ All browser authentication will be disabled when in single-tenant mode.
| `LOGFLARE_NODE_HOST` | string, defaults to `127.0.0.1` | Sets node host on startup, which affects the node name `logflare@<host>` |
| `LOGFLARE_LOGGER_METADATA_CLUSTER` | string, defaults to `nil` | Sets global logging metadata for the cluster name. Useful for filtering logs by cluster name. |
| `LOGFLARE_PUBSUB_POOL_SIZE` | Integer, defaults to `10` | Sets the number of `Phoenix.PubSub.PG2` partitions to be created. Should be configured to the number of cores of your server for optimal multi-node performance. |
| `LOGFLARE_ALERTS_ENABLED` | Boolean, defaults to `true` | Flag for enabling or disabling query alerts. Alerts are enabled only when the value is exactly `true`; any other value disables them. |
| `LOGFLARE_ALERTS_MIN_CLUSTER_SIZE` | Integer, defaults to `1` | Sets the minimum cluster size required for scheduled query alerts to run. If the actual cluster size is below this value, scheduled query alerts will not run. |

### BigQuery Backend Configuration

Expand Down
20 changes: 19 additions & 1 deletion lib/logflare/alerting.ex
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ defmodule Logflare.Alerting do
alias Logflare.Alerting.AlertQuery
alias Logflare.User
alias Logflare.Endpoints
alias Logflare.Cluster

@doc """
Returns the list of alert_queries.
Expand Down Expand Up @@ -144,7 +145,7 @@ defmodule Logflare.Alerting do
id: alert_query.id,
schedule: alert_query.cron,
extended_syntax: false,
task: {__MODULE__, :run_alert, [alert_query]}
task: {__MODULE__, :run_alert, [alert_query, :scheduled]}
})

{:ok, get_alert_job(alert_query)}
Expand Down Expand Up @@ -172,7 +173,24 @@ defmodule Logflare.Alerting do
Send notifications if necessary configurations are set. If no results are returned from the query execution, no alert is sent.
"""
@spec run_alert(AlertQuery.t(), :scheduled) :: :ok | {:error, term()}
@spec run_alert(AlertQuery.t()) :: :ok | {:error, term()}
def run_alert(%AlertQuery{} = alert_query, :scheduled) do
  # Entry point used by scheduled jobs. Performs pre-run checks against the
  # `Logflare.Alerting` application config before delegating to run_alert/1:
  #
  #   * `{:error, :not_enabled}` when `:enabled` is explicitly `false`
  #   * `{:error, :below_min_cluster_size}` when the actual cluster size is
  #     smaller than `:min_cluster_size`
  #   * otherwise, whatever run_alert/1 returns
  cfg = Application.get_env(:logflare, Logflare.Alerting, [])

  # Fall back to 1 when the key is missing or nil. Without the fallback,
  # `nil > integer` is true under Erlang term ordering, which would silently
  # block every scheduled alert.
  min_cluster_size = cfg[:min_cluster_size] || 1

  cond do
    cfg[:enabled] == false ->
      {:error, :not_enabled}

    min_cluster_size > Cluster.Utils.actual_cluster_size() ->
      {:error, :below_min_cluster_size}

    true ->
      run_alert(alert_query)
  end
end

def run_alert(%AlertQuery{} = alert_query) do
alert_query = alert_query |> Repo.preload([:user])

Expand Down
22 changes: 22 additions & 0 deletions test/logflare/alerting_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,27 @@ defmodule Logflare.AlertingTest do

assert {:error, :no_results} = Alerting.run_alert(alert_query)
end


test "run_alert/2, performs pre-run configuration checks", %{user: user} do
  alert_query = insert(:alert, user: user)

  # When a pre-run check fails, neither the query nor any notification
  # should be attempted (assumes reject/1 fails the test if the call happens).
  reject(&GoogleApi.BigQuery.V2.Api.Jobs.bigquery_jobs_query/3)
  reject(&Logflare.Backends.Adaptor.WebhookAdaptor.Client.send/2)
  reject(&Logflare.Backends.Adaptor.SlackAdaptor.Client.send/2)

  # Snapshot the current config so it can be restored once the test exits.
  cfg = Application.get_env(:logflare, Logflare.Alerting)

  on_exit(fn ->
    Application.put_env(:logflare, Logflare.Alerting, cfg)
  end)

  # min_cluster_size (assumes the test node's cluster size is below 4)
  Application.put_env(:logflare, Logflare.Alerting, min_cluster_size: 4, enabled: true)
  assert {:error, :below_min_cluster_size} = Alerting.run_alert(alert_query, :scheduled)

  # enabled flag
  Application.put_env(:logflare, Logflare.Alerting, min_cluster_size: 1, enabled: false)
  assert {:error, :not_enabled} = Alerting.run_alert(alert_query, :scheduled)
end
end

describe "citrine integration" do
Expand All @@ -226,4 +247,5 @@ defmodule Logflare.AlertingTest do
assert %Citrine.Job{} = Alerting.get_alert_job(alert.id)
end
end

end

0 comments on commit 45b32be

Please sign in to comment.