Skip to content

Commit

Permalink
Do not JOIN when imported date range has no overlap
Browse files Browse the repository at this point in the history
  • Loading branch information
ukutaht committed Mar 8, 2022
1 parent a7bc60f commit 8ddbfb4
Show file tree
Hide file tree
Showing 9 changed files with 96 additions and 95 deletions.
6 changes: 2 additions & 4 deletions lib/plausible/stats/breakdown.ex
Original file line number Diff line number Diff line change
Expand Up @@ -244,10 +244,8 @@ defmodule Plausible.Stats.Breakdown do

{base_query_raw, base_query_raw_params} = ClickhouseRepo.to_sql(:all, q)

with_imported = query.with_imported && site.imported_data

select =
if with_imported do
if query.include_imported do
"sum(td), count(case when p2 != p then 1 end)"
else
"round(sum(td)/count(case when p2 != p then 1 end))"
Expand Down Expand Up @@ -275,7 +273,7 @@ defmodule Plausible.Stats.Breakdown do

{:ok, res} = ClickhouseRepo.query(time_query, base_query_raw_params ++ [pages])

if with_imported do
if query.include_imported do
# Imported page views have pre-calculated values
res =
res.rows
Expand Down
15 changes: 4 additions & 11 deletions lib/plausible/stats/imported.ex
Original file line number Diff line number Diff line change
Expand Up @@ -5,22 +5,18 @@ defmodule Plausible.Stats.Imported do

@no_ref "Direct / None"

def merge_imported_timeseries(native_q, _, %Plausible.Stats.Query{with_imported: false}, _),
def merge_imported_timeseries(native_q, _, %Plausible.Stats.Query{include_imported: false}, _),
do: native_q

def merge_imported_timeseries(native_q, _, %Plausible.Stats.Query{filters: filters}, _)
when length(filters) > 0,
do: native_q

def merge_imported_timeseries(
native_q,
%Plausible.Site{id: site_id, imported_data: %{status: "ok"}},
site,
query,
metrics
) do
imported_q =
from(v in "imported_visitors",
where: v.site_id == ^site_id,
where: v.site_id == ^site.id,
where: v.date >= ^query.date_range.first and v.date <= ^query.date_range.last,
select: %{visitors: sum(v.visitors)}
)
Expand All @@ -33,8 +29,6 @@ defmodule Plausible.Stats.Imported do
|> select_joined_metrics(metrics)
end

def merge_imported_timeseries(native_q, _site, _query, _metrics), do: native_q

defp apply_interval(imported_q, %Plausible.Stats.Query{interval: "month"}) do
imported_q
|> group_by([i], fragment("toStartOfMonth(?)", i.date))
Expand All @@ -47,8 +41,7 @@ defmodule Plausible.Stats.Imported do
|> select_merge([i], %{date: i.date})
end

def merge_imported(q, %Plausible.Site{imported_data: nil}, _, _, _), do: q
def merge_imported(q, _, %Query{with_imported: false}, _, _), do: q
def merge_imported(q, _, %Query{include_imported: false}, _, _), do: q
def merge_imported(q, _, _, _, [:events | _]), do: q
# GA only has 'source'
def merge_imported(q, _, _, "utm_source", _), do: q
Expand Down
81 changes: 46 additions & 35 deletions lib/plausible/stats/query.ex
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ defmodule Plausible.Stats.Query do
period: nil,
filters: %{},
sample_threshold: 20_000_000,
with_imported: true
include_imported: false

@default_sample_threshold 20_000_000

Expand Down Expand Up @@ -38,62 +38,62 @@ defmodule Plausible.Stats.Query do
Map.put(query, :date_range, Date.range(new_first, new_last))
end

def from(tz, %{"period" => "realtime"} = params) do
date = today(tz)
def from(site, %{"period" => "realtime"} = params) do
date = today(site.timezone)

%__MODULE__{
period: "realtime",
interval: "minute",
date_range: Date.range(date, date),
filters: parse_filters(params),
sample_threshold: Map.get(params, "sample_threshold", @default_sample_threshold),
with_imported: false
include_imported: false
}
end

def from(tz, %{"period" => "day"} = params) do
date = parse_single_date(tz, params)
def from(site, %{"period" => "day"} = params) do
date = parse_single_date(site.timezone, params)

%__MODULE__{
period: "day",
date_range: Date.range(date, date),
interval: "hour",
filters: parse_filters(params),
sample_threshold: Map.get(params, "sample_threshold", @default_sample_threshold),
with_imported: include_imported(params)
sample_threshold: Map.get(params, "sample_threshold", @default_sample_threshold)
}
|> maybe_include_imported(site, params)
end

def from(tz, %{"period" => "7d"} = params) do
end_date = parse_single_date(tz, params)
def from(site, %{"period" => "7d"} = params) do
end_date = parse_single_date(site.timezone, params)
start_date = end_date |> Timex.shift(days: -6)

%__MODULE__{
period: "7d",
date_range: Date.range(start_date, end_date),
interval: "date",
filters: parse_filters(params),
sample_threshold: Map.get(params, "sample_threshold", @default_sample_threshold),
with_imported: include_imported(params)
sample_threshold: Map.get(params, "sample_threshold", @default_sample_threshold)
}
|> maybe_include_imported(site, params)
end

def from(tz, %{"period" => "30d"} = params) do
end_date = parse_single_date(tz, params)
def from(site, %{"period" => "30d"} = params) do
end_date = parse_single_date(site.timezone, params)
start_date = end_date |> Timex.shift(days: -30)

%__MODULE__{
period: "30d",
date_range: Date.range(start_date, end_date),
interval: "date",
filters: parse_filters(params),
sample_threshold: Map.get(params, "sample_threshold", @default_sample_threshold),
with_imported: include_imported(params)
sample_threshold: Map.get(params, "sample_threshold", @default_sample_threshold)
}
|> maybe_include_imported(site, params)
end

def from(tz, %{"period" => "month"} = params) do
date = parse_single_date(tz, params)
def from(site, %{"period" => "month"} = params) do
date = parse_single_date(site.timezone, params)

start_date = Timex.beginning_of_month(date)
end_date = Timex.end_of_month(date)
Expand All @@ -103,14 +103,14 @@ defmodule Plausible.Stats.Query do
date_range: Date.range(start_date, end_date),
interval: "date",
filters: parse_filters(params),
sample_threshold: Map.get(params, "sample_threshold", @default_sample_threshold),
with_imported: include_imported(params)
sample_threshold: Map.get(params, "sample_threshold", @default_sample_threshold)
}
|> maybe_include_imported(site, params)
end

def from(tz, %{"period" => "6mo"} = params) do
def from(site, %{"period" => "6mo"} = params) do
end_date =
parse_single_date(tz, params)
parse_single_date(site.timezone, params)
|> Timex.end_of_month()

start_date =
Expand All @@ -122,14 +122,14 @@ defmodule Plausible.Stats.Query do
date_range: Date.range(start_date, end_date),
interval: Map.get(params, "interval", "month"),
filters: parse_filters(params),
sample_threshold: Map.get(params, "sample_threshold", @default_sample_threshold),
with_imported: include_imported(params)
sample_threshold: Map.get(params, "sample_threshold", @default_sample_threshold)
}
|> maybe_include_imported(site, params)
end

def from(tz, %{"period" => "12mo"} = params) do
def from(site, %{"period" => "12mo"} = params) do
end_date =
parse_single_date(tz, params)
parse_single_date(site.timezone, params)
|> Timex.end_of_month()

start_date =
Expand All @@ -141,22 +141,22 @@ defmodule Plausible.Stats.Query do
date_range: Date.range(start_date, end_date),
interval: Map.get(params, "interval", "month"),
filters: parse_filters(params),
sample_threshold: Map.get(params, "sample_threshold", @default_sample_threshold),
with_imported: include_imported(params)
sample_threshold: Map.get(params, "sample_threshold", @default_sample_threshold)
}
|> maybe_include_imported(site, params)
end

def from(tz, %{"period" => "custom", "from" => from, "to" => to} = params) do
def from(site, %{"period" => "custom", "from" => from, "to" => to} = params) do
new_params =
params
|> Map.delete("from")
|> Map.delete("to")
|> Map.put("date", Enum.join([from, to], ","))

from(tz, new_params)
from(site, new_params)
end

def from(_tz, %{"period" => "custom", "date" => date} = params) do
def from(site, %{"period" => "custom", "date" => date} = params) do
[from, to] = String.split(date, ",")
from_date = Date.from_iso8601!(String.trim(from))
to_date = Date.from_iso8601!(String.trim(to))
Expand All @@ -166,9 +166,9 @@ defmodule Plausible.Stats.Query do
date_range: Date.range(from_date, to_date),
interval: Map.get(params, "interval", "date"),
filters: parse_filters(params),
sample_threshold: Map.get(params, "sample_threshold", @default_sample_threshold),
with_imported: include_imported(params)
sample_threshold: Map.get(params, "sample_threshold", @default_sample_threshold)
}
|> maybe_include_imported(site, params)
end

def from(tz, params) do
Expand Down Expand Up @@ -257,7 +257,18 @@ defmodule Plausible.Stats.Query do
defp parse_goal_filter("Visit " <> page), do: {:is, :page, page}
defp parse_goal_filter(event), do: {:is, :event, event}

defp include_imported(params) do
params["filters"] in [nil, "{}"] && params["with_imported"] == "true"
# Sets `include_imported` on `query` to a strict boolean.
#
# The flag is true only when every one of these holds:
#   * the request carries `"with_imported" => "true"`,
#   * `site.imported_data` is present and its status is "ok",
#   * the first day of the query's date range is not after the imported
#     data's `end_date`,
#   * the query has no filters.
#
# Returns `query` with only the `include_imported` key updated.
defp maybe_include_imported(query, site, params) do
  requested? = params["with_imported"] == "true"
  importable? = site.imported_data && site.imported_data.status == "ok"

  # `end_date` is only read once usable imported data is known to exist,
  # so a site without imported data never reaches the Timex call.
  overlaps? =
    if importable? do
      !Timex.after?(query.date_range.first, site.imported_data.end_date)
    else
      false
    end

  unfiltered? = Enum.empty?(query.filters)

  # All four conditions are already computed; collapse them into true/false.
  decision = Enum.all?([requested?, importable?, overlaps?, unfiltered?])

  %{query | include_imported: decision}
end
end
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ defmodule PlausibleWeb.Api.ExternalStatsController do

def realtime_visitors(conn, _params) do
site = conn.assigns[:site]
query = Query.from(site.timezone, %{"period" => "realtime"})
query = Query.from(site, %{"period" => "realtime"})
json(conn, Plausible.Stats.Clickhouse.current_visitors(site, query))
end

Expand All @@ -16,7 +16,7 @@ defmodule PlausibleWeb.Api.ExternalStatsController do

with :ok <- validate_period(params),
:ok <- validate_date(params),
query <- Query.from(site.timezone, params),
query <- Query.from(site, params),
{:ok, metrics} <- parse_metrics(params, nil, query) do
results =
if params["compare"] == "previous_period" do
Expand Down Expand Up @@ -61,7 +61,7 @@ defmodule PlausibleWeb.Api.ExternalStatsController do
with :ok <- validate_period(params),
:ok <- validate_date(params),
{:ok, property} <- validate_property(params),
query <- Query.from(site.timezone, params),
query <- Query.from(site, params),
{:ok, metrics} <- parse_metrics(params, property, query) do
limit = String.to_integer(Map.get(params, "limit", "100"))
page = String.to_integer(Map.get(params, "page", "1"))
Expand Down Expand Up @@ -144,7 +144,7 @@ defmodule PlausibleWeb.Api.ExternalStatsController do
with :ok <- validate_period(params),
:ok <- validate_date(params),
:ok <- validate_interval(params),
query <- Query.from(site.timezone, params),
query <- Query.from(site, params),
{:ok, metrics} <- parse_metrics(params, nil, query) do
graph = Plausible.Stats.timeseries(site, query, metrics)
metrics = metrics ++ [:date]
Expand Down
Loading

0 comments on commit 8ddbfb4

Please sign in to comment.