Skip to content

Commit

Permalink
fix(time_offset): improved LIMIT-handling in advanced analytics (#27934)
Browse files Browse the repository at this point in the history
(cherry picked from commit 6844735)
  • Loading branch information
Antonio-RiveroMartnez authored and michael-s-molina committed Apr 16, 2024
1 parent 55c8d6c commit fa21f44
Show file tree
Hide file tree
Showing 2 changed files with 73 additions and 1 deletion.
7 changes: 7 additions & 0 deletions superset/common/query_context_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -455,6 +455,13 @@ def processing_time_offsets( # pylint: disable=too-many-locals,too-many-stateme
for metric in metric_names
}

# When the original query has limit or offset we wont apply those
# to the subquery so we prevent data inconsistency due to missing records
# in the dataframes when performing the join
if query_object.row_limit or query_object.row_offset:
query_object_clone_dct["row_limit"] = config["ROW_LIMIT"]
query_object_clone_dct["row_offset"] = 0

if isinstance(self._qc_datasource, Query):
result = self._qc_datasource.exc_query(query_object_clone_dct)
else:
Expand Down
67 changes: 66 additions & 1 deletion tests/integration_tests/query_context_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,14 @@
import re
import time
from typing import Any
from unittest.mock import Mock, patch

import numpy as np
import pandas as pd
import pytest
from pandas import DateOffset

from superset import db
from superset import app, db
from superset.charts.schemas import ChartDataQueryContextSchema
from superset.common.chart_data import ChartDataResultFormat, ChartDataResultType
from superset.common.query_context import QueryContext
Expand Down Expand Up @@ -689,6 +690,70 @@ def test_time_offsets_accuracy(self):
== df_3_years_later.loc[index]["sum__num"]
)

@pytest.mark.usefixtures("load_birth_names_dashboard_with_slices")
@patch("superset.common.query_context.QueryContext.get_query_result")
def test_time_offsets_in_query_object_no_limit(self, query_result_mock):
"""
Ensure that time_offsets can generate the correct queries and
it doesnt use the row_limit nor row_offset from the original
query object
"""
payload = get_query_context("birth_names")
payload["queries"][0]["columns"] = [
{
"timeGrain": "P1D",
"columnType": "BASE_AXIS",
"sqlExpression": "ds",
"label": "ds",
"expressionType": "SQL",
}
]
payload["queries"][0]["metrics"] = ["sum__num"]
payload["queries"][0]["groupby"] = ["name"]
payload["queries"][0]["is_timeseries"] = True
payload["queries"][0]["row_limit"] = 100
payload["queries"][0]["row_offset"] = 10
payload["queries"][0]["time_range"] = "1990 : 1991"

initial_data = {
"__timestamp": ["1990-01-01", "1990-01-01"],
"name": ["zban", "ahwb"],
"sum__num": [43571, 27225],
}
initial_df = pd.DataFrame(initial_data)

mock_query_result = Mock()
mock_query_result.df = initial_df
side_effects = [mock_query_result]
query_result_mock.side_effect = side_effects
# Proceed with the test as before
query_context = ChartDataQueryContextSchema().load(payload)
query_object = query_context.queries[0]
# First call to get_query_result, should return initial_df
query_result = query_context.get_query_result(query_object)
df = query_result.df
# Setup the payload for time offsets
payload["queries"][0]["time_offsets"] = ["1 year ago", "1 year later"]
query_context = ChartDataQueryContextSchema().load(payload)
query_object = query_context.queries[0]
time_offsets_obj = query_context.processing_time_offsets(df, query_object)
sqls = time_offsets_obj["queries"]
row_limit_value = app.config["ROW_LIMIT"]
row_limit_pattern_with_config_value = r"LIMIT " + re.escape(
str(row_limit_value)
)
self.assertEqual(len(sqls), 2)
# 1 year ago
assert re.search(r"1989-01-01.+1990-01-01", sqls[0], re.S)
assert not re.search(r"LIMIT 100", sqls[0], re.S)
assert not re.search(r"OFFSET 10", sqls[0], re.S)
assert re.search(row_limit_pattern_with_config_value, sqls[0], re.S)
# 1 year later
assert re.search(r"1991-01-01.+1992-01-01", sqls[1], re.S)
assert not re.search(r"LIMIT 100", sqls[1], re.S)
assert not re.search(r"OFFSET 10", sqls[1], re.S)
assert re.search(row_limit_pattern_with_config_value, sqls[1], re.S)


def test_get_label_map(app_context, virtual_dataset_comma_in_column_value):
qc = QueryContextFactory().create(
Expand Down

0 comments on commit fa21f44

Please sign in to comment.