From e121a8585e753e0e659f52da2bec6b5bc7f79444 Mon Sep 17 00:00:00 2001 From: Jeff Niu Date: Mon, 16 Oct 2017 20:16:20 -0700 Subject: [PATCH] [Feature] Percentage columns in Table Viz (#3586) * Added percent metric options to table viz * Added unit tests for TableViz * fixed code for python3 * bump travis --- .../javascripts/explore/stores/controls.jsx | 13 + .../javascripts/explore/stores/visTypes.js | 5 +- superset/assets/visualizations/table.js | 20 +- superset/viz.py | 30 +++ tests/viz_tests.py | 231 +++++++++++++++++- 5 files changed, 293 insertions(+), 6 deletions(-) diff --git a/superset/assets/javascripts/explore/stores/controls.jsx b/superset/assets/javascripts/explore/stores/controls.jsx index e926a47960422..fa92cd5c66df9 100644 --- a/superset/assets/javascripts/explore/stores/controls.jsx +++ b/superset/assets/javascripts/explore/stores/controls.jsx @@ -100,6 +100,19 @@ export const controls = { description: t('One or many metrics to display'), }, + percent_metrics: { + type: 'SelectControl', + multi: true, + label: t('Percentage Metrics'), + valueKey: 'metric_name', + optionRenderer: m => , + valueRenderer: m => , + mapStateToProps: state => ({ + options: (state.datasource) ? state.datasource.metrics : [], + }), + description: t('Metrics for which percentage of total are to be displayed'), + }, + y_axis_bounds: { type: 'BoundsControl', label: t('Y Axis Bounds'), diff --git a/superset/assets/javascripts/explore/stores/visTypes.js b/superset/assets/javascripts/explore/stores/visTypes.js index 09755550169b0..c2dc18bd8c7ca 100644 --- a/superset/assets/javascripts/explore/stores/visTypes.js +++ b/superset/assets/javascripts/explore/stores/visTypes.js @@ -338,8 +338,9 @@ export const visTypes = { label: t('GROUP BY'), description: t('Use this section if you want a query that aggregates'), controlSetRows: [ - ['groupby', 'metrics'], - ['include_time', null], + ['groupby'], + ['metrics', 'percent_metrics'], + ['include_time'], ['timeseries_limit_metric', 'order_desc'], ], }, diff --git a/superset/assets/visualizations/table.js b/superset/assets/visualizations/table.js index 6985a2587c6a6..2e845b94ac015 100644 --- a/superset/assets/visualizations/table.js +++ b/superset/assets/visualizations/table.js @@ -16,8 +16,10 @@ function tableVis(slice, payload) { const data = payload.data; const fd = slice.formData; - // Removing metrics (aggregates) that are strings let metrics = fd.metrics || []; + // Add percent metrics + metrics = metrics.concat((fd.percent_metrics || []).map(m => '%' + m)); + // Removing metrics (aggregates) that are strings metrics = metrics.filter(m => !isNaN(data.records[0][m])); function col(c) { @@ -42,7 +44,18 @@ function tableVis(slice, payload) { 'table-condensed table-hover dataTable no-footer', true) .attr('width', '100%'); - const cols = data.columns.map(c => slice.datasource.verbose_map[c] || c); + const verboseMap = slice.datasource.verbose_map; + const cols = data.columns.map((c) => { + if (verboseMap[c]) { + return verboseMap[c]; + } + // Handle verbose names for percents + if (c[0] === '%') { + const cName = c.substring(1); + return '% ' + (verboseMap[cName] || cName); + } + return c; + }); table.append('thead').append('tr') .selectAll('th') @@ -72,6 +85,9 @@ function tableVis(slice, payload) { if (isMetric) { html = slice.d3format(c, val); } + if (c[0] === '%') { + html = d3.format('.3p')(val); + } return { col: c, val, diff --git a/superset/viz.py b/superset/viz.py index 1d701b0a656e6..025e9c52b0c52 100644 --- a/superset/viz.py +++ b/superset/viz.py @@ -384,13 +384,43 @@ def query_obj(self): d['metrics'] += [sort_by] d['orderby'] = [(sort_by, not fd.get("order_desc", True))] + # Add all percent metrics that are not already in the list + if 'percent_metrics' in fd: + d['metrics'] = d['metrics'] + list(filter( + lambda m: m not in d['metrics'], + fd['percent_metrics'] + )) + d['is_timeseries'] = self.should_be_timeseries() return d def get_data(self, df): + fd = self.form_data if not self.should_be_timeseries() and DTTM_ALIAS in df: del df[DTTM_ALIAS] + # Sum up and compute percentages for all percent metrics + percent_metrics = fd.get('percent_metrics', []) + if len(percent_metrics): + percent_metrics = list(filter(lambda m: m in df, percent_metrics)) + metric_sums = { + m: reduce(lambda a, b: a + b, df[m]) + for m in percent_metrics + } + metric_percents = { + m: list(map(lambda a: a / metric_sums[m], df[m])) + for m in percent_metrics + } + for m in percent_metrics: + m_name = '%' + m + df[m_name] = pd.Series(metric_percents[m], name=m_name) + # Remove metrics that are not in the main metrics list + for m in filter( + lambda m: m not in fd['metrics'] and m in df.columns, + percent_metrics + ): + del df[m] + return dict( records=df.to_dict(orient="records"), columns=list(df.columns), diff --git a/tests/viz_tests.py b/tests/viz_tests.py index fec424a25ac90..99111b5c95e05 100644 --- a/tests/viz_tests.py +++ b/tests/viz_tests.py @@ -1,9 +1,236 @@ import unittest import pandas as pd import superset.viz as viz +import superset.utils as utils from superset.utils import DTTM_ALIAS from mock import Mock, patch +from datetime import datetime, timedelta + +class BaseVizTestCase(unittest.TestCase): + def test_constructor_exception_no_datasource(self): + form_data = {} + datasource = None + with self.assertRaises(Exception): + viz.BaseViz(datasource, form_data) + + def test_get_fillna_returns_default_on_null_columns(self): + form_data = { + 'viz_type': 'table', + 'token': '12345', + } + datasource = {'type': 'table'} + test_viz = viz.BaseViz(datasource, form_data); + self.assertEqual( + test_viz.default_fillna, + test_viz.get_fillna_for_columns() + ) + + def test_get_df_returns_empty_df(self): + datasource = Mock() + datasource.type = 'table' + mock_dttm_col = Mock() + mock_dttm_col.python_date_format = Mock() + datasource.get_col = Mock(return_value=mock_dttm_col) + form_data = {'dummy': 123} + query_obj = {'granularity': 'day'} + results = Mock() + results.query = Mock() + results.status = Mock() + results.error_message = None + results.df = Mock() + results.df.empty = True + datasource.query = Mock(return_value=results) + test_viz = viz.BaseViz(datasource, form_data) + result = test_viz.get_df(query_obj) + self.assertEqual(type(result), pd.DataFrame) + self.assertTrue(result.empty) + self.assertEqual(test_viz.error_message, 'No data.') + self.assertEqual(test_viz.status, utils.QueryStatus.FAILED) + + def test_get_df_handles_dttm_col(self): + datasource = Mock() + datasource.type = 'table' + datasource.offset = 1 + mock_dttm_col = Mock() + mock_dttm_col.python_date_format = 'epoch_ms' + datasource.get_col = Mock(return_value=mock_dttm_col) + form_data = {'dummy': 123} + query_obj = {'granularity': 'day'} + results = Mock() + results.query = Mock() + results.status = Mock() + results.error_message = Mock() + df = Mock() + df.columns = [DTTM_ALIAS] + f_datetime = datetime(1960, 1, 1, 5, 0) + df.__getitem__ = Mock(return_value=pd.Series([f_datetime])) + df.__setitem__ = Mock() + df.replace = Mock() + df.fillna = Mock() + results.df = df + results.df.empty = False + datasource.query = Mock(return_value=results) + test_viz = viz.BaseViz(datasource, form_data) + test_viz.get_fillna_for_columns = Mock(return_value=0) + result = test_viz.get_df(query_obj) + mock_call = df.__setitem__.mock_calls[0] + self.assertEqual(mock_call[1][0], DTTM_ALIAS) + self.assertFalse(mock_call[1][1].empty) + self.assertEqual(mock_call[1][1][0], f_datetime) + mock_call = df.__setitem__.mock_calls[1] + self.assertEqual(mock_call[1][0], DTTM_ALIAS) + self.assertEqual(mock_call[1][1][0].hour, 6) + self.assertEqual(mock_call[1][1].dtype, 'datetime64[ns]') + mock_dttm_col.python_date_format = 'utc' + result = test_viz.get_df(query_obj) + mock_call = df.__setitem__.mock_calls[2] + self.assertEqual(mock_call[1][0], DTTM_ALIAS) + self.assertFalse(mock_call[1][1].empty) + self.assertEqual(mock_call[1][1][0].hour, 6) + mock_call = df.__setitem__.mock_calls[3] + self.assertEqual(mock_call[1][0], DTTM_ALIAS) + self.assertEqual(mock_call[1][1][0].hour, 7) + self.assertEqual(mock_call[1][1].dtype, 'datetime64[ns]') + + def test_cache_timeout(self): + datasource = Mock() + form_data = {'cache_timeout': '10'} + test_viz = viz.BaseViz(datasource, form_data) + self.assertEqual(10, test_viz.cache_timeout) + del form_data['cache_timeout'] + datasource.cache_timeout = 156 + self.assertEqual(156, test_viz.cache_timeout) + datasource.cache_timeout = None + datasource.database = Mock() + datasource.database.cache_timeout= 1666 + self.assertEqual(1666, test_viz.cache_timeout) + + +class TableVizTestCase(unittest.TestCase): + def test_get_data_applies_percentage(self): + form_data = { + 'percent_metrics': ['sum__A', 'avg__B'], + 'metrics': ['sum__A', 'count', 'avg__C'], + } + datasource = Mock() + raw = {} + raw['sum__A'] = [15, 20, 25, 40] + raw['avg__B'] = [10, 20, 5, 15] + raw['avg__C'] = [11, 22, 33, 44] + raw['count'] = [6, 7, 8, 9] + raw['groupA'] = ['A', 'B', 'C', 'C'] + raw['groupB'] = ['x', 'x', 'y', 'z'] + df = pd.DataFrame(raw) + test_viz = viz.TableViz(datasource, form_data) + data = test_viz.get_data(df) + # Check method correctly transforms data and computes percents + self.assertEqual(set([ + 'groupA', 'groupB', 'count', + 'sum__A', 'avg__C', + '%sum__A', '%avg__B', + ]), set(data['columns'])) + expected = [ + { + 'groupA': 'A', 'groupB': 'x', + 'count': 6, 'sum__A': 15, 'avg__C': 11, + '%sum__A': 0.15, '%avg__B': 0.2, + }, + { + 'groupA': 'B', 'groupB': 'x', + 'count': 7, 'sum__A': 20, 'avg__C': 22, + '%sum__A': 0.2, '%avg__B': 0.4, + }, + { + 'groupA': 'C', 'groupB': 'y', + 'count': 8, 'sum__A': 25, 'avg__C': 33, + '%sum__A': 0.25, '%avg__B': 0.1, + }, + { + 'groupA': 'C', 'groupB': 'z', + 'count': 9, 'sum__A': 40, 'avg__C': 44, + '%sum__A': 0.40, '%avg__B': 0.3, + }, + ] + self.assertEqual(expected, data['records']) + + @patch('superset.viz.BaseViz.query_obj') + def test_query_obj_merges_percent_metrics(self, super_query_obj): + datasource = Mock() + form_data = { + 'percent_metrics': ['sum__A', 'avg__B', 'max__Y'], + 'metrics': ['sum__A', 'count', 'avg__C'], + } + test_viz = viz.TableViz(datasource, form_data) + f_query_obj = { + 'metrics': form_data['metrics'] + } + super_query_obj.return_value = f_query_obj + query_obj = test_viz.query_obj() + self.assertEqual([ + 'sum__A', 'count', 'avg__C', + 'avg__B', 'max__Y' + ], query_obj['metrics']) + + @patch('superset.viz.BaseViz.query_obj') + def test_query_obj_throws_columns_and_metrics(self, super_query_obj): + datasource = Mock() + form_data = { + 'all_columns': ['A', 'B'], + 'metrics': ['x', 'y'], + } + super_query_obj.return_value = {} + test_viz = viz.TableViz(datasource, form_data) + with self.assertRaises(Exception): + test_viz.query_obj() + del form_data['metrics'] + form_data['groupby'] = ['B', 'C'] + test_viz = viz.TableViz(datasource, form_data) + with self.assertRaises(Exception): + test_viz.query_obj() + + @patch('superset.viz.BaseViz.query_obj') + def test_query_obj_merges_all_columns(self, super_query_obj): + datasource = Mock() + form_data = { + 'all_columns': ['colA', 'colB', 'colC'], + 'order_by_cols': ['["colA", "colB"]', '["colC"]'] + } + super_query_obj.return_value = { + 'columns': ['colD', 'colC'], + 'groupby': ['colA', 'colB'], + } + test_viz = viz.TableViz(datasource, form_data) + query_obj = test_viz.query_obj() + self.assertEqual(form_data['all_columns'], query_obj['columns']) + self.assertEqual([], query_obj['groupby']) + self.assertEqual([['colA', 'colB'], ['colC']], query_obj['orderby']) + + @patch('superset.viz.BaseViz.query_obj') + def test_query_obj_uses_sortby(self, super_query_obj): + datasource = Mock() + form_data = { + 'timeseries_limit_metric': '__time__', + 'order_desc': False + } + super_query_obj.return_value = { + 'metrics': ['colA', 'colB'] + } + test_viz = viz.TableViz(datasource, form_data) + query_obj = test_viz.query_obj() + self.assertEqual([ + 'colA', 'colB', '__time__' + ], query_obj['metrics']) + self.assertEqual([( + '__time__', True + )], query_obj['orderby']) + + def test_should_be_timeseries_raises_when_no_granularity(self): + datasource = Mock() + form_data = {'include_time': True} + test_viz = viz.TableViz(datasource, form_data) + with self.assertRaises(Exception): + test_viz.should_be_timeseries() class PairedTTestTestCase(unittest.TestCase): @@ -97,7 +324,7 @@ def test_get_data_transforms_dataframe(self): }, ], } - self.assertEquals(data, expected) + self.assertEqual(data, expected) def test_get_data_empty_null_keys(self): form_data = { @@ -135,7 +362,7 @@ def test_get_data_empty_null_keys(self): }, ], } - self.assertEquals(data, expected) + self.assertEqual(data, expected) class PartitionVizTestCase(unittest.TestCase):