diff --git a/superset/assets/images/viz_thumbnails/partition.png b/superset/assets/images/viz_thumbnails/partition.png new file mode 100644 index 0000000000000..7cf6e1358a5fe Binary files /dev/null and b/superset/assets/images/viz_thumbnails/partition.png differ diff --git a/superset/assets/javascripts/components/OptionDescription.jsx b/superset/assets/javascripts/components/OptionDescription.jsx new file mode 100644 index 0000000000000..60cc731e1f514 --- /dev/null +++ b/superset/assets/javascripts/components/OptionDescription.jsx @@ -0,0 +1,28 @@ +import React from 'react'; +import PropTypes from 'prop-types'; + +import InfoTooltipWithTrigger from './InfoTooltipWithTrigger'; + +const propTypes = { + option: PropTypes.object.isRequired, +}; + +// This component provides a general tooltip for options +// in a SelectControl +export default function OptionDescription({ option }) { + return ( + + + {option.label} + + {option.description && + + } + ); +} +OptionDescription.propTypes = propTypes; diff --git a/superset/assets/javascripts/explore/stores/controls.jsx b/superset/assets/javascripts/explore/stores/controls.jsx index 78ef33cf41318..da8f22dc6ff5d 100644 --- a/superset/assets/javascripts/explore/stores/controls.jsx +++ b/superset/assets/javascripts/explore/stores/controls.jsx @@ -4,6 +4,7 @@ import * as v from '../validators'; import { ALL_COLOR_SCHEMES, spectrums } from '../../modules/colors'; import MetricOption from '../../components/MetricOption'; import ColumnOption from '../../components/ColumnOption'; +import OptionDescription from '../../components/OptionDescription'; import { t } from '../../locales'; const D3_FORMAT_DOCS = 'D3 format syntax: https://github.com/d3/d3-format'; @@ -98,6 +99,7 @@ export const controls = { }), description: t('One or many metrics to display'), }, + y_axis_bounds: { type: 'BoundsControl', label: t('Y Axis Bounds'), @@ -108,6 +110,7 @@ export const controls = { "this feature will only expand the axis range. It won't " + "narrow the data's extent."), }, + order_by_cols: { type: 'SelectControl', multi: true, @@ -909,6 +912,16 @@ export const controls = { description: D3_FORMAT_DOCS, }, + date_time_format: { + type: 'SelectControl', + freeForm: true, + label: t('Date Time Format'), + renderTrigger: true, + default: 'smart_date', + choices: D3_TIME_FORMAT_OPTIONS, + description: D3_FORMAT_DOCS, + }, + markup_type: { type: 'SelectControl', label: t('Markup Type'), @@ -1136,6 +1149,14 @@ export const controls = { description: t('Use a log scale for the X axis'), }, + log_scale: { + type: 'CheckboxControl', + label: t('Log Scale'), + default: false, + renderTrigger: true, + description: t('Use a log scale'), + }, + donut: { type: 'CheckboxControl', label: t('Donut'), @@ -1456,5 +1477,85 @@ export const controls = { controlName: 'TimeSeriesColumnControl', }, + time_series_option: { + type: 'SelectControl', + label: t('Options'), + validators: [v.nonEmpty], + default: 'not_time', + valueKey: 'value', + options: [ + { + label: t('Not Time Series'), + value: 'not_time', + description: t('Ignore time'), + }, + { + label: t('Time Series'), + value: 'time_series', + description: t('Standard time series'), + }, + { + label: t('Aggregate Mean'), + value: 'agg_mean', + description: t('Mean of values over specified period'), + }, + { + label: t('Aggregate Sum'), + value: 'agg_sum', + description: t('Sum of values over specified period'), + }, + { + label: t('Difference'), + value: 'point_diff', + description: t('Metric change in value from `since` to `until`'), + }, + { + label: t('Percent Change'), + value: 'point_percent', + description: t('Metric percent change in value from `since` to `until`'), + }, + { + label: t('Factor'), + value: 'point_factor', + description: t('Metric factor change from `since` to `until`'), + }, + { + label: t('Advanced Analytics'), + value: 'adv_anal', + description: t('Use the Advanced Analytics options below'), + }, + ], + optionRenderer: op => , + valueRenderer: op => , + description: t('Settings for time series'), + }, + + equal_date_size: { + type: 'CheckboxControl', + label: t('Equal Date Sizes'), + default: true, + renderTrigger: true, + description: t('Check to force date partitions to have the same height'), + }, + + partition_limit: { + type: 'TextControl', + label: t('Partition Limit'), + isInt: true, + default: '5', + description: + t('The maximum number of subdivisions of each group; ' + + 'lower values are pruned first'), + }, + + partition_threshold: { + type: 'TextControl', + label: t('Partition Threshold'), + isFloat: true, + default: '0.05', + description: + t('Partitions whose height to parent height proportions are ' + + 'below this value are pruned'), + }, }; export default controls; diff --git a/superset/assets/javascripts/explore/stores/visTypes.js b/superset/assets/javascripts/explore/stores/visTypes.js index da142aec694e7..09755550169b0 100644 --- a/superset/assets/javascripts/explore/stores/visTypes.js +++ b/superset/assets/javascripts/explore/stores/visTypes.js @@ -1155,6 +1155,33 @@ export const visTypes = { }, ], }, + + partition: { + label: 'Partition Diagram', + showOnExplore: true, + controlPanelSections: [ + sections.NVD3TimeSeries[0], + { + label: t('Time Series Options'), + expanded: true, + controlSetRows: [ + ['time_series_option'], + ], + }, + { + label: t('Chart Options'), + expanded: true, + controlSetRows: [ + ['color_scheme'], + ['number_format', 'date_time_format'], + ['partition_limit', 'partition_threshold'], + ['log_scale', 'equal_date_size'], + ['rich_tooltip'], + ], + }, + sections.NVD3TimeSeries[1], + ], + }, }; export default visTypes; diff --git a/superset/assets/package.json b/superset/assets/package.json index 3dfdb78240f9b..06ae76563b553 100644 --- a/superset/assets/package.json +++ b/superset/assets/package.json @@ -52,6 +52,7 @@ "d3-sankey": "^0.4.2", "d3-svg-legend": "^1.x", "d3-tip": "^0.6.7", + "d3-hierarchy": "^1.1.5", "datamaps": "^0.5.8", "datatables.net-bs": "^1.10.15", "distributions": "^1.0.0", diff --git a/superset/assets/visualizations/main.js b/superset/assets/visualizations/main.js index dc5ee30516270..78e81ab6d7340 100644 --- a/superset/assets/visualizations/main.js +++ b/superset/assets/visualizations/main.js @@ -35,5 +35,6 @@ const vizMap = { dual_line: require('./nvd3_vis.js'), event_flow: require('./EventFlow.jsx'), paired_ttest: require('./paired_ttest.jsx'), + partition: require('./partition.js'), }; export default vizMap; diff --git a/superset/assets/visualizations/partition.css b/superset/assets/visualizations/partition.css new file mode 100644 index 0000000000000..e23cca795203f --- /dev/null +++ b/superset/assets/visualizations/partition.css @@ -0,0 +1,27 @@ +.partition .chart { + display: block; + margin: auto; + font-size: 11px; +} + +.partition rect { + stroke: #eee; + fill: #aaa; + fill-opacity: .8; + transition: fill-opacity 180ms linear; + cursor: pointer; +} + +.partition rect:hover { + fill-opacity: 1; +} + +.partition g text { + font-weight: bold; + pointer-events: none; + fill: rgba(0, 0, 0, 0.8); +} + +.partition g:hover text { + fill: rgba(0, 0, 0, 1); +} diff --git a/superset/assets/visualizations/partition.js b/superset/assets/visualizations/partition.js new file mode 100644 index 0000000000000..a91611ce007bf --- /dev/null +++ b/superset/assets/visualizations/partition.js @@ -0,0 +1,333 @@ +/* eslint no-param-reassign: [2, {"props": false}] */ +/* eslint no-use-before-define: ["error", { "functions": false }] */ +import d3 from 'd3'; +import { + d3TimeFormatPreset, +} from '../javascripts/modules/utils'; +import { getColorFromScheme } from '../javascripts/modules/colors'; + +import './partition.css'; + +d3.hierarchy = require('d3-hierarchy').hierarchy; +d3.partition = require('d3-hierarchy').partition; + +function init(root) { + // Compute dx, dy, x, y for each node and + // return an array of nodes in breadth-first order + const flat = []; + const dy = 1.0 / (root.height + 1); + let prev = null; + root.each((n) => { + n.y = dy * n.depth; + n.dy = dy; + if (!n.parent) { + n.x = 0; + n.dx = 1; + } else { + n.x = prev.depth === n.parent.depth ? 0 : prev.x + prev.dx; + n.dx = n.weight / n.parent.sum * n.parent.dx; + } + prev = n; + flat.push(n); + }); + return flat; +} + +// This vis is based on +// http://mbostock.github.io/d3/talk/20111018/partition.html +function partitionVis(slice, payload) { + const data = payload.data; + const fd = slice.formData; + const div = d3.select(slice.selector); + const metrics = fd.metrics || []; + + // Chart options + const logScale = fd.log_scale || false; + const chartType = fd.time_series_option || 'not_time'; + const hasTime = ['adv_anal', 'time_series'].indexOf(chartType) >= 0; + const format = d3.format(fd.number_format); + const timeFormat = d3TimeFormatPreset(fd.date_time_format); + + div.selectAll('*').remove(); + d3.selectAll('.nvtooltip').remove(); + const tooltip = d3 + .select('body') + .append('div') + .attr('class', 'nvtooltip') + .style('opacity', 0) + .style('top', 0) + .style('left', 0) + .style('position', 'fixed'); + + function drawVis(i, dat) { + const datum = dat[i]; + const w = slice.width(); + const h = slice.height() / data.length; + const x = d3.scale.linear().range([0, w]); + const y = d3.scale.linear().range([0, h]); + + const viz = div + .append('div') + .attr('class', 'chart') + .style('width', w + 'px') + .style('height', h + 'px') + .append('svg:svg') + .attr('width', w) + .attr('height', h); + + // Add padding between multiple visualizations + if (i !== data.length - 1 && data.length > 1) { + viz.style('padding-bottom', '3px'); + } + if (i !== 0 && data.length > 1) { + viz.style('padding-top', '3px'); + } + + const root = d3.hierarchy(datum); + + function hasDateNode(n) { + return metrics.indexOf(n.data.name) >= 0 && hasTime; + } + + // node.name is the metric/group name + // node.disp is the display value + // node.value determines sorting order + // node.weight determines partition height + // node.sum is the sum of children weights + root.eachAfter((n) => { + n.disp = n.data.val; + n.value = n.disp < 0 ? -n.disp : n.disp; + n.weight = n.value; + n.name = n.data.name; + // If the parent is a metric and we still have + // the time column, perform a date-time format + if (n.parent && hasDateNode(n.parent)) { + // Format timestamp values + n.weight = fd.equal_date_size ? 1 : n.value; + n.value = n.name; + n.name = timeFormat(n.name); + } + if (logScale) n.weight = Math.log(n.weight + 1); + n.disp = n.disp && !isNaN(n.disp) && isFinite(n.disp) ? format(n.disp) : ''; + }); + // Perform sort by weight + root.sort((a, b) => { + const v = b.value - a.value; + if (v === 0) { + return b.name > a.name ? 1 : -1; + } + return v; + }); + + // Prune data based on partition limit and threshold + // both are applied at the same time + if (fd.partition_threshold && fd.partition_threshold >= 0) { + // Compute weight sums as we go + root.each((n) => { + n.sum = n.children ? n.children.reduce((a, v) => a + v.weight, 0) || 1 : 1; + if (n.children) { + // Dates are not ordered by weight + if (hasDateNode(n)) { + if (fd.equal_date_size) { + return; + } + const removeIndices = []; + // Keep at least one child + for (let j = 1; j < n.children.length; j++) { + if (n.children[j].weight / n.sum < fd.partition_threshold) { + removeIndices.push(j); + } + } + for (let j = removeIndices.length - 1; j >= 0; j--) { + n.children.splice(removeIndices[j], 1); + } + } else { + // Find first child that falls below the threshold + let j; + for (j = 1; j < n.children.length; j++) { + if (n.children[j].weight / n.sum < fd.partition_threshold) { + break; + } + } + n.children = n.children.slice(0, j); + } + } + }); + } + if (fd.partition_limit && fd.partition_limit >= 0) { + root.each((n) => { + if (n.children && n.children.length > fd.partition_limit) { + if (!hasDateNode(n)) { + n.children = n.children.slice(0, fd.partition_limit); + } + } + }); + } + // Compute final weight sums + root.eachAfter((n) => { + n.sum = n.children ? n.children.reduce((a, v) => a + v.weight, 0) || 1 : 1; + }); + + const verboseMap = slice.datasource.verbose_map; + function getCategory(depth) { + if (!depth) { + return 'Metric'; + } + if (hasTime && depth === 1) { + return 'Date'; + } + const col = fd.groupby[depth - (hasTime ? 2 : 1)]; + return verboseMap[col] || col; + } + + function getAncestors(d) { + const ancestors = [d]; + let node = d; + while (node.parent) { + ancestors.push(node.parent); + node = node.parent; + } + return ancestors; + } + + function positionAndPopulate(tip, d) { + let t = ''; + if (!fd.rich_tooltip) { + t += ( + '' + ); + t += ( + '' + + '' + + `` + + `` + + '' + ); + } else { + const nodes = getAncestors(d); + nodes.forEach((n) => { + const atNode = n.depth === d.depth; + t += ''; + t += ( + `` + + `' + + `` + + `` + + `` + + '' + ); + }); + } + t += '
' + + `${getCategory(d.depth)}` + + '
' + + `
' + + '
${d.name}${d.disp}
` + + '
' + + '
${n.name}${n.disp}${getCategory(n.depth)}
'; + tip.html(t) + .style('left', (d3.event.pageX + 13) + 'px') + .style('top', (d3.event.pageY - 10) + 'px'); + } + + const g = viz + .selectAll('g') + .data(init(root)) + .enter() + .append('svg:g') + .attr('transform', d => `translate(${x(d.y)},${y(d.x)})`) + .on('click', click) + .on('mouseover', (d) => { + tooltip + .interrupt() + .transition() + .duration(100) + .style('opacity', 0.9); + positionAndPopulate(tooltip, d); + }) + .on('mousemove', (d) => { + positionAndPopulate(tooltip, d); + }) + .on('mouseout', () => { + tooltip + .interrupt() + .transition() + .duration(250) + .style('opacity', 0); + }); + + let kx = w / root.dx; + let ky = h / 1; + + g.append('svg:rect') + .attr('width', root.dy * kx) + .attr('height', d => d.dx * ky); + + g.append('svg:text') + .attr('transform', transform) + .attr('dy', '0.35em') + .style('opacity', d => d.dx * ky > 12 ? 1 : 0) + .text((d) => { + if (!d.disp) { + return d.name; + } + return `${d.name}: ${d.disp}`; + }); + + // Apply color scheme + g.selectAll('rect') + .style('fill', (d) => { + d.color = getColorFromScheme(d.name, fd.color_scheme); + return d.color; + }); + + // Zoom out when clicking outside vis + // d3.select(window) + // .on('click', () => click(root)); + + // Keep text centered in its division + function transform(d) { + return `translate(8,${d.dx * ky / 2})`; + } + + // When clicking a subdivision, the vis will zoom in to it + function click(d) { + if (!d.children) { + if (d.parent) { + // Clicking on the rightmost level should zoom in + return click(d.parent); + } + return false; + } + kx = (d.y ? w - 40 : w) / (1 - d.y); + ky = h / d.dx; + x.domain([d.y, 1]).range([d.y ? 40 : 0, w]); + y.domain([d.x, d.x + d.dx]); + + const t = g + .transition() + .duration(d3.event.altKey ? 7500 : 750) + .attr('transform', nd => `translate(${x(nd.y)},${y(nd.x)})`); + + t.select('rect') + .attr('width', d.dy * kx) + .attr('height', nd => nd.dx * ky); + + t.select('text') + .attr('transform', transform) + .style('opacity', nd => nd.dx * ky > 12 ? 1 : 0); + + d3.event.stopPropagation(); + return true; + } + } + for (let i = 0; i < data.length; i++) { + drawVis(i, data); + } +} + +module.exports = partitionVis; diff --git a/superset/viz.py b/superset/viz.py index a800bc0375a88..1d701b0a656e6 100644 --- a/superset/viz.py +++ b/superset/viz.py @@ -27,6 +27,7 @@ from markdown import markdown import simplejson as json from six import string_types, PY3 +from six.moves import reduce from dateutil import relativedelta as rdelta from superset import app, utils, cache, get_manifest_file @@ -915,7 +916,7 @@ def to_series(self, df, classed='', title_suffix=''): if isinstance(series_title, string_types): series_title += title_suffix elif title_suffix and isinstance(series_title, (list, tuple)): - series_title.append(title_suffix) + series_title = series_title + (title_suffix,) d = { "key": series_title, @@ -928,16 +929,24 @@ def to_series(self, df, classed='', title_suffix=''): chart_data.append(d) return chart_data - def process_data(self, df): + def process_data(self, df, aggregate=False): fd = self.form_data df = df.fillna(0) if fd.get("granularity") == "all": raise Exception(_("Pick a time granularity for your time series")) - df = df.pivot_table( - index=DTTM_ALIAS, - columns=fd.get('groupby'), - values=fd.get('metrics')) + if not aggregate: + df = df.pivot_table( + index=DTTM_ALIAS, + columns=fd.get('groupby'), + values=fd.get('metrics')) + else: + df = df.pivot_table( + index=DTTM_ALIAS, + columns=fd.get('groupby'), + values=fd.get('metrics'), + fill_value=0, + aggfunc=sum) fm = fd.get("resample_fillmethod") if not fm: @@ -1782,6 +1791,142 @@ def get_data(self, df): return data +class PartitionViz(NVD3TimeSeriesViz): + + """ + A hierarchical data visualization with support for time series. + """ + + viz_type = 'partition' + verbose_name = _("Partition Diagram") + + def query_obj(self): + query_obj = super(PartitionViz, self).query_obj() + time_op = self.form_data.get('time_series_option', 'not_time') + # Return time series data if the user specifies so + query_obj['is_timeseries'] = time_op != 'not_time' + return query_obj + + def levels_for(self, time_op, groups, df): + """ + Compute the partition at each `level` from the dataframe. + """ + levels = {} + for i in range(0, len(groups) + 1): + agg_df = df.groupby(groups[:i]) if i else df + levels[i] = ( + agg_df.mean() if time_op == 'agg_mean' + else agg_df.sum(numeric_only=True)) + return levels + + def levels_for_diff(self, time_op, groups, df): + # Obtain a unique list of the time grains + times = list(set(df[DTTM_ALIAS])) + times.sort() + until = times[len(times) - 1] + since = times[0] + # Function describing how to calculate the difference + func = { + 'point_diff': [ + pd.Series.sub, + lambda a, b, fill_value: a - b, + ], + 'point_factor': [ + pd.Series.div, + lambda a, b, fill_value: a / float(b), + ], + 'point_percent': [ + lambda a, b, fill_value=0: a.div(b, fill_value=fill_value) - 1, + lambda a, b, fill_value: a / float(b) - 1, + ], + }[time_op] + agg_df = df.groupby(DTTM_ALIAS).sum() + levels = {0: pd.Series({ + m: func[1](agg_df[m][until], agg_df[m][since], 0) + for m in agg_df.columns})} + for i in range(1, len(groups) + 1): + agg_df = df.groupby([DTTM_ALIAS] + groups[:i]).sum() + levels[i] = pd.DataFrame({ + m: func[0](agg_df[m][until], agg_df[m][since], fill_value=0) + for m in agg_df.columns}) + return levels + + def levels_for_time(self, groups, df): + procs = {} + for i in range(0, len(groups) + 1): + self.form_data['groupby'] = groups[:i] + df_drop = df.drop(groups[i:], 1) + procs[i] = self.process_data(df_drop, aggregate=True).fillna(0) + self.form_data['groupby'] = groups + return procs + + def nest_values(self, levels, level=0, metric=None, dims=()): + """ + Nest values at each level on the back-end with + access and setting, instead of summing from the bottom. + """ + if not level: + return [{ + 'name': m, + 'val': levels[0][m], + 'children': self.nest_values(levels, 1, m), + } for m in levels[0].index] + if level == 1: + return [{ + 'name': i, + 'val': levels[1][metric][i], + 'children': self.nest_values(levels, 2, metric, (i,)), + } for i in levels[1][metric].index] + if level >= len(levels): + return [] + return [{ + 'name': i, + 'val': levels[level][metric][dims][i], + 'children': self.nest_values( + levels, level + 1, metric, dims + (i,) + ), + } for i in levels[level][metric][dims].index] + + def nest_procs(self, procs, level=-1, dims=(), time=None): + if level == -1: + return [{ + 'name': m, + 'children': self.nest_procs(procs, 0, (m,)), + } for m in procs[0].columns] + if not level: + return [{ + 'name': t, + 'val': procs[0][dims[0]][t], + 'children': self.nest_procs(procs, 1, dims, t), + } for t in procs[0].index] + if level >= len(procs): + return [] + return [{ + 'name': i, + 'val': procs[level][dims][i][time], + 'children': self.nest_procs(procs, level + 1, dims + (i,), time) + } for i in procs[level][dims].columns] + + def get_data(self, df): + fd = self.form_data + groups = fd.get('groupby', []) + time_op = fd.get('time_series_option', 'not_time') + if not len(groups): + raise ValueError('Please choose at least one groupby') + if time_op == 'not_time': + levels = self.levels_for('agg_sum', groups, df) + elif time_op in ['agg_sum', 'agg_mean']: + levels = self.levels_for(time_op, groups, df) + elif time_op in ['point_diff', 'point_factor', 'point_percent']: + levels = self.levels_for_diff(time_op, groups, df) + elif time_op == 'adv_anal': + procs = self.levels_for_time(groups, df) + return self.nest_procs(procs) + else: + levels = self.levels_for('agg_sum', [DTTM_ALIAS] + groups, df) + return self.nest_values(levels) + + viz_types = { o.viz_type: o for o in globals().values() if ( diff --git a/tests/viz_tests.py b/tests/viz_tests.py index a4beab3e987d8..fec424a25ac90 100644 --- a/tests/viz_tests.py +++ b/tests/viz_tests.py @@ -3,6 +3,7 @@ import superset.viz as viz from superset.utils import DTTM_ALIAS +from mock import Mock, patch class PairedTTestTestCase(unittest.TestCase): @@ -135,3 +136,227 @@ def test_get_data_empty_null_keys(self): ], } self.assertEquals(data, expected) + + +class PartitionVizTestCase(unittest.TestCase): + + @patch('superset.viz.BaseViz.query_obj') + def test_query_obj_time_series_option(self, super_query_obj): + datasource = Mock() + form_data = {} + test_viz = viz.PartitionViz(datasource, form_data) + super_query_obj.return_value = {} + query_obj = test_viz.query_obj() + self.assertFalse(query_obj['is_timeseries']) + test_viz.form_data['time_series_option'] = 'agg_sum' + query_obj = test_viz.query_obj() + self.assertTrue(query_obj['is_timeseries']) + + def test_levels_for_computes_levels(self): + raw = {} + raw[DTTM_ALIAS] = [100, 200, 300, 100, 200, 300, 100, 200, 300] + raw['groupA'] = ['a1', 'a1', 'a1', 'b1', 'b1', 'b1', 'c1', 'c1', 'c1'] + raw['groupB'] = ['a2', 'a2', 'a2', 'b2', 'b2', 'b2', 'c2', 'c2', 'c2'] + raw['groupC'] = ['a3', 'a3', 'a3', 'b3', 'b3', 'b3', 'c3', 'c3', 'c3'] + raw['metric1'] = [1, 2, 3, 4, 5, 6, 7, 8, 9] + raw['metric2'] = [10, 20, 30, 40, 50, 60, 70, 80, 90] + raw['metric3'] = [100, 200, 300, 400, 500, 600, 700, 800, 900] + df = pd.DataFrame(raw) + groups = ['groupA', 'groupB', 'groupC'] + time_op = 'agg_sum' + test_viz = viz.PartitionViz(Mock(), {}) + levels = test_viz.levels_for(time_op, groups, df) + self.assertEqual(4, len(levels)) + expected = { + DTTM_ALIAS: 1800, + 'metric1': 45, + 'metric2': 450, + 'metric3': 4500, + } + self.assertEqual(expected, levels[0].to_dict()) + expected = { + DTTM_ALIAS: {'a1': 600, 'b1': 600, 'c1': 600}, + 'metric1': {'a1': 6, 'b1': 15, 'c1': 24}, + 'metric2': {'a1': 60, 'b1': 150, 'c1': 240}, + 'metric3': {'a1': 600, 'b1': 1500, 'c1': 2400}, + } + self.assertEqual(expected, levels[1].to_dict()) + self.assertEqual(['groupA', 'groupB'], levels[2].index.names) + self.assertEqual( + ['groupA', 'groupB', 'groupC'], + levels[3].index.names, + ) + time_op = 'agg_mean' + levels = test_viz.levels_for(time_op, groups, df) + self.assertEqual(4, len(levels)) + expected = { + DTTM_ALIAS: 200.0, + 'metric1': 5.0, + 'metric2': 50.0, + 'metric3': 500.0, + } + self.assertEqual(expected, levels[0].to_dict()) + expected = { + DTTM_ALIAS: {'a1': 200, 'c1': 200, 'b1': 200}, + 'metric1': {'a1': 2, 'b1': 5, 'c1': 8}, + 'metric2': {'a1': 20, 'b1': 50, 'c1': 80}, + 'metric3': {'a1': 200, 'b1': 500, 'c1': 800}, + } + self.assertEqual(expected, levels[1].to_dict()) + self.assertEqual(['groupA', 'groupB'], levels[2].index.names) + self.assertEqual( + ['groupA', 'groupB', 'groupC'], + levels[3].index.names, + ) + + def test_levels_for_diff_computes_difference(self): + raw = {} + raw[DTTM_ALIAS] = [100, 200, 300, 100, 200, 300, 100, 200, 300] + raw['groupA'] = ['a1', 'a1', 'a1', 'b1', 'b1', 'b1', 'c1', 'c1', 'c1'] + raw['groupB'] = ['a2', 'a2', 'a2', 'b2', 'b2', 'b2', 'c2', 'c2', 'c2'] + raw['groupC'] = ['a3', 'a3', 'a3', 'b3', 'b3', 'b3', 'c3', 'c3', 'c3'] + raw['metric1'] = [1, 2, 3, 4, 5, 6, 7, 8, 9] + raw['metric2'] = [10, 20, 30, 40, 50, 60, 70, 80, 90] + raw['metric3'] = [100, 200, 300, 400, 500, 600, 700, 800, 900] + df = pd.DataFrame(raw) + groups = ['groupA', 'groupB', 'groupC'] + test_viz = viz.PartitionViz(Mock(), {}) + time_op = 'point_diff' + levels = test_viz.levels_for_diff(time_op, groups, df) + expected = { + 'metric1': 6, + 'metric2': 60, + 'metric3': 600, + } + self.assertEqual(expected, levels[0].to_dict()) + expected = { + 'metric1': {'a1': 2, 'b1': 2, 'c1': 2}, + 'metric2': {'a1': 20, 'b1': 20, 'c1': 20}, + 'metric3': {'a1': 200, 'b1': 200, 'c1': 200}, + } + self.assertEqual(expected, levels[1].to_dict()) + self.assertEqual(4, len(levels)) + self.assertEqual(['groupA', 'groupB', 'groupC'], levels[3].index.names) + + def test_levels_for_time_calls_process_data_and_drops_cols(self): + raw = {} + raw[DTTM_ALIAS] = [100, 200, 300, 100, 200, 300, 100, 200, 300] + raw['groupA'] = ['a1', 'a1', 'a1', 'b1', 'b1', 'b1', 'c1', 'c1', 'c1'] + raw['groupB'] = ['a2', 'a2', 'a2', 'b2', 'b2', 'b2', 'c2', 'c2', 'c2'] + raw['groupC'] = ['a3', 'a3', 'a3', 'b3', 'b3', 'b3', 'c3', 'c3', 'c3'] + raw['metric1'] = [1, 2, 3, 4, 5, 6, 7, 8, 9] + raw['metric2'] = [10, 20, 30, 40, 50, 60, 70, 80, 90] + raw['metric3'] = [100, 200, 300, 400, 500, 600, 700, 800, 900] + df = pd.DataFrame(raw) + groups = ['groupA', 'groupB', 'groupC'] + test_viz = viz.PartitionViz(Mock(), {'groupby': groups}) + + def return_args(df_drop, aggregate): + return df_drop + test_viz.process_data = Mock(side_effect=return_args) + levels = test_viz.levels_for_time(groups, df) + self.assertEqual(4, len(levels)) + cols = [DTTM_ALIAS, 'metric1', 'metric2', 'metric3'] + self.assertEqual(sorted(cols), sorted(levels[0].columns.tolist())) + cols += ['groupA'] + self.assertEqual(sorted(cols), sorted(levels[1].columns.tolist())) + cols += ['groupB'] + self.assertEqual(sorted(cols), sorted(levels[2].columns.tolist())) + cols += ['groupC'] + self.assertEqual(sorted(cols), sorted(levels[3].columns.tolist())) + self.assertEqual(4, len(test_viz.process_data.mock_calls)) + + def test_nest_values_returns_hierarchy(self): + raw = {} + raw['groupA'] = ['a1', 'a1', 'a1', 'b1', 'b1', 'b1', 'c1', 'c1', 'c1'] + raw['groupB'] = ['a2', 'a2', 'a2', 'b2', 'b2', 'b2', 'c2', 'c2', 'c2'] + raw['groupC'] = ['a3', 'a3', 'a3', 'b3', 'b3', 'b3', 'c3', 'c3', 'c3'] + raw['metric1'] = [1, 2, 3, 4, 5, 6, 7, 8, 9] + raw['metric2'] = [10, 20, 30, 40, 50, 60, 70, 80, 90] + raw['metric3'] = [100, 200, 300, 400, 500, 600, 700, 800, 900] + df = pd.DataFrame(raw) + test_viz = viz.PartitionViz(Mock(), {}) + groups = ['groupA', 'groupB', 'groupC'] + levels = test_viz.levels_for('agg_sum', groups, df) + nest = test_viz.nest_values(levels) + self.assertEqual(3, len(nest)) + for i in range(0, 3): + self.assertEqual('metric' + str(i + 1), nest[i]['name']) + self.assertEqual(3, len(nest[0]['children'])) + self.assertEqual(1, len(nest[0]['children'][0]['children'])) + self.assertEqual(1, len(nest[0]['children'][0]['children'][0]['children'])) + + def test_nest_procs_returns_hierarchy(self): + raw = {} + raw[DTTM_ALIAS] = [100, 200, 300, 100, 200, 300, 100, 200, 300] + raw['groupA'] = ['a1', 'a1', 'a1', 'b1', 'b1', 'b1', 'c1', 'c1', 'c1'] + raw['groupB'] = ['a2', 'a2', 'a2', 'b2', 'b2', 'b2', 'c2', 'c2', 'c2'] + raw['groupC'] = ['a3', 'a3', 'a3', 'b3', 'b3', 'b3', 'c3', 'c3', 'c3'] + raw['metric1'] = [1, 2, 3, 4, 5, 6, 7, 8, 9] + raw['metric2'] = [10, 20, 30, 40, 50, 60, 70, 80, 90] + raw['metric3'] = [100, 200, 300, 400, 500, 600, 700, 800, 900] + df = pd.DataFrame(raw) + test_viz = viz.PartitionViz(Mock(), {}) + groups = ['groupA', 'groupB', 'groupC'] + metrics = ['metric1', 'metric2', 'metric3'] + procs = {} + for i in range(0, 4): + df_drop = df.drop(groups[i:], 1) + pivot = df_drop.pivot_table( + index=DTTM_ALIAS, + columns=groups[:i], + values=metrics, + ) + procs[i] = pivot + nest = test_viz.nest_procs(procs) + self.assertEqual(3, len(nest)) + for i in range(0, 3): + self.assertEqual('metric' + str(i + 1), nest[i]['name']) + self.assertEqual(None, nest[i].get('val')) + self.assertEqual(3, len(nest[0]['children'])) + self.assertEqual(3, len(nest[0]['children'][0]['children'])) + self.assertEqual(1, len(nest[0]['children'][0]['children'][0]['children'])) + self.assertEqual(1, + len(nest[0]['children'] + [0]['children'] + [0]['children'] + [0]['children']) + ) + + def test_get_data_calls_correct_method(self): + test_viz = viz.PartitionViz(Mock(), {}) + df = Mock() + with self.assertRaises(ValueError): + test_viz.get_data(df) + test_viz.levels_for = Mock(return_value=1) + test_viz.nest_values = Mock(return_value=1) + test_viz.form_data['groupby'] = ['groups'] + test_viz.form_data['time_series_option'] = 'not_time' + test_viz.get_data(df) + self.assertEqual('agg_sum', test_viz.levels_for.mock_calls[0][1][0]) + test_viz.form_data['time_series_option'] = 'agg_sum' + test_viz.get_data(df) + self.assertEqual('agg_sum', test_viz.levels_for.mock_calls[1][1][0]) + test_viz.form_data['time_series_option'] = 'agg_mean' + test_viz.get_data(df) + self.assertEqual('agg_mean', test_viz.levels_for.mock_calls[2][1][0]) + test_viz.form_data['time_series_option'] = 'point_diff' + test_viz.levels_for_diff = Mock(return_value=1) + test_viz.get_data(df) + self.assertEqual('point_diff', test_viz.levels_for_diff.mock_calls[0][1][0]) + test_viz.form_data['time_series_option'] = 'point_percent' + test_viz.get_data(df) + self.assertEqual('point_percent', test_viz.levels_for_diff.mock_calls[1][1][0]) + test_viz.form_data['time_series_option'] = 'point_factor' + test_viz.get_data(df) + self.assertEqual('point_factor', test_viz.levels_for_diff.mock_calls[2][1][0]) + test_viz.levels_for_time = Mock(return_value=1) + test_viz.nest_procs = Mock(return_value=1) + test_viz.form_data['time_series_option'] = 'adv_anal' + test_viz.get_data(df) + self.assertEqual(1, len(test_viz.levels_for_time.mock_calls)) + self.assertEqual(1, len(test_viz.nest_procs.mock_calls)) + test_viz.form_data['time_series_option'] = 'time_series' + test_viz.get_data(df) + self.assertEqual('agg_sum', test_viz.levels_for.mock_calls[3][1][0]) + self.assertEqual(7, len(test_viz.nest_values.mock_calls))