From 2b9352d2fc63a9a22228fbcbd3555689e983aaf7 Mon Sep 17 00:00:00 2001 From: Jaroslav Tulach Date: Tue, 23 Aug 2022 14:12:22 +0200 Subject: [PATCH] Lazy scatterplot for Vector & Table (#3655) First of all this PR demonstrates how to implement _lazy visualization_: - one needs to write/enhance Enso visualization libraries - this PR adds two optional parameters (`bounds` and `limit`) to the `process_to_json_text` function. - the `process_to_json_text` can be tested by the standard Enso test harness which this PR also does - then one has to modify JavaScript on the IDE side to construct `setPreprocessor` expression using the optional parameters The idea of _scatter plot lazy visualization_ is to limit the number of points the IDE requests. Initially the limit is set to `limit=1024`. The `Scatter_Plot.enso` then processes the data and selects/generates the `limit` subset. Right now it includes `min`, `max` on both the `x` and `y` axes plus randomly chosen points up to the `limit`. ![Zooming In](https://user-images.githubusercontent.com/26887752/185336126-f4fbd914-7fd8-4f0b-8377-178095401f46.png) The D3 visualization widget is capable of _zooming in_. When that happens the JavaScript widget composes a new expression with `bounds` set to the newly visible area. By calling `setPreprocessor` the engine recomputes the visualization data, filters out any data outside of the `bounds` and selects another `limit` points from the new data. The IDE visualization then updates itself to display these more detailed data. Users can zoom in to see the smallest detail where the number of points drops below `limit` or they can select _Fit all_ to see all the data without any `bounds`. # Important Notes Randomly selecting `limit` samples from the dataset may be misleading. Probably implementing _k-means clustering_ (where `k=limit`) would generate a more representative approximation. 
--- CHANGELOG.md | 2 + .../visualization/java_script/scatterPlot.js | 22 +++++- .../0.0.0-dev/src/Scatter_Plot.enso | 71 +++++++++++++++---- .../src/Scatter_Plot_Spec.enso | 44 +++++++++++- 4 files changed, 120 insertions(+), 19 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2befbf94b4c1..f734077a1065 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -42,6 +42,7 @@ visualizations. (Previously, they were placed to the left of the visualizations.) - [Fixed histograms coloring and added a color legend.][3153] +- [Lazy visualization for scatter plot.][3655] - [Fixed broken node whose expression contains non-ASCII characters.][3166] - [Fixed developer console warnings about views being created but not registered.][3181] @@ -185,6 +186,7 @@ [debug-shortcuts]: https://github.com/enso-org/enso/blob/develop/app/gui/docs/product/shortcuts.md#debug [3153]: https://github.com/enso-org/enso/pull/3153 +[3655]: https://github.com/enso-org/enso/pull/3655 [3166]: https://github.com/enso-org/enso/pull/3166 [3181]: https://github.com/enso-org/enso/pull/3181 [3186]: https://github.com/enso-org/enso/pull/3186 diff --git a/app/gui/view/graph-editor/src/builtin/visualization/java_script/scatterPlot.js b/app/gui/view/graph-editor/src/builtin/visualization/java_script/scatterPlot.js index 27f0c79b6a0f..f7c91888a65d 100644 --- a/app/gui/view/graph-editor/src/builtin/visualization/java_script/scatterPlot.js +++ b/app/gui/view/graph-editor/src/builtin/visualization/java_script/scatterPlot.js @@ -17,6 +17,7 @@ const LINEAR_SCALE = 'linear' const LOGARITHMIC_SCALE = 'logarithmic' const VISIBLE_POINTS = 'visible' const BUTTONS_HEIGHT = 25 +const DEFAULT_LIMIT = 1024 /** * A d3.js ScatterPlot visualization. 
@@ -49,7 +50,9 @@ class ScatterPlot extends Visualization { constructor(data) { super(data) - this.setPreprocessor('process_to_json_text', 'Standard.Visualization.Scatter_Plot') + this.bounds = null + this.limit = DEFAULT_LIMIT + this.updatePreprocessor() this.dataPoints = [] this.axis = { x: { scale: LINEAR_SCALE }, @@ -58,6 +61,14 @@ class ScatterPlot extends Visualization { this.points = { labels: VISIBLE_POINTS } } + updatePreprocessor() { + let fn = 'x -> process_to_json_text x limit=' + this.limit + if (this.bounds) { + fn += ' bounds=[' + this.bounds.join(',') + ']' + } + this.setPreprocessor(fn, 'Standard.Visualization.Scatter_Plot') + } + /** * Presents a scatterplot visualization after receiving `data`. */ @@ -377,6 +388,9 @@ class ScatterPlot extends Visualization { let yMin = zoom.transformedScale.yScale.invert(extent[1][1]) let yMax = zoom.transformedScale.yScale.invert(extent[0][1]) + this.bounds = [xMin, yMin, xMax, yMax] + this.updatePreprocessor() + zoom.transformedScale.xScale.domain([xMin, xMax]) zoom.transformedScale.yScale.domain([yMin, yMax]) @@ -726,7 +740,7 @@ class ScatterPlot extends Visualization { addStyleToElem( 'button', ` - margin-left: 5px; + margin-left: 5px; margin-bottom: 5px; display: inline-block; padding: 2px 10px; @@ -806,6 +820,10 @@ class ScatterPlot extends Visualization { zoom.transformedScale.yScale.domain(domainY) self.zoomingHelper(zoom.transformedScale, boxWidth, scatter, points) + + self.bounds = null + self.limit = DEFAULT_LIMIT + self.updatePreprocessor() } document.addEventListener('keydown', e => { diff --git a/distribution/lib/Standard/Visualization/0.0.0-dev/src/Scatter_Plot.enso b/distribution/lib/Standard/Visualization/0.0.0-dev/src/Scatter_Plot.enso index dd360554261b..5f063a82243b 100644 --- a/distribution/lib/Standard/Visualization/0.0.0-dev/src/Scatter_Plot.enso +++ b/distribution/lib/Standard/Visualization/0.0.0-dev/src/Scatter_Plot.enso @@ -3,6 +3,7 @@ from Standard.Base import all import 
Standard.Table.Data.Column import Standard.Table.Data.Table import Standard.Visualization.Helpers +from Standard.Base.Data.Index_Sub_Range import Sample ## PRIVATE @@ -133,23 +134,63 @@ Table.Table.axes self = ## PRIVATE Vector.Vector.point_data : Vector -> Object -Vector.Vector.point_data self = - self.map_with_index <| i-> elem-> - Json.from_pairs [[X.name,i],[Y.name,elem]] +Vector.Vector.point_data self = + self.map_with_index <| i-> elem-> + Json.from_pairs [[X.name,i],[Y.name,elem]] ## PRIVATE -json_from_table : Table -> Json -json_from_table table = - data = table.point_data +bound_data bounds data = case bounds of + Nothing -> data + _ -> + min_x = bounds.at 0 + min_y = bounds.at 1 + max_x = bounds.at 2 + max_y = bounds.at 3 + + data.filter <| datum-> + x = datum.get "x" . unwrap + y = datum.get "y" . unwrap + + min_x<=x && x<=max_x && min_y<=y && y<=max_y + +type Extreme min_x max_x min_y max_y + +## PRIVATE +limit_data limit data = case limit of + Nothing -> data + _ -> if ((data.length <= limit) || (data.length == 0)) then data else + x datum = (datum.get "x").unwrap + y datum = (datum.get "y").unwrap + + update_extreme current idx point = + new_min_x = if x current.min_x.second > x point then [idx, point] else current.min_x + new_min_y = if y current.min_y.second > y point then [idx, point] else current.min_y + new_max_x = if x current.max_x.second < x point then [idx, point] else current.max_x + new_max_y = if y current.max_y.second < y point then [idx, point] else current.max_y + Extreme new_min_x new_max_x new_min_y new_max_y + + first = [0, data.first] + bounds = case data.fold_with_index (Extreme first first first first) update_extreme of + Extreme min_x max_x min_y max_y -> [min_x, max_x, min_y, max_y] + _ -> [] + extreme = Map.from_vector bounds . 
values + + if limit <= extreme.length then extreme.take (First limit) else + extreme + data.take (Sample (limit - extreme.length)) + +## PRIVATE +json_from_table : Table -> [Int]|Nothing -> Int|Nothing -> Json +json_from_table table bounds limit = + data = table.point_data |> bound_data bounds |> limit_data limit axes = table.axes Json.from_pairs <| [[data_field,data], [axis_field, axes]] ## PRIVATE -json_from_vector : Vector Any -> Json -json_from_vector vec = - data = [data_field, vec.point_data] +json_from_vector : Vector Any -> [Int]|Nothing -> Int|Nothing -> Json +json_from_vector vec bounds limit = + data = vec.point_data |> bound_data bounds |> limit_data limit axes = [axis_field, Nothing] - Json.from_pairs [data, axes] + Json.from_pairs <| [[data_field, data], axes] ## PRIVATE @@ -160,12 +201,12 @@ json_from_vector vec = Arguments: - value: the value to be visualized. process_to_json_text : Any -> Text -process_to_json_text value = +process_to_json_text value bounds=Nothing limit=Nothing = json = case value of - Column.Column _ -> json_from_table value.to_table - Table.Table _ -> json_from_table value - Vector.Vector _ -> json_from_vector value - _ -> json_from_vector value.to_vector + Column.Column _ -> json_from_table value.to_table bounds limit + Table.Table _ -> json_from_table value bounds limit + Vector.Vector _ -> json_from_vector value bounds limit + _ -> json_from_vector value.to_vector bounds limit json.to_text diff --git a/test/Visualization_Tests/src/Scatter_Plot_Spec.enso b/test/Visualization_Tests/src/Scatter_Plot_Spec.enso index 9df3f8030848..3d03d7c59cb7 100644 --- a/test/Visualization_Tests/src/Scatter_Plot_Spec.enso +++ b/test/Visualization_Tests/src/Scatter_Plot_Spec.enso @@ -10,8 +10,7 @@ import Standard.Test import project spec = - expect value axis_expected_text data_expected_text = - text = Scatter_Plot.process_to_json_text value + expect_text text axis_expected_text data_expected_text = json = Json.parse text 
json.fields.keys.should_equal ['axis','data'] @@ -20,6 +19,10 @@ spec = expected_result = Json.from_pairs [expected_axis_labels, expected_data_pair] json.should_equal expected_result + expect value axis_expected_text data_expected_text = + text = Scatter_Plot.process_to_json_text value + expect_text text axis_expected_text data_expected_text + index = Scatter_Plot.index_name axis label = Json.from_pairs [['label',label]] labels x y = Json.from_pairs [['x', axis x], ['y', axis y]] . to_text @@ -70,6 +73,17 @@ spec = table = Table.from_rows header [row_1, row_2] expect table (labels 'x' 'y') '[{"size":40,"x":11,"y":10},{"size":50,"x":21,"y":20}]' + Test.specify "provided only recognized columns within bounds" <| + header = ['x', 'y' , 'bar' , 'size'] + row_1 = [1 , 1 , '11' , 30 ] + row_2 = [11 , 10 , 'aa' , 40 ] + row_3 = [21 , 20 , 'bb' , 50 ] + row_4 = [31 , 30 , 'cc' , 60 ] + table = Table.from_rows header [row_1, row_2, row_3, row_4] + bounds = [0,5,25,25] + text = Scatter_Plot.process_to_json_text table bounds + expect_text text (labels 'x' 'y') '[{"size":40,"x":11,"y":10},{"size":50,"x":21,"y":20}]' + Test.specify "used specified numeric index for x if missing 'x' column from table" <| header = [ 'y' , 'foo', 'bar', 'baz' , 'size'] row_1 = [ 10 , 'aa' , 12 , 14 , 40 ] @@ -96,6 +110,30 @@ spec = vector = [0,10,20] expect vector no_labels '[{"x":0,"y":0},{"x":1,"y":10},{"x":2,"y":20}]' + Test.specify "limit the number of elements" <| + vector = [0,10,20,30] + text = Scatter_Plot.process_to_json_text vector limit=2 + json = Json.parse text + json.fields.keys.should_equal ['axis','data'] + data = json.fields.get 'data' + data.unwrap.length . should_equal 2 + + + Test.specify "limit the number of squared elements" <| + vector = (-15).up_to 15 . map (x -> x * x) + text = Scatter_Plot.process_to_json_text vector limit=10 + json = Json.parse text + json.fields.keys.should_equal ['axis','data'] + data = (json.fields.get 'data') . unwrap + data.length . 
should_equal 10 + (data.take (First 3)).to_text . should_equal '[[[\'x\', 0], [\'y\', 225]], [[\'x\', 15], [\'y\', 0]], [[\'x\', 29], [\'y\', 196]]]' + + Test.specify "filter the elements" <| + vector = [0,10,20,30] + bounds = [0,5,10,25] + text = Scatter_Plot.process_to_json_text vector bounds + expect_text text no_labels '[{"x":1,"y":10},{"x":2,"y":20}]' + Test.specify "using indices for x if given a column" <| column = Column.from_vector 'some_col' [10,2,3] expect column (labels 'index' 'some_col') '[{"x":0,"y":10},{"x":1,"y":2},{"x":2,"y":3}]' @@ -103,3 +141,5 @@ spec = Test.specify "using indices for x if given a range" <| value = 2.up_to 5 expect value no_labels '[{"x":0,"y":2},{"x":1,"y":3},{"x":2,"y":4}]' + +main = Test.Suite.run_main spec