-
Notifications
You must be signed in to change notification settings - Fork 624
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
boxplot doesn't work with column encoding #4156
Comments
I know this is likely overkill, but just to note that the same problem exists for |
The issue seems to not be in the boxplot logic. |
Here is a normalized spec that shows the issue: {
"data": {
"values": [
{
"homework_done": false,
"session_time_m": 2,
"session_hour": 1
},
{
"homework_done": false,
"session_time_m": 0,
"session_hour": 2
}
]
},
"$schema": "https://vega.github.io/schema/vega-lite/v3.0.0.json",
"facet": {
"column": {
"type": "nominal",
"field": "session_hour"
}
},
"spec": {
"layer": [
{
"transform": [
{
"aggregate": [
{
"op": "q1",
"field": "session_time_m",
"as": "lower_box_session_time_m"
},
{
"op": "q3",
"field": "session_time_m",
"as": "upper_box_session_time_m"
},
{
"op": "median",
"field": "session_time_m",
"as": "mid_box_session_time_m"
},
{
"op": "min",
"field": "session_time_m",
"as": "min_session_time_m"
},
{
"op": "max",
"field": "session_time_m",
"as": "max_session_time_m"
}
],
"groupby": [
"homework_done"
]
},
{
"calculate": "datum.upper_box_session_time_m - datum.lower_box_session_time_m",
"as": "iqr_session_time_m"
},
{
"calculate": "min(datum.upper_box_session_time_m + datum.iqr_session_time_m * 1.5, datum.max_session_time_m)",
"as": "upper_whisker_session_time_m"
},
{
"calculate": "max(datum.lower_box_session_time_m - datum.iqr_session_time_m * 1.5, datum.min_session_time_m)",
"as": "lower_whisker_session_time_m"
}
],
"layer": [
{
"mark": {
"type": "rule",
"style": "boxplot-rule"
},
"encoding": {
"y": {
"field": "lower_whisker_session_time_m",
"type": "quantitative",
"title": "session_time_m"
},
"y2": {
"field": "lower_box_session_time_m",
"type": "quantitative"
},
"x": {
"field": "homework_done",
"type": "nominal",
"title": "homework_done"
}
}
},
{
"mark": {
"type": "rule",
"style": "boxplot-rule"
},
"encoding": {
"y": {
"field": "upper_box_session_time_m",
"type": "quantitative",
"title": "session_time_m"
},
"y2": {
"field": "upper_whisker_session_time_m",
"type": "quantitative"
},
"x": {
"field": "homework_done",
"type": "nominal",
"title": "homework_done"
}
}
},
{
"mark": {
"type": "bar",
"size": 14,
"style": "boxplot-box"
},
"encoding": {
"y": {
"field": "lower_box_session_time_m",
"type": "quantitative",
"title": "session_time_m"
},
"y2": {
"field": "upper_box_session_time_m",
"type": "quantitative"
},
"x": {
"field": "homework_done",
"type": "nominal",
"title": "homework_done"
}
}
},
{
"mark": {
"color": "white",
"type": "tick",
"size": 14,
"orient": "horizontal",
"style": "boxplot-median"
},
"encoding": {
"y": {
"field": "mid_box_session_time_m",
"type": "quantitative",
"title": "session_time_m"
},
"x": {
"field": "homework_done",
"type": "nominal",
"title": "homework_done"
}
}
}
]
},
{
"transform": [
{
"window": [
{
"op": "q1",
"field": "session_time_m",
"as": "lower_box_session_time_m"
},
{
"op": "q3",
"field": "session_time_m",
"as": "upper_box_session_time_m"
}
],
"frame": [
null,
null
],
"groupby": [
"homework_done"
]
},
{
"filter": "(datum.session_time_m < datum.lower_box_session_time_m - 1.5 * (datum.upper_box_session_time_m - datum.lower_box_session_time_m)) || (datum.session_time_m > datum.upper_box_session_time_m + 1.5 * (datum.upper_box_session_time_m - datum.lower_box_session_time_m))"
}
],
"mark": {
"type": "point",
"style": "boxplot-outliers"
},
"encoding": {
"y": {
"field": "session_time_m",
"type": "quantitative"
},
"x": {
"field": "homework_done",
"type": "nominal",
"title": "homework_done"
}
}
}
]
}
} |
Here is a small example {
"data": {
"values": [
{
"homework_done": false,
"session_time_m": 2,
"session_hour": 1
},
{
"homework_done": false,
"session_time_m": 0,
"session_hour": 2
}
]
},
"$schema": "https://vega.github.io/schema/vega-lite/v3.0.0.json",
"facet": {
"column": {
"type": "nominal",
"field": "session_hour"
}
},
"spec": {
"layer": [
{
"transform": [
{
"aggregate": [
{
"op": "median",
"field": "session_time_m",
"as": "mid_box_session_time_m"
}
],
"groupby": [
"homework_done"
]
}
],
"layer": [
{
"mark": {
"type": "tick"
},
"encoding": {
"y": {
"field": "mid_box_session_time_m",
"type": "quantitative"
},
"x": {
"field": "homework_done",
"type": "nominal"
}
}
}
]
},
{
"transform": [
{
"window": [
],
"groupby": [
"homework_done"
]
}
],
"mark": {
"type": "point"
},
"encoding": {
"y": {
"field": "session_time_m",
"type": "quantitative"
},
"x": {
"field": "homework_done",
"type": "nominal"
}
}
}
]
}
} |
Hmm, weird. We have a |
Ahh, the problem is the scales. We have a scale at the top level spec but it reads data from |
I'll keep looking later. |
Here is another example that doesn't work {
"$schema": "https://vega.github.io/schema/vega-lite/v2.json",
"description": "A vertical 1D box plot showing median, min, and max in the US population distribution of age groups in 2000.",
"data": {"url": "data/population.json"},
"mark": "boxplot",
"encoding": {
"y": {
"field": "people",
"type": "quantitative",
"axis": {"title": "population"}
},
"column": {
"field": "sex",
"type": "ordinal"
}
}
} |
Hmm, why is people in the domain here? "scales": [
{
"name": "y",
"type": "linear",
"domain": {
"fields": [
{"data": "data_1", "field": "lower_whisker_people"},
{"data": "data_1", "field": "lower_box_people"},
{"data": "data_1", "field": "upper_box_people"},
{"data": "data_1", "field": "upper_whisker_people"},
{"data": "data_1", "field": "mid_box_people"},
{"data": "data_3", "field": "people"}
]
},
"range": [{"signal": "child_height"}, 0],
"nice": true,
"zero": true
}
], |
Ahh, |
Here is a small spec that shows the error even when I fix the push down logic. {
"$schema": "https://vega.github.io/schema/vega-lite/v2.json",
"data": {
"url": "data/population.json"
},
"facet": {
"column": {
"field": "sex",
"type": "ordinal"
}
},
"spec": {
"layer": [
{
"transform": [
{
"aggregate": [
{
"op": "min",
"field": "people",
"as": "min_people"
}
],
"groupby": []
}
],
"mark": {
"type": "tick",
"style": "boxplot-rule"
},
"encoding": {
"y": {
"field": "min_people",
"type": "quantitative"
}
}
},
{
"transform": [
{
"window": [
{
"op": "q1",
"field": "people",
"as": "lower_box_people"
}
],
"groupby": []
}
],
"mark": {
"type": "point",
"style": "boxplot-outliers"
},
"encoding": {
"y": {
"field": "people",
"type": "quantitative"
}
}
}
]
}
} |
@invokesus had a hunch that the bug may be caused by #4029. However, going back to dad6955 doesn't seem to fix the issue with #4156 (comment), but it does fix #4156 (comment). So maybe #4175 at least partially resolves the issue. |
This example works before the transform merging but not after: {
"data": {
"values": [
{
"homework_done": false,
"session_time_m": 2,
"session_hour": 1
},
{
"homework_done": false,
"session_time_m": 0,
"session_hour": 2
}
]
},
"$schema": "https://vega.github.io/schema/vega-lite/v3.0.0.json",
"facet": {
"column": {
"type": "nominal",
"field": "session_hour"
}
},
"spec": {
"layer": [
{
"transform": [
{
"aggregate": [
{
"op": "median",
"field": "session_time_m",
"as": "mid_box_session_time_m"
}
],
"groupby": []
}
],
"mark": {
"type": "tick"
},
"encoding": {
"y": {
"field": "mid_box_session_time_m",
"type": "quantitative"
}
}
},
{
"transform": [
{
"window": [],
"groupby": []
}
],
"mark": {
"type": "point"
},
"encoding": {
"y": {
"field": "session_time_m",
"type": "quantitative"
}
}
}
]
}
} For some reason, this spec doesn't work in either case {
"$schema": "https://vega.github.io/schema/vega-lite/v2.json",
"data": {
"url": "data/population.json"
},
"facet": {
"column": {
"field": "sex",
"type": "ordinal"
}
},
"spec": {
"layer": [
{
"transform": [
{
"aggregate": [
{
"op": "min",
"field": "people",
"as": "min_people"
}
],
"groupby": []
}
],
"mark": {
"type": "tick",
"style": "boxplot-rule"
},
"encoding": {
"y": {
"field": "min_people",
"type": "quantitative"
}
}
},
{
"transform": [
{
"window": [],
"groupby": []
}
],
"mark": {
"type": "point"
},
"encoding": {
"y": {
"field": "people",
"type": "quantitative"
}
}
}
]
}
} |
Wow, so with dad6955 the dataflow looks like and with the latest dom/window-dataflow So something is very wrong here. I'm going to wait for @invokesus to fix #4175 and see whether this resolves this problem. #4177 still seems like a good idea so I'll leave it open. |
#4177 and #4175 will fix this. Phew, this was one of the hardest debugging sessions I've done. It took me three days, with some really weird behavior in between. However, it exposed a few separate bugs that are all fixed now, and we have tests and helper tools to make sure we can catch this class of bugs much more easily now. |
boxplot doesn't work with column encoding and facet.
Result
Error: Undefined data set name: "data_1"
see editor
The text was updated successfully, but these errors were encountered: