Skip to content

Commit

Permalink
Test additional long-form data parsing functionality
Browse files Browse the repository at this point in the history
  • Loading branch information
mwaskom committed Jun 19, 2021
1 parent fcd5f39 commit 86a5c87
Showing 1 changed file with 69 additions and 18 deletions.
87 changes: 69 additions & 18 deletions seaborn/tests/_core/test_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ def long_variables(self):
variables = dict(x="x", y="y", hue="a", size="z", style="s_cat")
return variables

def test_long_df(self, long_df, long_variables):
def test_named_vectors(self, long_df, long_variables):

p = PlotData(long_df, long_variables)
assert p._source_data is long_df
Expand All @@ -28,7 +28,7 @@ def test_long_df(self, long_df, long_variables):
assert p.names[key] == val
assert_series_equal(p.frame[key], long_df[val])

def test_long_df_and_vectors(self, long_df, long_variables):
def test_named_and_given_vectors(self, long_df, long_variables):

long_variables["y"] = long_df["b"]
long_variables["size"] = long_df["z"].to_numpy()
Expand All @@ -43,7 +43,7 @@ def test_long_df_and_vectors(self, long_df, long_variables):
assert p.names["y"] == "b"
assert p.names["size"] is None

def test_long_df_with_index(self, long_df, long_variables):
def test_index_as_variable(self, long_df, long_variables):

index = pd.Int64Index(np.arange(len(long_df)) * 2 + 10, name="i")
long_variables["x"] = "i"
Expand All @@ -52,7 +52,7 @@ def test_long_df_with_index(self, long_df, long_variables):
assert p.names["x"] == "i"
assert_series_equal(p.frame["x"], pd.Series(index, index))

def test_long_df_with_multiindex(self, long_df, long_variables):
def test_multiindex_as_variables(self, long_df, long_variables):

index_i = pd.Int64Index(np.arange(len(long_df)) * 2 + 10, name="i")
index_j = pd.Int64Index(np.arange(len(long_df)) * 3 + 5, name="j")
Expand All @@ -63,7 +63,35 @@ def test_long_df_with_multiindex(self, long_df, long_variables):
assert_series_equal(p.frame["x"], pd.Series(index_i, index))
assert_series_equal(p.frame["y"], pd.Series(index_j, index))

def test_long_dict(self, long_dict, long_variables):
def test_int_as_variable_key(self):
    """Check that an integer given for a variable can act as a key into `data`.

    The frame is built without explicit column labels, and the integer 2 is
    passed as the value for `x`. The test expects the resulting column to
    equal `df[2]` and the recorded name to be the string form of the key.
    """
    variable, column = "x", 2
    values = np.random.uniform(size=(10, 3))
    frame = pd.DataFrame(values)

    plot_data = PlotData(frame, {variable: column})

    assert_series_equal(plot_data.frame[variable], frame[column])
    assert plot_data.names[variable] == str(column)

def test_int_as_variable_value(self, long_df):
    """Check that passing the integer 0 for `x` yields zeros and no name.

    The test expects every entry of the `x` column to equal 0 and the
    recorded name for `x` to be None.
    """
    spec = {"x": 0, "y": "y"}
    plot_data = PlotData(long_df, spec)

    x_values = plot_data.frame["x"]
    assert (x_values == 0).all()
    assert plot_data.names["x"] is None

def test_tuple_as_variable_key(self):
    """Check that a tuple given for a variable can act as a key into `data`.

    The frame has two-level MultiIndex columns, and the tuple ("b", "y") is
    passed as the value for `hue`. The test expects the resulting column to
    equal `df[("b", "y")]` and the recorded name to be `str` of the tuple.
    """
    variable = "hue"
    column = ("b", "y")

    levels = [("a", "b", "c"), ("x", "y")]
    columns = pd.MultiIndex.from_product(levels)
    values = np.random.uniform(size=(10, 6))
    frame = pd.DataFrame(values, columns=columns)

    plot_data = PlotData(frame, {variable: column})

    assert_series_equal(plot_data.frame[variable], frame[column])
    assert plot_data.names[variable] == str(column)

def test_dict_as_data(self, long_dict, long_variables):

p = PlotData(long_dict, long_variables)
assert p._source_data is long_dict
Expand All @@ -74,7 +102,7 @@ def test_long_dict(self, long_dict, long_variables):
"vector_type",
["series", "numpy", "list"],
)
def test_long_vectors(self, long_df, long_variables, vector_type):
def test_vectors_various_types(self, long_df, long_variables, vector_type):

variables = {key: long_df[val] for key, val in long_variables.items()}
if vector_type == "numpy":
Expand Down Expand Up @@ -103,17 +131,6 @@ def test_none_as_variable_value(self, long_df):
assert list(p.frame.columns) == ["x"]
assert p.names == {"x": "z"}

def test_long_undefined_variables(self, long_df):
    """Check that a variable value of "not_in_df" raises ValueError.

    The unknown name is tried as the first, second, and third variable in
    the specification; each case must raise.
    """
    invalid_specs = [
        dict(x="not_in_df"),
        dict(x="x", y="not_in_df"),
        dict(x="x", y="y", hue="not_in_df"),
    ]
    for spec in invalid_specs:
        with pytest.raises(ValueError):
            PlotData(long_df, spec)

def test_frame_and_vector_mismatched_lengths(self, long_df):

vector = np.arange(len(long_df) * 2)
Expand Down Expand Up @@ -169,7 +186,41 @@ def test_index_alignment_between_series(self):
assert_series_equal(p.frame["x"], x_col_expected)
assert_series_equal(p.frame["y"], y_col_expected)

def test_contains(self, long_df):
def test_key_not_in_data_raises(self, long_df):
    """Check the error raised when a string key is given alongside `long_df`.

    The key "what" is passed for `x`; the test expects a ValueError whose
    message names both the value and the parameter.
    """
    param, value = "x", "what"
    pattern = f"Could not interpret value `{value}` for parameter `{param}`"

    with pytest.raises(ValueError, match=pattern):
        PlotData(long_df, {param: value})

def test_key_with_no_data_raises(self):
    """Check the error raised when a string key is given without any data.

    Only `variables` is supplied to PlotData; the test expects a ValueError
    whose message names both the value and the parameter.
    """
    param, value = "x", "what"
    pattern = f"Could not interpret value `{value}` for parameter `{param}`"

    with pytest.raises(ValueError, match=pattern):
        PlotData(variables={param: value})

def test_data_vector_different_lengths_raises(self, long_df):
    """Check that an ndarray shorter than `data` is rejected.

    The vector has five fewer entries than `long_df` has rows; the test
    expects a ValueError with the length-mismatch message.
    """
    short_vector = np.arange(len(long_df) - 5)

    with pytest.raises(
        ValueError,
        match="Length of ndarray vectors must match length of `data`",
    ):
        PlotData(long_df, {"y": short_vector})

def test_undefined_variables_raise(self, long_df):
    """Check that a variable value of "not_in_df" raises ValueError.

    The unknown name is placed in the `x`, `y`, and `hue` slots in turn,
    with the earlier slots filled by other names; each case must raise.
    """
    invalid_specs = (
        dict(x="not_in_df"),
        dict(x="x", y="not_in_df"),
        dict(x="x", y="y", hue="not_in_df"),
    )
    for variables in invalid_specs:
        with pytest.raises(ValueError):
            PlotData(long_df, variables)

def test_contains_operation(self, long_df):

p = PlotData(long_df, {"x": "y", "hue": long_df["a"]})
assert "x" in p
Expand Down

0 comments on commit 86a5c87

Please sign in to comment.