From a09cfc331f1d65edbe26ab340cb41f8848d03ecd Mon Sep 17 00:00:00 2001 From: Vytautas Jancauskas Date: Tue, 17 Jul 2012 16:29:54 +0300 Subject: [PATCH 01/88] Added rplot stub --- pandas/tools/rplot.py | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 pandas/tools/rplot.py diff --git a/pandas/tools/rplot.py b/pandas/tools/rplot.py new file mode 100644 index 0000000000000..783449f06ded1 --- /dev/null +++ b/pandas/tools/rplot.py @@ -0,0 +1,3 @@ +class RPlot: + def __init__(self): + pass \ No newline at end of file From 169b47f866174701dd853eb7b5fda006333843cf Mon Sep 17 00:00:00 2001 From: Vytautas Jancauskas Date: Tue, 17 Jul 2012 18:14:15 +0300 Subject: [PATCH 02/88] Added an RPlot class and a simple geom class --- pandas/tools/rplot.py | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/pandas/tools/rplot.py b/pandas/tools/rplot.py index 783449f06ded1..dee19d390a29e 100644 --- a/pandas/tools/rplot.py +++ b/pandas/tools/rplot.py @@ -1,3 +1,21 @@ +import matplotlib.pyplot as plt + class RPlot: - def __init__(self): - pass \ No newline at end of file + def __init__(self, data): + self.data = data + self.ax = plt.gca() + + def __add__(self, other): + other.plot(self.ax, self.data) + +class GeomPoint: + def __init__(self, x, y, marker='o', colour='grey', size=20, alpha=1.0): + self.x = x + self.y = y + self.marker = marker + self.colour = colour + self.size = size + self.alpha = alpha + + def plot(self, ax, data): + ax.scatter(data[self.x], data[self.y], c=self.colour, marker=self.marker, s=self.size, alpha=self.alpha) \ No newline at end of file From a207636b5826d74a66086a03387cf5746107de38 Mon Sep 17 00:00:00 2001 From: Vytautas Jancauskas Date: Wed, 18 Jul 2012 00:49:45 +0300 Subject: [PATCH 03/88] Added GeomDensity2d --- pandas/tools/rplot.py | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/pandas/tools/rplot.py b/pandas/tools/rplot.py index dee19d390a29e..50f1d1422c9ed 
100644 --- a/pandas/tools/rplot.py +++ b/pandas/tools/rplot.py @@ -1,9 +1,12 @@ +import numpy as np +import scipy.stats as stats import matplotlib.pyplot as plt class RPlot: def __init__(self, data): self.data = data self.ax = plt.gca() + self.aes = {} def __add__(self, other): other.plot(self.ax, self.data) @@ -18,4 +21,30 @@ def __init__(self, x, y, marker='o', colour='grey', size=20, alpha=1.0): self.alpha = alpha def plot(self, ax, data): - ax.scatter(data[self.x], data[self.y], c=self.colour, marker=self.marker, s=self.size, alpha=self.alpha) \ No newline at end of file + ax.scatter(data[self.x], data[self.y], c=self.colour, marker=self.marker, s=self.size, alpha=self.alpha) + + +class GeomDensity2d: + def __init__(self, x, y, weight=1.0, colour='grey', size=0.5, linetype=1.0, alpha=1.0): + self.x = x + self.y = y + self.weight = weight + self.colour = colour + self.size = size + self.linetype = linetype + self.alpha = alpha + + def plot(self, ax, data): + x = data[self.x] + y = data[self.y] + rvs = np.array([x, y]) + x_min = x.min() + x_max = x.max() + y_min = y.min() + y_max = y.max() + X, Y = np.mgrid[x_min:x_max:200j, y_min:y_max:200j] + positions = np.vstack([X.ravel(), Y.ravel()]) + values = np.vstack([x, y]) + kernel = stats.gaussian_kde(values) + Z = np.reshape(kernel(positions).T, X.shape) + ax.contour(Z, alpha=alpha, extent=[x_min, x_max, y_min, y_max]) \ No newline at end of file From 7a168fef67faaf2d54ec6c7f0d4a5bb7e356c361 Mon Sep 17 00:00:00 2001 From: Vytautas Jancauskas Date: Thu, 19 Jul 2012 22:56:11 +0300 Subject: [PATCH 04/88] Small improvements --- pandas/tools/rplot.py | 36 +++++++++++++++++++++++++----------- 1 file changed, 25 insertions(+), 11 deletions(-) diff --git a/pandas/tools/rplot.py b/pandas/tools/rplot.py index 50f1d1422c9ed..93c40dadbc430 100644 --- a/pandas/tools/rplot.py +++ b/pandas/tools/rplot.py @@ -3,16 +3,21 @@ import matplotlib.pyplot as plt class RPlot: - def __init__(self, data): + def __init__(self, data, x=None, 
y=None): self.data = data self.ax = plt.gca() self.aes = {} + if x is not None and y is None: + self.aes['x'] = x + elif x is not None and y is not None: + self.aes['x'] = x + self.aes['y'] = y def __add__(self, other): - other.plot(self.ax, self.data) + other.plot(self) class GeomPoint: - def __init__(self, x, y, marker='o', colour='grey', size=20, alpha=1.0): + def __init__(self, x=None, y=None, marker='o', colour='grey', size=20, alpha=1.0): self.x = x self.y = y self.marker = marker @@ -20,12 +25,16 @@ def __init__(self, x, y, marker='o', colour='grey', size=20, alpha=1.0): self.size = size self.alpha = alpha - def plot(self, ax, data): - ax.scatter(data[self.x], data[self.y], c=self.colour, marker=self.marker, s=self.size, alpha=self.alpha) - + def plot(self, rplot): + aes = rplot.aes + if self.x is not None: + aes['x'] = self.x + if self.y is not None: + aes['y'] = self.y + rplot.ax.scatter(rplot.data[aes['x']], rplot.data[aes['y']], c=self.colour, marker=self.marker, s=self.size, alpha=self.alpha) class GeomDensity2d: - def __init__(self, x, y, weight=1.0, colour='grey', size=0.5, linetype=1.0, alpha=1.0): + def __init__(self, x=None, y=None, weight=1.0, colour='grey', size=0.5, linetype=1.0, alpha=1.0): self.x = x self.y = y self.weight = weight @@ -34,9 +43,14 @@ def __init__(self, x, y, weight=1.0, colour='grey', size=0.5, linetype=1.0, alph self.linetype = linetype self.alpha = alpha - def plot(self, ax, data): - x = data[self.x] - y = data[self.y] + def plot(self, rplot): + aes = rplot.aes + if self.x is not None: + aes['x'] = self.x + if self.y is not None: + aes['y'] = self.y + x = rplot.data[aes['x']] + y = rplot.data[aes['y']] rvs = np.array([x, y]) x_min = x.min() x_max = x.max() @@ -47,4 +61,4 @@ def plot(self, ax, data): values = np.vstack([x, y]) kernel = stats.gaussian_kde(values) Z = np.reshape(kernel(positions).T, X.shape) - ax.contour(Z, alpha=alpha, extent=[x_min, x_max, y_min, y_max]) \ No newline at end of file + rplot.ax.contour(Z, 
alpha=self.alpha, extent=[x_min, x_max, y_min, y_max]) From d32e8356a31ffdfa9ba67ea7fb04da942a39bfc2 Mon Sep 17 00:00:00 2001 From: Vytautas Jancauskas Date: Fri, 20 Jul 2012 01:01:04 +0300 Subject: [PATCH 05/88] Set colours from a column --- pandas/tools/rplot.py | 51 ++++++++++++++++++++++++++++++++++++++----- 1 file changed, 46 insertions(+), 5 deletions(-) diff --git a/pandas/tools/rplot.py b/pandas/tools/rplot.py index 93c40dadbc430..d47b22f0d48b6 100644 --- a/pandas/tools/rplot.py +++ b/pandas/tools/rplot.py @@ -1,12 +1,41 @@ import numpy as np import scipy.stats as stats import matplotlib.pyplot as plt +import random +import pdb + +def random_colour(name): + """Random colour from a string or other hashable value. + + Parameters: + ----------- + name: A string value to use as a seed to the random number generator. + + Returns: + -------- + (r, g, b): Where r, g, b are random numbers in the range (0, 1). + """ + random.seed(name) + return [random.random() for _ in range(3)] + +def filter_column(frame, column, filter_column, filter_value): + n = len(frame) + vcol = frame[column] + fcol = frame[filter_column] + result = [] + for v, f in zip(vcol, fcol): + if f == filter_value: + result.append(v) + return np.array(result) class RPlot: def __init__(self, data, x=None, y=None): self.data = data self.ax = plt.gca() - self.aes = {} + self.aes = { + 'x' : None, + 'y' : None, + } if x is not None and y is None: self.aes['x'] = x elif x is not None and y is not None: @@ -14,13 +43,13 @@ def __init__(self, data, x=None, y=None): self.aes['y'] = y def __add__(self, other): - other.plot(self) + return other.plot(self) class GeomPoint: - def __init__(self, x=None, y=None, marker='o', colour='grey', size=20, alpha=1.0): + def __init__(self, x=None, y=None, shape='o', colour='grey', size=20, alpha=1.0): self.x = x self.y = y - self.marker = marker + self.shape = shape self.colour = colour self.size = size self.alpha = alpha @@ -31,7 +60,18 @@ def plot(self, rplot): aes['x'] = 
self.x if self.y is not None: aes['y'] = self.y - rplot.ax.scatter(rplot.data[aes['x']], rplot.data[aes['y']], c=self.colour, marker=self.marker, s=self.size, alpha=self.alpha) + if type(self.colour) is not type(""): + colours = list(set(self.colour)) + for colour in colours: + xcol = filter_column(rplot.data, aes['x'], self.colour.name, colour) + ycol = filter_column(rplot.data, aes['y'], self.colour.name, colour) + rplot.ax.scatter(xcol, ycol, c=random_colour(colour), marker=self.shape, s=self.size, alpha=self.alpha, label=colour) + else: + rplot.ax.scatter(rplot.data[aes['x']], rplot.data[aes['y']], c=self.colour, marker=self.shape, s=self.size, alpha=self.alpha) + rplot.ax.set_xlabel(aes['x']) + rplot.ax.set_ylabel(aes['y']) + rplot.ax.legend() + return rplot class GeomDensity2d: def __init__(self, x=None, y=None, weight=1.0, colour='grey', size=0.5, linetype=1.0, alpha=1.0): @@ -62,3 +102,4 @@ def plot(self, rplot): kernel = stats.gaussian_kde(values) Z = np.reshape(kernel(positions).T, X.shape) rplot.ax.contour(Z, alpha=self.alpha, extent=[x_min, x_max, y_min, y_max]) + return rplot From 26b6e8b3c0282a6815def17f8af7133b75c178f0 Mon Sep 17 00:00:00 2001 From: Vytautas Jancauskas Date: Sun, 22 Jul 2012 20:05:52 +0300 Subject: [PATCH 06/88] Added a docstring to filter_column --- pandas/tools/rplot.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/pandas/tools/rplot.py b/pandas/tools/rplot.py index d47b22f0d48b6..945296a88468c 100644 --- a/pandas/tools/rplot.py +++ b/pandas/tools/rplot.py @@ -19,6 +19,19 @@ def random_colour(name): return [random.random() for _ in range(3)] def filter_column(frame, column, filter_column, filter_value): + """Select only those values from column that have a specified value in another column. + + Parameters: + ----------- + frame: pandas data frame object. + column: column name from which to select values. + filter_column: column used to filter. 
+ filter_value: only those rows with this value in filter_column will be selected. + + Returns: + -------- + numpy array with filtered values. + """ n = len(frame) vcol = frame[column] fcol = frame[filter_column] @@ -30,7 +43,7 @@ def filter_column(frame, column, filter_column, filter_value): class RPlot: def __init__(self, data, x=None, y=None): - self.data = data + seomlf.data = data self.ax = plt.gca() self.aes = { 'x' : None, From 4c6821ca033e21e1304db0b3bc5bc46eb998036e Mon Sep 17 00:00:00 2001 From: Vytautas Jancauskas Date: Sun, 22 Jul 2012 20:14:36 +0300 Subject: [PATCH 07/88] Added parse_facets function --- pandas/tools/rplot.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/pandas/tools/rplot.py b/pandas/tools/rplot.py index 945296a88468c..fc2f91a8ac04f 100644 --- a/pandas/tools/rplot.py +++ b/pandas/tools/rplot.py @@ -41,6 +41,20 @@ def filter_column(frame, column, filter_column, filter_value): result.append(v) return np.array(result) +def parse_facets(facet): + """Parse facets formula of the form 'lhs ~ rhs'. + + Parameters: + ----------- + facets: facets formula. + + Returns: + -------- + A list with LHS and RHS column names. 
+ """ + lhs, rhs = [col.strip() for col in facet.split('~')] + return (lhs, rhs) + class RPlot: def __init__(self, data, x=None, y=None): seomlf.data = data @@ -116,3 +130,10 @@ def plot(self, rplot): Z = np.reshape(kernel(positions).T, X.shape) rplot.ax.contour(Z, alpha=self.alpha, extent=[x_min, x_max, y_min, y_max]) return rplot + +class FacetGrid: + def __init__(self, facets): + self.facets = facets + + def plot(self, rplot): + pass From e063e9afd7358df94cb4e2e54864ebe95b9e0f74 Mon Sep 17 00:00:00 2001 From: Vytautas Jancauskas Date: Sun, 22 Jul 2012 22:58:19 +0300 Subject: [PATCH 08/88] Work on layer class --- pandas/tools/rplot.py | 39 +++++++++++++++++++++++++++++++-------- 1 file changed, 31 insertions(+), 8 deletions(-) diff --git a/pandas/tools/rplot.py b/pandas/tools/rplot.py index fc2f91a8ac04f..96ac257a21f83 100644 --- a/pandas/tools/rplot.py +++ b/pandas/tools/rplot.py @@ -55,6 +55,36 @@ def parse_facets(facet): lhs, rhs = [col.strip() for col in facet.split('~')] return (lhs, rhs) +def scale_size(column, categorical, min_size=1.0, max_size=50.0): + def scaler(data, index): + if categorical: + pass + else: + x = data[column].iget(index) + a = min(data[column]) + b = max(data[column]) + return min_size + ((x - a) / (b - a)) * (max_size - min_size) + return scaler + +class Layer: + """ + Layer object representing a single plot layer. 
+ """ + def __init__(self, geom, geom_params, data, aesthetics): + self.geom = geom + self.geom_params = geom_params + self.data = data + self.aesthetics = aesthetics + + def render(self, ax): + if self.geom == 'point': + for index in range(len(self.data)): + row = self.data.irow(index) + x = row[self.aesthetics['x']] + y = row[self.aesthetics['y']] + size_scaler = self.aesthetics['size'](self.data, index) + ax.scatter(x, y, size_scaler(self.data, index)) + class RPlot: def __init__(self, data, x=None, y=None): seomlf.data = data @@ -129,11 +159,4 @@ def plot(self, rplot): kernel = stats.gaussian_kde(values) Z = np.reshape(kernel(positions).T, X.shape) rplot.ax.contour(Z, alpha=self.alpha, extent=[x_min, x_max, y_min, y_max]) - return rplot - -class FacetGrid: - def __init__(self, facets): - self.facets = facets - - def plot(self, rplot): - pass + return rplot \ No newline at end of file From e291f431a82fc836af77515db24b7e9bb12ff621 Mon Sep 17 00:00:00 2001 From: Vytautas Jancauskas Date: Mon, 23 Jul 2012 15:36:40 +0300 Subject: [PATCH 09/88] Added docstring to scale_size function --- pandas/tools/rplot.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/pandas/tools/rplot.py b/pandas/tools/rplot.py index 96ac257a21f83..01d37f2e4dab8 100644 --- a/pandas/tools/rplot.py +++ b/pandas/tools/rplot.py @@ -56,6 +56,19 @@ def parse_facets(facet): return (lhs, rhs) def scale_size(column, categorical, min_size=1.0, max_size=50.0): + """Creates a function that converts between a data attribute to point size. 
+ + Parameters: + ----------- + column: string, a column name + categorical: boolean, wether the column contains categorical data + min_size: float, minimum point size + max_size: float, maximum point size + + Returns: + -------- + a function of two arguments that takes a data set and a row number, returns float + """ def scaler(data, index): if categorical: pass From 713ae3eb76943df91f6f5e80bccd7bb5b5e06125 Mon Sep 17 00:00:00 2001 From: Vytautas Jancauskas Date: Mon, 23 Jul 2012 16:05:45 +0300 Subject: [PATCH 10/88] Added scale_gradient function --- pandas/tools/rplot.py | 44 +++++++++++++++++++++++++++++++++++++++---- 1 file changed, 40 insertions(+), 4 deletions(-) diff --git a/pandas/tools/rplot.py b/pandas/tools/rplot.py index 01d37f2e4dab8..40e7803936c07 100644 --- a/pandas/tools/rplot.py +++ b/pandas/tools/rplot.py @@ -55,13 +55,13 @@ def parse_facets(facet): lhs, rhs = [col.strip() for col in facet.split('~')] return (lhs, rhs) -def scale_size(column, categorical, min_size=1.0, max_size=50.0): +def scale_size(column, categorical, min_size=1.0, max_size=80.0): """Creates a function that converts between a data attribute to point size. Parameters: ----------- column: string, a column name - categorical: boolean, wether the column contains categorical data + categorical: boolean, true if the column contains categorical data min_size: float, minimum point size max_size: float, maximum point size @@ -79,6 +79,37 @@ def scaler(data, index): return min_size + ((x - a) / (b - a)) * (max_size - min_size) return scaler +def scale_gradient(column, categorical, colour1=(0.0, 0.0, 0.0), colour2=(1.0, 0.7, 0.8)): + """Create a function that converts between a data attribute value to a + point in colour space between two specified colours. 
+ + Parameters: + ----------- + column: string, a column name + categorical: boolean, true if the column contains categorical data + colour1: a tuple with three float values specifying rgb components + colour2: a tuple with three float values specifying rgb components + + Returns: + -------- + a function of two arguments that takes a data set and a row number, returns a + tuple with three float values with rgb component values. + """ + def scaler(data, index): + if categorical: + pass + else: + x = data[column].iget(index) + a = min(data[column]) + b = max(data[column]) + r1, g1, b1 = colour1 + r2, g2, b2 = colour2 + x_scaled = (x - a) / (b - a) + return (r1 + (r2 - r1) * x_scaled, + g1 + (g2 - g1) * x_scaled, + b1 + (b2 - b1) * x_scaled) + return scaler + class Layer: """ Layer object representing a single plot layer. @@ -95,8 +126,13 @@ def render(self, ax): row = self.data.irow(index) x = row[self.aesthetics['x']] y = row[self.aesthetics['y']] - size_scaler = self.aesthetics['size'](self.data, index) - ax.scatter(x, y, size_scaler(self.data, index)) + size_scaler = self.aesthetics['size'] + colour_scaler = self.aesthetics['colour'] + ax.scatter(x, y, + s=size_scaler(self.data, index), + c=colour_scaler(self.data, index)) + ax.set_xlabel(self.aesthetics['x']) + ax.set_ylabel(self.aesthetics['y']) class RPlot: def __init__(self, data, x=None, y=None): From c3a523125499c7d2a4c877c5300d04fd0c847034 Mon Sep 17 00:00:00 2001 From: Vytautas Jancauskas Date: Mon, 23 Jul 2012 16:08:50 +0300 Subject: [PATCH 11/88] Added scale_gradient function --- pandas/tools/rplot.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/tools/rplot.py b/pandas/tools/rplot.py index 40e7803936c07..f501b0b6756e0 100644 --- a/pandas/tools/rplot.py +++ b/pandas/tools/rplot.py @@ -128,9 +128,11 @@ def render(self, ax): y = row[self.aesthetics['y']] size_scaler = self.aesthetics['size'] colour_scaler = self.aesthetics['colour'] + alpha = self.aesthetics['alpha'] 
ax.scatter(x, y, s=size_scaler(self.data, index), - c=colour_scaler(self.data, index)) + c=colour_scaler(self.data, index), + alpha=alpha) ax.set_xlabel(self.aesthetics['x']) ax.set_ylabel(self.aesthetics['y']) From a9866ac330c8649af9e384cd4182327dc19ecb18 Mon Sep 17 00:00:00 2001 From: Vytautas Jancauskas Date: Tue, 24 Jul 2012 13:40:13 +0300 Subject: [PATCH 12/88] Added a test function to display grouped plots --- pandas/tools/rplot.py | 46 +++++++++++++++++++++++++++++++------------ 1 file changed, 33 insertions(+), 13 deletions(-) diff --git a/pandas/tools/rplot.py b/pandas/tools/rplot.py index f501b0b6756e0..84eab3317cdc7 100644 --- a/pandas/tools/rplot.py +++ b/pandas/tools/rplot.py @@ -136,22 +136,42 @@ def render(self, ax): ax.set_xlabel(self.aesthetics['x']) ax.set_ylabel(self.aesthetics['y']) +def display_grouped(grouped_data, x, y, fig): + """A test routine to display grouped data. + + Parameters: + ----------- + grouped_data: data frame grouped by df.groupby pandas routine + fig: matplotlib figure + + Returns: + -------- + Nothing + """ + shingle1 = set([]) + shingle2 = set([]) + # Fill shingles. 
+ for name, group in grouped_data: + if type(name) is type(()): + shingle1.add(name[0]) + shingle2.add(name[1]) + else: + shingle1.add(name) + rows = len(shingle1) + cols = len(shingle2) + print rows, cols + subplot_nr = 1 + for name, group, in grouped_data: + ax = fig.add_subplot(rows, cols, subplot_nr) + ax.scatter(group[x], group[y]) + subplot_nr += 1 + class RPlot: - def __init__(self, data, x=None, y=None): - seomlf.data = data - self.ax = plt.gca() - self.aes = { - 'x' : None, - 'y' : None, - } - if x is not None and y is None: - self.aes['x'] = x - elif x is not None and y is not None: - self.aes['x'] = x - self.aes['y'] = y + def __init__(self): + pass def __add__(self, other): - return other.plot(self) + pass class GeomPoint: def __init__(self, x=None, y=None, shape='o', colour='grey', size=20, alpha=1.0): From c1bb97886b8ce5ef204c0e71c0ca9017bfdf67ba Mon Sep 17 00:00:00 2001 From: Vytautas Jancauskas Date: Tue, 24 Jul 2012 17:02:11 +0300 Subject: [PATCH 13/88] Added TrellisGrid class --- pandas/tools/rplot.py | 121 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 119 insertions(+), 2 deletions(-) diff --git a/pandas/tools/rplot.py b/pandas/tools/rplot.py index 84eab3317cdc7..7af77069ca26c 100644 --- a/pandas/tools/rplot.py +++ b/pandas/tools/rplot.py @@ -3,6 +3,7 @@ import matplotlib.pyplot as plt import random import pdb +from copy import deepcopy def random_colour(name): """Random colour from a string or other hashable value. @@ -121,6 +122,16 @@ def __init__(self, geom, geom_params, data, aesthetics): self.aesthetics = aesthetics def render(self, ax): + """Render the layer on a matplotlib axis. 
+ + Parameters: + ----------- + ax: matplotlib axis object to draw on + + Returns: + -------- + ax: matplotlib axis object + """ if self.geom == 'point': for index in range(len(self.data)): row = self.data.irow(index) @@ -129,12 +140,14 @@ def render(self, ax): size_scaler = self.aesthetics['size'] colour_scaler = self.aesthetics['colour'] alpha = self.aesthetics['alpha'] + print colour_scaler(self.data, index) ax.scatter(x, y, s=size_scaler(self.data, index), c=colour_scaler(self.data, index), alpha=alpha) ax.set_xlabel(self.aesthetics['x']) ax.set_ylabel(self.aesthetics['y']) + return ax def display_grouped(grouped_data, x, y, fig): """A test routine to display grouped data. @@ -166,12 +179,116 @@ def display_grouped(grouped_data, x, y, fig): ax.scatter(group[x], group[y]) subplot_nr += 1 +class TrellisGrid: + def __init__(self, layer, by): + """Initialize TreelisGrid instance. + + Parameters: + ----------- + layer: a Layer object instance + by: column names to group by + """ + self.data = layer.data + self.grouped = self.data.groupby(by) + self.groups = self.grouped.groups.keys() + self.shingle1 = set([g[0] for g in self.groups]) + self.shingle2 = set([g[1] for g in self.groups]) + self.rows = len(self.shingle1) + self.cols = len(self.shingle2) + self.grid = [[None for _ in range(self.cols)] for _ in range(self.rows)] + self.group_grid = [[None for _ in range(self.cols)] for _ in range(self.rows)] + row = 0 + col = 0 + print self.rows, self.cols, len(self.groups), len(self.grouped) + for group, data in self.grouped: + print row, col + new_layer = deepcopy(layer) + new_layer.data = data + self.grid[row][col] = new_layer + col += 1 + if col >= self.cols: + col = 0 + row += 1 + + def get_layer(self, row, col): + """Get a layer associated with the specified row and col. 
+ + Parameters: + ----------- + row: integer row index + col: integer column index + + Returns: + -------- + layer object + """ + return self.grid[row][col] + + def get_group(self, row, col): + """Get a group tuple for the specified row and col. + + Parameters: + ----------- + row: integer row index + col: integer column index + + Returns: + -------- + a tuple + """ + return self.group_grid[row][col] + + def render(self, fig): + """Render the trellis plot on a figure. + + Parameters: + ----------- + fig: matplotlib figure to draw on + + Returns: + -------- + matplotlib figure + """ + index = 1 + for row in self.grid: + for layer in row: + ax = fig.add_subplot(self.rows, self.cols, index) + layer.render(ax) + return fig + +def facetize(layer, by): + """Create a grouped plot by taking a layer and cloning it with changed data. + + Parameters: + ----------- + layer: an rplot layer object to be used as the basis for facetizing + by: column names to group by + + Returns: + -------- + a two dimensional array of layers arranged in a way that they would be displayed + """ + data = layer.data + grouped = data.groupby(by) + groups = grouped.groups.keys() + shingle1 = set([g[0] for g in groups]) + shingle2 = set([g[1] for g in groups]) + rows = len(shingle1) + cols = len(shingle2) + if cols == 0: + cols = 1 + grid = [[[] for _ in range(cols)] for _ in range(rows)] + col = 0 + row = 0 + for group, data in grouped: + pass + class RPlot: def __init__(self): - pass + self.layers = [] def __add__(self, other): - pass + self.layers.append(other) class GeomPoint: def __init__(self, x=None, y=None, shape='o', colour='grey', size=20, alpha=1.0): From 4c28d9498fb44041b428c29da8f5927826d15e9c Mon Sep 17 00:00:00 2001 From: Vytautas Jancauskas Date: Tue, 24 Jul 2012 17:10:31 +0300 Subject: [PATCH 14/88] Added scale_constant function --- pandas/tools/rplot.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/pandas/tools/rplot.py b/pandas/tools/rplot.py 
index 7af77069ca26c..83534e98984ff 100644 --- a/pandas/tools/rplot.py +++ b/pandas/tools/rplot.py @@ -111,6 +111,21 @@ def scaler(data, index): b1 + (b2 - b1) * x_scaled) return scaler +def scale_constant(constant): + """Create a function that always returns a specified constant value. + + Parameters: + ----------- + constant: a Python object to be returned + + Returns: + -------- + a two argument function + """ + def scaler(data, index): + return constant + return scaler + class Layer: """ Layer object representing a single plot layer. @@ -140,7 +155,6 @@ def render(self, ax): size_scaler = self.aesthetics['size'] colour_scaler = self.aesthetics['colour'] alpha = self.aesthetics['alpha'] - print colour_scaler(self.data, index) ax.scatter(x, y, s=size_scaler(self.data, index), c=colour_scaler(self.data, index), From 34cc9ada16434f1226c54e894c29391d2f33554f Mon Sep 17 00:00:00 2001 From: Vytautas Jancauskas Date: Tue, 24 Jul 2012 17:47:00 +0300 Subject: [PATCH 15/88] Added scale_gradient2 function --- pandas/tools/rplot.py | 44 ++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 41 insertions(+), 3 deletions(-) diff --git a/pandas/tools/rplot.py b/pandas/tools/rplot.py index 83534e98984ff..8b894ce05f1b8 100644 --- a/pandas/tools/rplot.py +++ b/pandas/tools/rplot.py @@ -111,6 +111,46 @@ def scaler(data, index): b1 + (b2 - b1) * x_scaled) return scaler +def scale_gradient2(column, categorical, colour1=(0.0, 0.0, 0.0), colour2=(1.0, 0.7, 0.8), colour3=(0.2, 1.0, 0.5)): + """Create a function that converts between a data attribute value to a + point in colour space between three specified colours. 
+ + Parameters: + ----------- + column: string, a column name + categorical: boolean, true if the column contains categorical data + colour1: a tuple with three float values specifying rgb components + colour2: a tuple with three float values specifying rgb components + colour3: a tuple with three float values specifying rgb components + + Returns: + -------- + a function of two arguments that takes a data set and a row number, returns a + tuple with three float values with rgb component values. + """ + def scaler(data, index): + if categorical: + pass + else: + x = data[column].iget(index) + a = min(data[column]) + b = max(data[column]) + r1, g1, b1 = colour1 + r2, g2, b2 = colour2 + r3, g3, b3 = colour3 + x_scaled = (x - a) / (b - a) + if x_scaled < 0.5: + x_scaled *= 2.0 + return (r1 + (r2 - r1) * x_scaled, + g1 + (g2 - g1) * x_scaled, + b1 + (b2 - b1) * x_scaled) + else: + x_scaled = (x_scaled - 0.5) * 2.0 + return (r2 + (r3 - r2) * x_scaled, + g2 + (g3 - g2) * x_scaled, + b2 + (b3 - b2) * x_scaled) + return scaler + def scale_constant(constant): """Create a function that always returns a specified constant value. 
@@ -186,7 +226,6 @@ def display_grouped(grouped_data, x, y, fig): shingle1.add(name) rows = len(shingle1) cols = len(shingle2) - print rows, cols subplot_nr = 1 for name, group, in grouped_data: ax = fig.add_subplot(rows, cols, subplot_nr) @@ -213,9 +252,7 @@ def __init__(self, layer, by): self.group_grid = [[None for _ in range(self.cols)] for _ in range(self.rows)] row = 0 col = 0 - print self.rows, self.cols, len(self.groups), len(self.grouped) for group, data in self.grouped: - print row, col new_layer = deepcopy(layer) new_layer.data = data self.grid[row][col] = new_layer @@ -268,6 +305,7 @@ def render(self, fig): for layer in row: ax = fig.add_subplot(self.rows, self.cols, index) layer.render(ax) + index += 1 return fig def facetize(layer, by): From d042e6019e9a5a09c00f471a3ee84d78c28a8eba Mon Sep 17 00:00:00 2001 From: Vytautas Jancauskas Date: Tue, 24 Jul 2012 18:46:25 +0300 Subject: [PATCH 16/88] Added scale_shape function --- pandas/tools/rplot.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/pandas/tools/rplot.py b/pandas/tools/rplot.py index 8b894ce05f1b8..69b9100e5f395 100644 --- a/pandas/tools/rplot.py +++ b/pandas/tools/rplot.py @@ -151,6 +151,24 @@ def scaler(data, index): b2 + (b3 - b2) * x_scaled) return scaler +def scale_shape(column): + """Create a function that converts between a categorical value and a scatter plot shape. + + Parameters: + ----------- + column: string, a column name to use + + Returns: + -------- + a function of two arguments + """ + shapes = ['o', 'D', 'h', 'H', '_', '8', 'p', '+', '.', 's', '*', 'd', '^', '<', '>', 'v', '|', 'x'] + def scaler(data, index): + values = list(set(data[column])) + x = data[column].iget(index) + return shapes[values.index(x)] + return scaler + def scale_constant(constant): """Create a function that always returns a specified constant value. 
@@ -194,10 +212,12 @@ def render(self, ax): y = row[self.aesthetics['y']] size_scaler = self.aesthetics['size'] colour_scaler = self.aesthetics['colour'] + shape_scaler = self.aesthetics['shape'] alpha = self.aesthetics['alpha'] ax.scatter(x, y, s=size_scaler(self.data, index), c=colour_scaler(self.data, index), + marker=shape_scaler(self.data, index), alpha=alpha) ax.set_xlabel(self.aesthetics['x']) ax.set_ylabel(self.aesthetics['y']) From 681b97ee20faf4a2ebd9f6430abb382a38c6272e Mon Sep 17 00:00:00 2001 From: Vytautas Jancauskas Date: Wed, 25 Jul 2012 18:06:09 +0300 Subject: [PATCH 17/88] Adjust spacing and axis limits in trellis plots --- pandas/tools/rplot.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/pandas/tools/rplot.py b/pandas/tools/rplot.py index 69b9100e5f395..b4ed0c907854b 100644 --- a/pandas/tools/rplot.py +++ b/pandas/tools/rplot.py @@ -321,11 +321,20 @@ def render(self, fig): matplotlib figure """ index = 1 + axes = [] for row in self.grid: for layer in row: ax = fig.add_subplot(self.rows, self.cols, index) layer.render(ax) + axes.append(ax) index += 1 + min_x = min([ax.get_xlim()[0] for ax in axes]) + max_x = max([ax.get_xlim()[1] for ax in axes]) + min_y = min([ax.get_ylim()[0] for ax in axes]) + max_y = max([ax.get_ylim()[1] for ax in axes]) + [ax.set_xlim(min_x, max_x) for ax in axes] + [ax.set_ylim(min_y, max_y) for ax in axes] + fig.subplots_adjust(wspace=0.0, hspace=0.0) return fig def facetize(layer, by): From 5736dea4dcef5a308eb2a75d7c8e33bea28c5ae9 Mon Sep 17 00:00:00 2001 From: Vytautas Jancauskas Date: Wed, 25 Jul 2012 19:05:47 +0300 Subject: [PATCH 18/88] Display shingle variable values in a matplotlib table --- pandas/tools/rplot.py | 30 +++++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/pandas/tools/rplot.py b/pandas/tools/rplot.py index b4ed0c907854b..46d86cf156aef 100644 --- a/pandas/tools/rplot.py +++ b/pandas/tools/rplot.py @@ -5,6 +5,13 @@ import pdb from copy import 
deepcopy +# +# TODO: +# * Make sure trellis display works when two or one grouping variable is specified +# * Enable labelling for legend display +# * Expand RPlot class +# + def random_colour(name): """Random colour from a string or other hashable value. @@ -264,6 +271,7 @@ def __init__(self, layer, by): self.data = layer.data self.grouped = self.data.groupby(by) self.groups = self.grouped.groups.keys() + self.by = by self.shingle1 = set([g[0] for g in self.groups]) self.shingle2 = set([g[1] for g in self.groups]) self.rows = len(self.shingle1) @@ -276,6 +284,7 @@ def __init__(self, layer, by): new_layer = deepcopy(layer) new_layer.data = data self.grid[row][col] = new_layer + self.group_grid[row][col] = group col += 1 if col >= self.cols: col = 0 @@ -334,7 +343,26 @@ def render(self, fig): max_y = max([ax.get_ylim()[1] for ax in axes]) [ax.set_xlim(min_x, max_x) for ax in axes] [ax.set_ylim(min_y, max_y) for ax in axes] - fig.subplots_adjust(wspace=0.0, hspace=0.0) + for index, axis in enumerate(axes): + if index % self.cols == 0: + pass + else: + axis.get_yaxis().set_ticks([]) + axis.set_ylabel('') + if index / self.cols == self.rows - 1: + pass + else: + axis.get_xaxis().set_ticks([]) + axis.set_xlabel('') + label1 = "%s = %s" % (self.by[0], self.group_grid[index / self.cols][index % self.cols][0]) + label2 = "%s = %s" % (self.by[1], self.group_grid[index / self.cols][index % self.cols][1]) + if self.cols > 1: + axis.table(cellText=[[label1], [label2]], + loc='top', cellLoc='center', + cellColours=[['lightgrey'], ['lightgrey']]) + else: + axis.table(cellText=[[label1]], loc='top', cellLoc='center', cellColours=[['lightgrey']]) + fig.subplots_adjust(wspace=0.05, hspace=0.2) return fig def facetize(layer, by): From 9397283fe991bb6d9935647ef996a71845cabecc Mon Sep 17 00:00:00 2001 From: Vytautas Jancauskas Date: Wed, 25 Jul 2012 19:54:46 +0300 Subject: [PATCH 19/88] Redefined GeomPoint class --- pandas/tools/rplot.py | 68 
++++++++++++------------------------------- 1 file changed, 19 insertions(+), 49 deletions(-) diff --git a/pandas/tools/rplot.py b/pandas/tools/rplot.py index 46d86cf156aef..ec6d9f4a94cae 100644 --- a/pandas/tools/rplot.py +++ b/pandas/tools/rplot.py @@ -195,13 +195,12 @@ class Layer: """ Layer object representing a single plot layer. """ - def __init__(self, geom, geom_params, data, aesthetics): - self.geom = geom - self.geom_params = geom_params + def __init__(self, data, aesthetics): self.data = data self.aesthetics = aesthetics - def render(self, ax): +class GeomPoint(Layer): + def work(self, ax): """Render the layer on a matplotlib axis. Parameters: @@ -212,23 +211,22 @@ def render(self, ax): -------- ax: matplotlib axis object """ - if self.geom == 'point': - for index in range(len(self.data)): - row = self.data.irow(index) - x = row[self.aesthetics['x']] - y = row[self.aesthetics['y']] - size_scaler = self.aesthetics['size'] - colour_scaler = self.aesthetics['colour'] - shape_scaler = self.aesthetics['shape'] - alpha = self.aesthetics['alpha'] - ax.scatter(x, y, - s=size_scaler(self.data, index), - c=colour_scaler(self.data, index), - marker=shape_scaler(self.data, index), - alpha=alpha) - ax.set_xlabel(self.aesthetics['x']) - ax.set_ylabel(self.aesthetics['y']) - return ax + for index in range(len(self.data)): + row = self.data.irow(index) + x = row[self.aesthetics['x']] + y = row[self.aesthetics['y']] + size_scaler = self.aesthetics['size'] + colour_scaler = self.aesthetics['colour'] + shape_scaler = self.aesthetics['shape'] + alpha = self.aesthetics['alpha'] + ax.scatter(x, y, + s=size_scaler(self.data, index), + c=colour_scaler(self.data, index), + marker=shape_scaler(self.data, index), + alpha=alpha) + ax.set_xlabel(self.aesthetics['x']) + ax.set_ylabel(self.aesthetics['y']) + return ax def display_grouped(grouped_data, x, y, fig): """A test routine to display grouped data. 
@@ -399,34 +397,6 @@ def __init__(self): def __add__(self, other): self.layers.append(other) -class GeomPoint: - def __init__(self, x=None, y=None, shape='o', colour='grey', size=20, alpha=1.0): - self.x = x - self.y = y - self.shape = shape - self.colour = colour - self.size = size - self.alpha = alpha - - def plot(self, rplot): - aes = rplot.aes - if self.x is not None: - aes['x'] = self.x - if self.y is not None: - aes['y'] = self.y - if type(self.colour) is not type(""): - colours = list(set(self.colour)) - for colour in colours: - xcol = filter_column(rplot.data, aes['x'], self.colour.name, colour) - ycol = filter_column(rplot.data, aes['y'], self.colour.name, colour) - rplot.ax.scatter(xcol, ycol, c=random_colour(colour), marker=self.shape, s=self.size, alpha=self.alpha, label=colour) - else: - rplot.ax.scatter(rplot.data[aes['x']], rplot.data[aes['y']], c=self.colour, marker=self.shape, s=self.size, alpha=self.alpha) - rplot.ax.set_xlabel(aes['x']) - rplot.ax.set_ylabel(aes['y']) - rplot.ax.legend() - return rplot - class GeomDensity2d: def __init__(self, x=None, y=None, weight=1.0, colour='grey', size=0.5, linetype=1.0, alpha=1.0): self.x = x From 10b8a8a143439a9eb091fe9ab254a9db83b349c7 Mon Sep 17 00:00:00 2001 From: Vytautas Jancauskas Date: Wed, 25 Jul 2012 20:26:04 +0300 Subject: [PATCH 20/88] Removed an unused function --- pandas/tools/rplot.py | 35 ++++++----------------------------- 1 file changed, 6 insertions(+), 29 deletions(-) diff --git a/pandas/tools/rplot.py b/pandas/tools/rplot.py index ec6d9f4a94cae..eaa2d796632e2 100644 --- a/pandas/tools/rplot.py +++ b/pandas/tools/rplot.py @@ -200,7 +200,7 @@ def __init__(self, data, aesthetics): self.aesthetics = aesthetics class GeomPoint(Layer): - def work(self, ax): + def work(self, ax=None, fig=None): """Render the layer on a matplotlib axis. 
Parameters: @@ -316,7 +316,7 @@ def get_group(self, row, col): """ return self.group_grid[row][col] - def render(self, fig): + def work(self, ax=None, fig=None): """Render the trellis plot on a figure. Parameters: @@ -363,33 +363,6 @@ def render(self, fig): fig.subplots_adjust(wspace=0.05, hspace=0.2) return fig -def facetize(layer, by): - """Create a grouped plot by taking a layer and cloning it with changed data. - - Parameters: - ----------- - layer: an rplot layer object to be used as the basis for facetizing - by: column names to group by - - Returns: - -------- - a two dimensional array of layers arranged in a way that they would be displayed - """ - data = layer.data - grouped = data.groupby(by) - groups = grouped.groups.keys() - shingle1 = set([g[0] for g in groups]) - shingle2 = set([g[1] for g in groups]) - rows = len(shingle1) - cols = len(shingle2) - if cols == 0: - cols = 1 - grid = [[[] for _ in range(cols)] for _ in range(rows)] - col = 0 - row = 0 - for group, data in grouped: - pass - class RPlot: def __init__(self): self.layers = [] @@ -397,6 +370,10 @@ def __init__(self): def __add__(self, other): self.layers.append(other) + def show(self, fig): + for layer in self.layers: + pass + class GeomDensity2d: def __init__(self, x=None, y=None, weight=1.0, colour='grey', size=0.5, linetype=1.0, alpha=1.0): self.x = x From f1cfc7638243e8b6c58c4357d8183d1acf56e4e4 Mon Sep 17 00:00:00 2001 From: Vytautas Jancauskas Date: Wed, 25 Jul 2012 21:12:10 +0300 Subject: [PATCH 21/88] Added aes function --- pandas/tools/rplot.py | 32 ++++++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/pandas/tools/rplot.py b/pandas/tools/rplot.py index eaa2d796632e2..0fa72580e146f 100644 --- a/pandas/tools/rplot.py +++ b/pandas/tools/rplot.py @@ -191,16 +191,28 @@ def scaler(data, index): return constant return scaler +def aes(x=None, y=None, size=None, colour=None, shape=None, alpha=None): + """Create aesthetics dictionary. 
+ """ + return { + 'x' : x, + 'y' : y, + 'size' : size, + 'colour' : colour, + 'shape' : shape, + 'alpha' : alpha, + } + class Layer: """ Layer object representing a single plot layer. """ - def __init__(self, data, aesthetics): + def __init__(self, data=None, aes=None): self.data = data - self.aesthetics = aesthetics + self.aes = aes class GeomPoint(Layer): - def work(self, ax=None, fig=None): + def work(self, rplot): """Render the layer on a matplotlib axis. Parameters: @@ -226,7 +238,7 @@ def work(self, ax=None, fig=None): alpha=alpha) ax.set_xlabel(self.aesthetics['x']) ax.set_ylabel(self.aesthetics['y']) - return ax + return ax, fig def display_grouped(grouped_data, x, y, fig): """A test routine to display grouped data. @@ -361,18 +373,22 @@ def work(self, ax=None, fig=None): else: axis.table(cellText=[[label1]], loc='top', cellLoc='center', cellColours=[['lightgrey']]) fig.subplots_adjust(wspace=0.05, hspace=0.2) - return fig + return ax, fig class RPlot: + """ + The main plot object. Add layers to an instance of this object to create a plot. 
+ """ def __init__(self): self.layers = [] def __add__(self, other): self.layers.append(other) - def show(self, fig): - for layer in self.layers: - pass + def show(self, fig=None): + if fig is None: + fig = plt.gcf() + pass class GeomDensity2d: def __init__(self, x=None, y=None, weight=1.0, colour='grey', size=0.5, linetype=1.0, alpha=1.0): From ddbf4ac9df1ccf01f847ff12515c850ba29e5657 Mon Sep 17 00:00:00 2001 From: Vytautas Jancauskas Date: Thu, 26 Jul 2012 16:34:21 +0300 Subject: [PATCH 22/88] Work on RPlot class --- pandas/tools/rplot.py | 137 +++++++++++++++++++++++++----------------- 1 file changed, 83 insertions(+), 54 deletions(-) diff --git a/pandas/tools/rplot.py b/pandas/tools/rplot.py index 0fa72580e146f..26d4f00c5f8f7 100644 --- a/pandas/tools/rplot.py +++ b/pandas/tools/rplot.py @@ -209,7 +209,15 @@ class Layer: """ def __init__(self, data=None, aes=None): self.data = data - self.aes = aes + if aes is None: + self.aes = aes() + else: + self.aes = aes + + def is_trellis(self): + """Return false to indicate this is not a TrellisGrid. + """ + return False class GeomPoint(Layer): def work(self, rplot): @@ -269,66 +277,22 @@ def display_grouped(grouped_data, x, y, fig): ax.scatter(group[x], group[y]) subplot_nr += 1 -class TrellisGrid: - def __init__(self, layer, by): +class TrellisGrid(Layer): + def __init__(self, by): """Initialize TreelisGrid instance. 
Parameters: ----------- - layer: a Layer object instance by: column names to group by """ - self.data = layer.data - self.grouped = self.data.groupby(by) - self.groups = self.grouped.groups.keys() self.by = by - self.shingle1 = set([g[0] for g in self.groups]) - self.shingle2 = set([g[1] for g in self.groups]) - self.rows = len(self.shingle1) - self.cols = len(self.shingle2) - self.grid = [[None for _ in range(self.cols)] for _ in range(self.rows)] - self.group_grid = [[None for _ in range(self.cols)] for _ in range(self.rows)] - row = 0 - col = 0 - for group, data in self.grouped: - new_layer = deepcopy(layer) - new_layer.data = data - self.grid[row][col] = new_layer - self.group_grid[row][col] = group - col += 1 - if col >= self.cols: - col = 0 - row += 1 - - def get_layer(self, row, col): - """Get a layer associated with the specified row and col. - - Parameters: - ----------- - row: integer row index - col: integer column index - - Returns: - -------- - layer object - """ - return self.grid[row][col] - - def get_group(self, row, col): - """Get a group tuple for the specified row and col. - Parameters: - ----------- - row: integer row index - col: integer column index - - Returns: - -------- - a tuple + def is_trellis(self): + """Return true to indicate this is a TrellisGrid. """ - return self.group_grid[row][col] + return True - def work(self, ax=None, fig=None): + def render(self, ax=None, fig=None): """Render the trellis plot on a figure. Parameters: @@ -375,12 +339,54 @@ def work(self, ax=None, fig=None): fig.subplots_adjust(wspace=0.05, hspace=0.2) return ax, fig + def preprocess(self, rplot): + rplot.trellised = True + layers = [] + for layer in rplot.layers: + data = layer.data + + + def work(self, rplot): + # For each layer in the layer list, replace it + # with a two dimentional array of trellised layers. 
+ rplot.trellised = True + layers = [] + for layer in rplot.layers: + data = layer.data + grouped = data.groupby(by) + groups = grouped.groups.keys() + self.by = by + self.shingle1 = set([g[0] for g in self.groups]) + self.shingle2 = set([g[1] for g in self.groups]) + self.rows = len(self.shingle1) + self.cols = len(self.shingle2) + trellised = [[None for _ in range(self.cols)] for _ in range(self.rows)] + row = 0 + col = 0 + for group, data in self.grouped: + new_layer = deepcopy(layer) + new_layer.data = data + self.grid[row][col] = new_layer + self.group_grid[row][col] = group + col += 1 + if col >= self.cols: + col = 0 + row += 1 + layers.append(trellised) + rplot.layers = layers + +def sequence_layers(layers): + """Go through the list of layers and fill in the missing bits of information. + """ + pass + class RPlot: """ The main plot object. Add layers to an instance of this object to create a plot. """ - def __init__(self): - self.layers = [] + def __init__(self, data, x=None, y=None): + self.layers = [Layer(data, aes(x=x, y=y))] + trellised = False def __add__(self, other): self.layers.append(other) @@ -388,7 +394,30 @@ def __add__(self, other): def show(self, fig=None): if fig is None: fig = plt.gcf() - pass + # Look for the last TrellisGrid instance in the layer list + last_trellis = None + for layer in self.layers: + if layer.is_trellis(): + last_trellis = layer + if last_trellis is None: + # We have a simple, non-trellised plot + new_layers = sequence_layers(new_layers) + for layer in new_layers: + layer.work(fig.gca()) + # And we're done + return fig + else: + # We have a trellised plot. + # First let's remove all other TrellisGrid instances from the layer list, + # including this one. 
+ new_layers = [] + for layer in self.layers: + if not layer.is_trellis(): + new_layers.append(layer) + new_layers = sequence_layers(new_layers) + # Now replace the old layers by their trellised versions + new_layers = last_trellis.trellis(new_layers) + # Prepare the subplots and draw on them class GeomDensity2d: def __init__(self, x=None, y=None, weight=1.0, colour='grey', size=0.5, linetype=1.0, alpha=1.0): From 126112618aa62c48c3131b315aafb36129215dab Mon Sep 17 00:00:00 2001 From: Vytautas Jancauskas Date: Thu, 26 Jul 2012 17:08:08 +0300 Subject: [PATCH 23/88] Reworked the GeomDensity2D class --- pandas/tools/rplot.py | 49 +++++++++++++++++++++++++++++++++++++++---- 1 file changed, 45 insertions(+), 4 deletions(-) diff --git a/pandas/tools/rplot.py b/pandas/tools/rplot.py index 26d4f00c5f8f7..aa18a0915dbd1 100644 --- a/pandas/tools/rplot.py +++ b/pandas/tools/rplot.py @@ -220,17 +220,24 @@ def is_trellis(self): return False class GeomPoint(Layer): - def work(self, rplot): + def work(self, fig=None, ax=None): """Render the layer on a matplotlib axis. + You can specify either a figure or an axis to draw on. Parameters: ----------- + fig: matplotlib figure object ax: matplotlib axis object to draw on Returns: -------- - ax: matplotlib axis object + fig, ax: matplotlib figure and axis objects """ + if ax is None: + if fig is None: + return fig, ax + else: + ax = fig.gca() for index in range(len(self.data)): row = self.data.irow(index) x = row[self.aesthetics['x']] @@ -246,7 +253,41 @@ def work(self, rplot): alpha=alpha) ax.set_xlabel(self.aesthetics['x']) ax.set_ylabel(self.aesthetics['y']) - return ax, fig + return fig, ax + +class GeomDensity2D(Layer): + def work(self, fig=None, ax=None): + """Render the layer on a matplotlib axis. + You can specify either a figure or an axis to draw on. 
+ + Parameters: + ----------- + fig: matplotlib figure object + ax: matplotlib axis object to draw on + + Returns: + -------- + fig, ax: matplotlib figure and axis objects + """ + if ax is None: + if fig is None: + return fig, ax + else: + ax = fig.gca() + x = self.data[self.aes['x']] + y = self.data[self.aes['y']] + rvs = np.array([x, y]) + x_min = x.min() + x_max = x.max() + y_min = y.min() + y_max = y.max() + X, Y = np.mgrid[x_min:x_max:200j, y_min:y_max:200j] + positions = np.vstack([X.ravel(), Y.ravel()]) + values = np.vstack([x, y]) + kernel = stats.gaussian_kde(values) + Z = np.reshape(kernel(positions).T, X.shape) + ax.contour(Z, extent=[x_min, x_max, y_min, y_max]) + return rplot def display_grouped(grouped_data, x, y, fig): """A test routine to display grouped data. @@ -447,5 +488,5 @@ def plot(self, rplot): values = np.vstack([x, y]) kernel = stats.gaussian_kde(values) Z = np.reshape(kernel(positions).T, X.shape) - rplot.ax.contour(Z, alpha=self.alpha, extent=[x_min, x_max, y_min, y_max]) + rplot.ax.contour(Z, extent=[x_min, x_max, y_min, y_max]) return rplot \ No newline at end of file From 526b20a0bb8e0aa31f69482f735eddaea07c69dd Mon Sep 17 00:00:00 2001 From: Vytautas Jancauskas Date: Thu, 26 Jul 2012 17:17:42 +0300 Subject: [PATCH 24/88] Implemented sequence_layers and merge_aes functions --- pandas/tools/rplot.py | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/pandas/tools/rplot.py b/pandas/tools/rplot.py index aa18a0915dbd1..8d4e118bd05ce 100644 --- a/pandas/tools/rplot.py +++ b/pandas/tools/rplot.py @@ -416,10 +416,34 @@ def work(self, rplot): layers.append(trellised) rplot.layers = layers +def merge_aes(layer1, layer2): + """Merges the aesthetics dictionaries for the two layers. + Look up sequence_layers function. Which layer is first and which + one is second is important. 
+ + Parameters: + ----------- + layer1: Layer object + layer2: Layer object + """ + for key in layer2.keys(): + if layer2[key] is None: + layer2[key] = layer1[key] + def sequence_layers(layers): """Go through the list of layers and fill in the missing bits of information. + The basic rules are this: + * If the current layer has data set to None, take the data from previous layer. + * For each aesthetic mapping, if that mapping is set to None, take it from previous layer. + + Parameters: + ----------- + layers: a list of Layer objects """ - pass + for layer1, layer2 in zip(layers[:-1], layers[1:]): + if layer2.data is None: + layer2.data = layer1.data + layer2.aes = merge_aes(layer1, layer2) class RPlot: """ From b5c2700c976cf4992c9d6895f89cfee3369d7a2a Mon Sep 17 00:00:00 2001 From: Vytautas Jancauskas Date: Thu, 26 Jul 2012 17:21:41 +0300 Subject: [PATCH 25/88] Removed old GeomDensity2d class --- pandas/tools/rplot.py | 37 +++---------------------------------- 1 file changed, 3 insertions(+), 34 deletions(-) diff --git a/pandas/tools/rplot.py b/pandas/tools/rplot.py index 8d4e118bd05ce..1336a6ea69e99 100644 --- a/pandas/tools/rplot.py +++ b/pandas/tools/rplot.py @@ -207,12 +207,12 @@ class Layer: """ Layer object representing a single plot layer. """ - def __init__(self, data=None, aes=None): + def __init__(self, data=None, aes_=None): self.data = data - if aes is None: + if aes_ is None: self.aes = aes() else: - self.aes = aes + self.aes = aes_ def is_trellis(self): """Return false to indicate this is not a TrellisGrid. 
@@ -483,34 +483,3 @@ def show(self, fig=None): # Now replace the old layers by their trellised versions new_layers = last_trellis.trellis(new_layers) # Prepare the subplots and draw on them - -class GeomDensity2d: - def __init__(self, x=None, y=None, weight=1.0, colour='grey', size=0.5, linetype=1.0, alpha=1.0): - self.x = x - self.y = y - self.weight = weight - self.colour = colour - self.size = size - self.linetype = linetype - self.alpha = alpha - - def plot(self, rplot): - aes = rplot.aes - if self.x is not None: - aes['x'] = self.x - if self.y is not None: - aes['y'] = self.y - x = rplot.data[aes['x']] - y = rplot.data[aes['y']] - rvs = np.array([x, y]) - x_min = x.min() - x_max = x.max() - y_min = y.min() - y_max = y.max() - X, Y = np.mgrid[x_min:x_max:200j, y_min:y_max:200j] - positions = np.vstack([X.ravel(), Y.ravel()]) - values = np.vstack([x, y]) - kernel = stats.gaussian_kde(values) - Z = np.reshape(kernel(positions).T, X.shape) - rplot.ax.contour(Z, extent=[x_min, x_max, y_min, y_max]) - return rplot \ No newline at end of file From 1ef4ed68393b614c9fee2a6cd4da7642e9d86b53 Mon Sep 17 00:00:00 2001 From: Vytautas Jancauskas Date: Thu, 26 Jul 2012 17:23:29 +0300 Subject: [PATCH 26/88] Fix a typo --- pandas/tools/rplot.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tools/rplot.py b/pandas/tools/rplot.py index 1336a6ea69e99..fc9168321e1a9 100644 --- a/pandas/tools/rplot.py +++ b/pandas/tools/rplot.py @@ -466,7 +466,7 @@ def show(self, fig=None): last_trellis = layer if last_trellis is None: # We have a simple, non-trellised plot - new_layers = sequence_layers(new_layers) + new_layers = sequence_layers(self.layers) for layer in new_layers: layer.work(fig.gca()) # And we're done From ad94148553323a0f102a9d63fb974a3d32d1e842 Mon Sep 17 00:00:00 2001 From: Vytautas Jancauskas Date: Thu, 26 Jul 2012 17:50:38 +0300 Subject: [PATCH 27/88] Numerous typo and trivial mistak fixes --- pandas/tools/rplot.py | 26 +++++++++++++++----------- 
1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/pandas/tools/rplot.py b/pandas/tools/rplot.py index fc9168321e1a9..1d3d1e9906917 100644 --- a/pandas/tools/rplot.py +++ b/pandas/tools/rplot.py @@ -219,6 +219,9 @@ def is_trellis(self): """ return False + def work(self, fig=None, ax=None): + pass + class GeomPoint(Layer): def work(self, fig=None, ax=None): """Render the layer on a matplotlib axis. @@ -240,12 +243,12 @@ def work(self, fig=None, ax=None): ax = fig.gca() for index in range(len(self.data)): row = self.data.irow(index) - x = row[self.aesthetics['x']] - y = row[self.aesthetics['y']] - size_scaler = self.aesthetics['size'] - colour_scaler = self.aesthetics['colour'] - shape_scaler = self.aesthetics['shape'] - alpha = self.aesthetics['alpha'] + x = row[self.aes['x']] + y = row[self.aes['y']] + size_scaler = self.aes['size'] + colour_scaler = self.aes['colour'] + shape_scaler = self.aes['shape'] + alpha = self.aes['alpha'] ax.scatter(x, y, s=size_scaler(self.data, index), c=colour_scaler(self.data, index), @@ -426,9 +429,9 @@ def merge_aes(layer1, layer2): layer1: Layer object layer2: Layer object """ - for key in layer2.keys(): - if layer2[key] is None: - layer2[key] = layer1[key] + for key in layer2.aes.keys(): + if layer2.aes[key] is None: + layer2.aes[key] = layer1.aes[key] def sequence_layers(layers): """Go through the list of layers and fill in the missing bits of information. 
@@ -443,7 +446,8 @@ def sequence_layers(layers): for layer1, layer2 in zip(layers[:-1], layers[1:]): if layer2.data is None: layer2.data = layer1.data - layer2.aes = merge_aes(layer1, layer2) + merge_aes(layer1, layer2) + return layers class RPlot: """ @@ -468,7 +472,7 @@ def show(self, fig=None): # We have a simple, non-trellised plot new_layers = sequence_layers(self.layers) for layer in new_layers: - layer.work(fig.gca()) + layer.work(fig=fig) # And we're done return fig else: From 5e7e5921a4df929e79f3dbefd8e6f9adb24e2a2d Mon Sep 17 00:00:00 2001 From: Vytautas Jancauskas Date: Thu, 26 Jul 2012 17:53:58 +0300 Subject: [PATCH 28/88] Added default aes dictionary constructing function --- pandas/tools/rplot.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/pandas/tools/rplot.py b/pandas/tools/rplot.py index 1d3d1e9906917..3a1ef702e5627 100644 --- a/pandas/tools/rplot.py +++ b/pandas/tools/rplot.py @@ -191,8 +191,20 @@ def scaler(data, index): return constant return scaler +def default_aes(x, y): + """Create the default aesthetics dictionary. + """ + return { + 'x' : x, + 'y' : y, + 'size' : scale_constant(40.0), + 'colour' : scale_constant('grey'), + 'shape' : scale_constant('o'), + 'alpha' : scale_constant(1.0), + } + def aes(x=None, y=None, size=None, colour=None, shape=None, alpha=None): - """Create aesthetics dictionary. + """Create an empty aesthetics dictionary. 
""" return { 'x' : x, From 403c435b68d2dbf16b3af285322f7b395ae98429 Mon Sep 17 00:00:00 2001 From: Vytautas Jancauskas Date: Thu, 26 Jul 2012 18:07:40 +0300 Subject: [PATCH 29/88] Fixed issues with GeomPoint and GeomDensity2D --- pandas/tools/rplot.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pandas/tools/rplot.py b/pandas/tools/rplot.py index 3a1ef702e5627..aead4232a01c5 100644 --- a/pandas/tools/rplot.py +++ b/pandas/tools/rplot.py @@ -191,7 +191,7 @@ def scaler(data, index): return constant return scaler -def default_aes(x, y): +def default_aes(x=None, y=None): """Create the default aesthetics dictionary. """ return { @@ -265,9 +265,9 @@ def work(self, fig=None, ax=None): s=size_scaler(self.data, index), c=colour_scaler(self.data, index), marker=shape_scaler(self.data, index), - alpha=alpha) - ax.set_xlabel(self.aesthetics['x']) - ax.set_ylabel(self.aesthetics['y']) + alpha=alpha(self.data, index)) + ax.set_xlabel(self.aes['x']) + ax.set_ylabel(self.aes['y']) return fig, ax class GeomDensity2D(Layer): @@ -302,7 +302,7 @@ def work(self, fig=None, ax=None): kernel = stats.gaussian_kde(values) Z = np.reshape(kernel(positions).T, X.shape) ax.contour(Z, extent=[x_min, x_max, y_min, y_max]) - return rplot + return fig, ax def display_grouped(grouped_data, x, y, fig): """A test routine to display grouped data. @@ -466,7 +466,7 @@ class RPlot: The main plot object. Add layers to an instance of this object to create a plot. 
""" def __init__(self, data, x=None, y=None): - self.layers = [Layer(data, aes(x=x, y=y))] + self.layers = [Layer(data, default_aes(x=x, y=y))] trellised = False def __add__(self, other): From 65e710737974ca0721ab0e1814c4d533c250f156 Mon Sep 17 00:00:00 2001 From: Vytautas Jancauskas Date: Thu, 26 Jul 2012 18:52:10 +0300 Subject: [PATCH 30/88] Work on TrellisGrid class --- pandas/tools/rplot.py | 48 ++++++++++++++++++++++--------------------- 1 file changed, 25 insertions(+), 23 deletions(-) diff --git a/pandas/tools/rplot.py b/pandas/tools/rplot.py index aead4232a01c5..86b6002bfaa6f 100644 --- a/pandas/tools/rplot.py +++ b/pandas/tools/rplot.py @@ -395,41 +395,42 @@ def render(self, ax=None, fig=None): fig.subplots_adjust(wspace=0.05, hspace=0.2) return ax, fig - def preprocess(self, rplot): - rplot.trellised = True - layers = [] - for layer in rplot.layers: - data = layer.data + def trellis(self, layers): + """Create a trellis structure for a list of layers. + Each layer will be cloned with different data in to a two dimensional grid. + Parameters: + ----------- + layers: a list of Layer objects - def work(self, rplot): - # For each layer in the layer list, replace it - # with a two dimentional array of trellised layers. 
- rplot.trellised = True - layers = [] - for layer in rplot.layers: + Returns: + -------- + trellised_layers: Clones of each layer in the list arranged in a trellised latice + """ + trellised_layers = [] + for layer in layers: data = layer.data - grouped = data.groupby(by) + grouped = data.groupby(self.by) groups = grouped.groups.keys() - self.by = by - self.shingle1 = set([g[0] for g in self.groups]) - self.shingle2 = set([g[1] for g in self.groups]) - self.rows = len(self.shingle1) - self.cols = len(self.shingle2) - trellised = [[None for _ in range(self.cols)] for _ in range(self.rows)] + shingle1 = set([g[0] for g in groups]) + shingle2 = set([g[1] for g in groups]) + rows = len(shingle1) + cols = len(shingle2) + trellised = [[None for _ in range(cols)] for _ in range(rows)] + self.group_grid = [[None for _ in range(cols)] for _ in range(rows)] row = 0 col = 0 - for group, data in self.grouped: + for group, data in grouped: new_layer = deepcopy(layer) new_layer.data = data - self.grid[row][col] = new_layer + trellised[row][col] = new_layer self.group_grid[row][col] = group col += 1 - if col >= self.cols: + if col >= cols: col = 0 row += 1 - layers.append(trellised) - rplot.layers = layers + trellised_layers.append(trellised) + return trellised_layers def merge_aes(layer1, layer2): """Merges the aesthetics dictionaries for the two layers. 
@@ -498,4 +499,5 @@ def show(self, fig=None): new_layers = sequence_layers(new_layers) # Now replace the old layers by their trellised versions new_layers = last_trellis.trellis(new_layers) + print new_layers # Prepare the subplots and draw on them From 3aa160b5cb5ff6c6691a833d05e7c14b5f03bc89 Mon Sep 17 00:00:00 2001 From: Vytautas Jancauskas Date: Mon, 30 Jul 2012 18:07:03 +0300 Subject: [PATCH 31/88] Got rid of the is_trellis method, use isinstance instead --- pandas/tools/rplot.py | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/pandas/tools/rplot.py b/pandas/tools/rplot.py index 86b6002bfaa6f..d7e776b7c5cb7 100644 --- a/pandas/tools/rplot.py +++ b/pandas/tools/rplot.py @@ -226,11 +226,6 @@ def __init__(self, data=None, aes_=None): else: self.aes = aes_ - def is_trellis(self): - """Return false to indicate this is not a TrellisGrid. - """ - return False - def work(self, fig=None, ax=None): pass @@ -343,11 +338,6 @@ def __init__(self, by): """ self.by = by - def is_trellis(self): - """Return true to indicate this is a TrellisGrid. - """ - return True - def render(self, ax=None, fig=None): """Render the trellis plot on a figure. 
@@ -479,7 +469,7 @@ def show(self, fig=None): # Look for the last TrellisGrid instance in the layer list last_trellis = None for layer in self.layers: - if layer.is_trellis(): + if isinstance(layer, TrellisGrid): last_trellis = layer if last_trellis is None: # We have a simple, non-trellised plot From e0aa6d4d49b239c70d07e1c45fabd0ad9813f5da Mon Sep 17 00:00:00 2001 From: Vytautas Jancauskas Date: Tue, 31 Jul 2012 00:11:20 +0300 Subject: [PATCH 32/88] Added sequence_grids function --- pandas/tools/rplot.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/pandas/tools/rplot.py b/pandas/tools/rplot.py index d7e776b7c5cb7..4be45bd461a18 100644 --- a/pandas/tools/rplot.py +++ b/pandas/tools/rplot.py @@ -452,6 +452,21 @@ def sequence_layers(layers): merge_aes(layer1, layer2) return layers +def sequence_grids(layer_grids): + """Go through the list of layer girds and perform the same thing as sequence_layers. + + Parameters: + ----------- + layer_grids: a list of two dimensional layer grids + """ + for grid1, grid2 in zip(layer_grids[:-1], layer_grids[1:]): + for row1, row2 in zip(grid1, grid2): + for layer1, layer2 in zip(row1, row2): + if layer2.data is None: + layer2.data = layer1.data + merge_aes(layer1, layer2): + return layer_grids + class RPlot: """ The main plot object. Add layers to an instance of this object to create a plot. 
@@ -489,5 +504,5 @@ def show(self, fig=None): new_layers = sequence_layers(new_layers) # Now replace the old layers by their trellised versions new_layers = last_trellis.trellis(new_layers) - print new_layers # Prepare the subplots and draw on them + new_layers = sequence_grids(new_layers) \ No newline at end of file From 33e3e8b954fd90fa13242bf45abea5e48360da3d Mon Sep 17 00:00:00 2001 From: Vytautas Jancauskas Date: Tue, 31 Jul 2012 00:25:23 +0300 Subject: [PATCH 33/88] Added work_grid function --- pandas/tools/rplot.py | 30 ++++++++++++++++++++++++++---- 1 file changed, 26 insertions(+), 4 deletions(-) diff --git a/pandas/tools/rplot.py b/pandas/tools/rplot.py index 4be45bd461a18..b1e382e862f74 100644 --- a/pandas/tools/rplot.py +++ b/pandas/tools/rplot.py @@ -467,6 +467,27 @@ def sequence_grids(layer_grids): merge_aes(layer1, layer2): return layer_grids +def work_grid(grid, fig): + """Take a two dimensional grid, add subplots to a figure for each cell and do layer work. + + Parameters: + ----------- + grid: a two dimensional grid of layers + fig: matplotlib figure to draw on + + Returns: + -------- + axes: a two dimensional list of matplotlib axes + """ + nrows = len(grid) + ncols = len(grid[0]) + axes = [[None for _ in range(ncols)] for _ in range(nrows)] + for row in range(nrows): + for col in range(ncols): + axes[row][col] = fig.add_subplot(nrows, ncols, ncols * row + col + 1) + grid[row][col].work(ax=axes[row][col]) + return axes + class RPlot: """ The main plot object. Add layers to an instance of this object to create a plot. @@ -491,18 +512,19 @@ def show(self, fig=None): new_layers = sequence_layers(self.layers) for layer in new_layers: layer.work(fig=fig) - # And we're done - return fig else: # We have a trellised plot. # First let's remove all other TrellisGrid instances from the layer list, # including this one. 
new_layers = [] for layer in self.layers: - if not layer.is_trellis(): + if not isinstance(layer, TrellisGrid): new_layers.append(layer) new_layers = sequence_layers(new_layers) # Now replace the old layers by their trellised versions new_layers = last_trellis.trellis(new_layers) # Prepare the subplots and draw on them - new_layers = sequence_grids(new_layers) \ No newline at end of file + new_layers = sequence_grids(new_layers) + + # And we're done + return fig \ No newline at end of file From b93b287fb2455bc135d0d9320d74ef09249c2a11 Mon Sep 17 00:00:00 2001 From: Vytautas Jancauskas Date: Tue, 31 Jul 2012 00:42:28 +0300 Subject: [PATCH 34/88] Make sure the user supplies a list of length 2 to group by --- pandas/tools/rplot.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pandas/tools/rplot.py b/pandas/tools/rplot.py index b1e382e862f74..5486f83386959 100644 --- a/pandas/tools/rplot.py +++ b/pandas/tools/rplot.py @@ -336,6 +336,8 @@ def __init__(self, by): ----------- by: column names to group by """ + if len(by) != 2: + raise ValueError("You must give a list of length 2 to group by") self.by = by def render(self, ax=None, fig=None): @@ -464,7 +466,7 @@ def sequence_grids(layer_grids): for layer1, layer2 in zip(row1, row2): if layer2.data is None: layer2.data = layer1.data - merge_aes(layer1, layer2): + merge_aes(layer1, layer2) return layer_grids def work_grid(grid, fig): @@ -525,6 +527,6 @@ def show(self, fig=None): new_layers = last_trellis.trellis(new_layers) # Prepare the subplots and draw on them new_layers = sequence_grids(new_layers) - + [work_grid(grid, fig) for grid in new_layers] # And we're done return fig \ No newline at end of file From 4bb179ad67b3f6db9db978fae217c6f443bf92d6 Mon Sep 17 00:00:00 2001 From: Vytautas Jancauskas Date: Tue, 31 Jul 2012 10:50:05 +0300 Subject: [PATCH 35/88] Make sure the user passes a Layer instance to RPlot + operator --- pandas/tools/rplot.py | 5 ++++- 1 file changed, 4 insertions(+), 1 
deletion(-) diff --git a/pandas/tools/rplot.py b/pandas/tools/rplot.py index 5486f83386959..4cbb394850e51 100644 --- a/pandas/tools/rplot.py +++ b/pandas/tools/rplot.py @@ -499,6 +499,8 @@ def __init__(self, data, x=None, y=None): trellised = False def __add__(self, other): + if not isinstance(other, Layer): + raise TypeError("The operand on the right side of + must be a Layer instance") self.layers.append(other) def show(self, fig=None): @@ -527,6 +529,7 @@ def show(self, fig=None): new_layers = last_trellis.trellis(new_layers) # Prepare the subplots and draw on them new_layers = sequence_grids(new_layers) - [work_grid(grid, fig) for grid in new_layers] + axes_grids = [work_grid(grid, fig) for grid in new_layers] + axes_grid = axes_grids[-1] # And we're done return fig \ No newline at end of file From 57739280634360d987399d39e96aa9542d92aff6 Mon Sep 17 00:00:00 2001 From: Vytautas Jancauskas Date: Tue, 31 Jul 2012 13:19:57 +0300 Subject: [PATCH 36/88] Implemented adjust_subplots function --- pandas/tools/rplot.py | 89 ++++++++++++++++++------------------------- 1 file changed, 37 insertions(+), 52 deletions(-) diff --git a/pandas/tools/rplot.py b/pandas/tools/rplot.py index 4cbb394850e51..cc2f84aee7ae8 100644 --- a/pandas/tools/rplot.py +++ b/pandas/tools/rplot.py @@ -340,53 +340,6 @@ def __init__(self, by): raise ValueError("You must give a list of length 2 to group by") self.by = by - def render(self, ax=None, fig=None): - """Render the trellis plot on a figure. 
- - Parameters: - ----------- - fig: matplotlib figure to draw on - - Returns: - -------- - matplotlib figure - """ - index = 1 - axes = [] - for row in self.grid: - for layer in row: - ax = fig.add_subplot(self.rows, self.cols, index) - layer.render(ax) - axes.append(ax) - index += 1 - min_x = min([ax.get_xlim()[0] for ax in axes]) - max_x = max([ax.get_xlim()[1] for ax in axes]) - min_y = min([ax.get_ylim()[0] for ax in axes]) - max_y = max([ax.get_ylim()[1] for ax in axes]) - [ax.set_xlim(min_x, max_x) for ax in axes] - [ax.set_ylim(min_y, max_y) for ax in axes] - for index, axis in enumerate(axes): - if index % self.cols == 0: - pass - else: - axis.get_yaxis().set_ticks([]) - axis.set_ylabel('') - if index / self.cols == self.rows - 1: - pass - else: - axis.get_xaxis().set_ticks([]) - axis.set_xlabel('') - label1 = "%s = %s" % (self.by[0], self.group_grid[index / self.cols][index % self.cols][0]) - label2 = "%s = %s" % (self.by[1], self.group_grid[index / self.cols][index % self.cols][1]) - if self.cols > 1: - axis.table(cellText=[[label1], [label2]], - loc='top', cellLoc='center', - cellColours=[['lightgrey'], ['lightgrey']]) - else: - axis.table(cellText=[[label1]], loc='top', cellLoc='center', cellColours=[['lightgrey']]) - fig.subplots_adjust(wspace=0.05, hspace=0.2) - return ax, fig - def trellis(self, layers): """Create a trellis structure for a list of layers. Each layer will be cloned with different data in to a two dimensional grid. 
@@ -406,10 +359,10 @@ def trellis(self, layers): groups = grouped.groups.keys() shingle1 = set([g[0] for g in groups]) shingle2 = set([g[1] for g in groups]) - rows = len(shingle1) - cols = len(shingle2) - trellised = [[None for _ in range(cols)] for _ in range(rows)] - self.group_grid = [[None for _ in range(cols)] for _ in range(rows)] + self.rows = len(shingle1) + self.cols = len(shingle2) + trellised = [[None for _ in range(self.cols)] for _ in range(self.rows)] + self.group_grid = [[None for _ in range(self.cols)] for _ in range(self.rows)] row = 0 col = 0 for group, data in grouped: @@ -418,7 +371,7 @@ def trellis(self, layers): trellised[row][col] = new_layer self.group_grid[row][col] = group col += 1 - if col >= cols: + if col >= self.cols: col = 0 row += 1 trellised_layers.append(trellised) @@ -490,6 +443,37 @@ def work_grid(grid, fig): grid[row][col].work(ax=axes[row][col]) return axes +def adjust_subplots(fig, axes, trellis): + # Flatten the axes grid + axes = [ax for row in axes for ax in row] + min_x = min([ax.get_xlim()[0] for ax in axes]) + max_x = max([ax.get_xlim()[1] for ax in axes]) + min_y = min([ax.get_ylim()[0] for ax in axes]) + max_y = max([ax.get_ylim()[1] for ax in axes]) + [ax.set_xlim(min_x, max_x) for ax in axes] + [ax.set_ylim(min_y, max_y) for ax in axes] + for index, axis in enumerate(axes): + if index % trellis.cols == 0: + pass + else: + axis.get_yaxis().set_ticks([]) + axis.set_ylabel('') + if index / trellis.cols == trellis.rows - 1: + pass + else: + axis.get_xaxis().set_ticks([]) + axis.set_xlabel('') + label1 = "%s = %s" % (trellis.by[0], trellis.group_grid[index / trellis.cols][index % trellis.cols][0]) + label2 = "%s = %s" % (trellis.by[1], trellis.group_grid[index / trellis.cols][index % trellis.cols][1]) + if trellis.cols > 1: + axis.table(cellText=[[label1], [label2]], + loc='top', cellLoc='center', + cellColours=[['lightgrey'], ['lightgrey']]) + else: + axis.table(cellText=[[label1]], loc='top', cellLoc='center', 
cellColours=[['lightgrey']]) + fig.subplots_adjust(wspace=0.05, hspace=0.2) + return ax, fig + class RPlot: """ The main plot object. Add layers to an instance of this object to create a plot. @@ -531,5 +515,6 @@ def show(self, fig=None): new_layers = sequence_grids(new_layers) axes_grids = [work_grid(grid, fig) for grid in new_layers] axes_grid = axes_grids[-1] + adjust_subplots(fig, axes_grid, last_trellis) # And we're done return fig \ No newline at end of file From b3a21f64a8cf62696541d94a205891681e22f46e Mon Sep 17 00:00:00 2001 From: Vytautas Jancauskas Date: Tue, 31 Jul 2012 14:03:45 +0300 Subject: [PATCH 37/88] Added the ability to group by a single attribute in to either rows or cols --- pandas/tools/rplot.py | 40 ++++++++++++++++++++++++++++++++-------- 1 file changed, 32 insertions(+), 8 deletions(-) diff --git a/pandas/tools/rplot.py b/pandas/tools/rplot.py index cc2f84aee7ae8..04f0332b80885 100644 --- a/pandas/tools/rplot.py +++ b/pandas/tools/rplot.py @@ -338,6 +338,8 @@ def __init__(self, by): """ if len(by) != 2: raise ValueError("You must give a list of length 2 to group by") + elif by[0] == '.' and by[1] == '.': + raise ValueError("At least one of grouping attributes must be not a dot") self.by = by def trellis(self, layers): @@ -355,12 +357,27 @@ def trellis(self, layers): trellised_layers = [] for layer in layers: data = layer.data - grouped = data.groupby(self.by) + if self.by[0] == '.': + grouped = data.groupby(self.by[1]) + elif self.by[1] == '.': + grouped = data.groupby(self.by[0]) + else: + grouped = data.groupby(self.by) groups = grouped.groups.keys() - shingle1 = set([g[0] for g in groups]) - shingle2 = set([g[1] for g in groups]) - self.rows = len(shingle1) - self.cols = len(shingle2) + if self.by[0] == '.' 
or self.by[1] == '.': + shingle1 = set([g for g in groups]) + else: + shingle1 = set([g[0] for g in groups]) + shingle2 = set([g[1] for g in groups]) + if self.by[0] == '.': + self.rows = 1 + self.cols = len(shingle1) + elif self.by[1] == '.': + self.rows = len(shingle1) + self.cols = 1 + else: + self.rows = len(shingle1) + self.cols = len(shingle2) trellised = [[None for _ in range(self.cols)] for _ in range(self.rows)] self.group_grid = [[None for _ in range(self.cols)] for _ in range(self.rows)] row = 0 @@ -463,9 +480,16 @@ def adjust_subplots(fig, axes, trellis): else: axis.get_xaxis().set_ticks([]) axis.set_xlabel('') - label1 = "%s = %s" % (trellis.by[0], trellis.group_grid[index / trellis.cols][index % trellis.cols][0]) - label2 = "%s = %s" % (trellis.by[1], trellis.group_grid[index / trellis.cols][index % trellis.cols][1]) - if trellis.cols > 1: + if trellis.by[0] == '.': + label1 = "%s = %s" % (trellis.by[1], trellis.group_grid[index / trellis.cols][index % trellis.cols]) + label2 = None + elif trellis.by[1] == '.': + label1 = "%s = %s" % (trellis.by[0], trellis.group_grid[index / trellis.cols][index % trellis.cols]) + label2 = None + else: + label1 = "%s = %s" % (trellis.by[0], trellis.group_grid[index / trellis.cols][index % trellis.cols][0]) + label2 = "%s = %s" % (trellis.by[1], trellis.group_grid[index / trellis.cols][index % trellis.cols][1]) + if label2 is not None: axis.table(cellText=[[label1], [label2]], loc='top', cellLoc='center', cellColours=[['lightgrey'], ['lightgrey']]) From 36023002d05ce8a93b78a45b81ea729f6992ba2c Mon Sep 17 00:00:00 2001 From: Vytautas Jancauskas Date: Tue, 31 Jul 2012 14:42:55 +0300 Subject: [PATCH 38/88] Some adjustments --- pandas/tools/rplot.py | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/pandas/tools/rplot.py b/pandas/tools/rplot.py index 04f0332b80885..63cebef46bb2e 100644 --- a/pandas/tools/rplot.py +++ b/pandas/tools/rplot.py @@ -63,7 +63,7 @@ def parse_facets(facet): lhs, 
rhs = [col.strip() for col in facet.split('~')] return (lhs, rhs) -def scale_size(column, categorical, min_size=1.0, max_size=80.0): +def scale_size(column, min_size=1.0, max_size=80.0): """Creates a function that converts between a data attribute to point size. Parameters: @@ -78,13 +78,10 @@ def scale_size(column, categorical, min_size=1.0, max_size=80.0): a function of two arguments that takes a data set and a row number, returns float """ def scaler(data, index): - if categorical: - pass - else: - x = data[column].iget(index) - a = min(data[column]) - b = max(data[column]) - return min_size + ((x - a) / (b - a)) * (max_size - min_size) + x = data[column].iget(index) + a = float(min(data[column])) + b = float(max(data[column])) + return min_size + ((x - a) / (b - a)) * (max_size - min_size) return scaler def scale_gradient(column, categorical, colour1=(0.0, 0.0, 0.0), colour2=(1.0, 0.7, 0.8)): @@ -219,12 +216,12 @@ class Layer: """ Layer object representing a single plot layer. """ - def __init__(self, data=None, aes_=None): + def __init__(self, data=None, aesthetics=None): self.data = data - if aes_ is None: + if aesthetics is None: self.aes = aes() else: - self.aes = aes_ + self.aes = aesthetics def work(self, fig=None, ax=None): pass From fcf35db8d6c90fdfe13e123a6e196e8439f4b305 Mon Sep 17 00:00:00 2001 From: Vytautas Jancauskas Date: Tue, 31 Jul 2012 14:53:27 +0300 Subject: [PATCH 39/88] Created a random colour scaler --- pandas/tools/rplot.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/pandas/tools/rplot.py b/pandas/tools/rplot.py index 63cebef46bb2e..4cafeb1318f4e 100644 --- a/pandas/tools/rplot.py +++ b/pandas/tools/rplot.py @@ -12,7 +12,7 @@ # * Expand RPlot class # -def random_colour(name): +def scale_random_colour(column): """Random colour from a string or other hashable value. Parameters: @@ -23,8 +23,10 @@ def random_colour(name): -------- (r, g, b): Where r, g, b are random numbers in the range (0, 1). 
""" - random.seed(name) - return [random.random() for _ in range(3)] + def scaler(data, index): + random.seed(data[column].iget(index)) + return [random.random() for _ in range(3)] + return scaler def filter_column(frame, column, filter_column, filter_value): """Select only those values from column that have a specified value in another column. From fe56b4c3135b0416e211d87d5eddf98025308082 Mon Sep 17 00:00:00 2001 From: Vytautas Jancauskas Date: Tue, 31 Jul 2012 21:09:28 +0300 Subject: [PATCH 40/88] Added GeomPolyFit class --- pandas/tools/rplot.py | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/pandas/tools/rplot.py b/pandas/tools/rplot.py index 4cafeb1318f4e..a2215e4191394 100644 --- a/pandas/tools/rplot.py +++ b/pandas/tools/rplot.py @@ -264,6 +264,31 @@ def work(self, fig=None, ax=None): ax.set_ylabel(self.aes['y']) return fig, ax +class GeomPolyFit(Layer): + def __init__(self, degree, lw=2.0, colour='grey'): + self.degree = degree + self.lw = lw + self.colour = colour + Layer.__init__(self) + + def work(self, fig=None, ax=None): + if ax is None: + if fig is None: + return fig, ax + else: + ax = fig.gca() + from numpy.polynomial.polynomial import polyfit + from numpy.polynomial.polynomial import polyval + x = self.data[self.aes['x']] + y = self.data[self.aes['y']] + min_x = min(x) + max_x = max(x) + c = polyfit(x, y, self.degree) + x_ = np.linspace(min_x, max_x, len(x)) + y_ = polyval(x_, c) + ax.plot(x_, y_, lw=self.lw, c=self.colour) + return fig, ax + class GeomDensity2D(Layer): def work(self, fig=None, ax=None): """Render the layer on a matplotlib axis. 
From 89633755c6e656d517da62b9152a55a7a2fa6cdd Mon Sep 17 00:00:00 2001 From: Vytautas Jancauskas Date: Tue, 31 Jul 2012 21:13:30 +0300 Subject: [PATCH 41/88] Added GeomScatter class --- pandas/tools/rplot.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/pandas/tools/rplot.py b/pandas/tools/rplot.py index a2215e4191394..c5958918cacea 100644 --- a/pandas/tools/rplot.py +++ b/pandas/tools/rplot.py @@ -289,6 +289,23 @@ def work(self, fig=None, ax=None): ax.plot(x_, y_, lw=self.lw, c=self.colour) return fig, ax +class GeomScatter(Layer): + def __init__(self, **kwds): + self.kwds = kwds + Layer.__init__(self) + + def work(self, fig=None, ax=None): + if ax is None: + if fig is None: + return fig, ax + else: + ax = fig.gca() + x = self.data[self.aes['x']] + y = self.data[self.aes['y']] + ax.scatter(x, y, **self.kwds) + return fig, ax + + class GeomDensity2D(Layer): def work(self, fig=None, ax=None): """Render the layer on a matplotlib axis. From 20ce52a8f7b67afdd9f73bfebde93200f0f911ab Mon Sep 17 00:00:00 2001 From: Vytautas Jancauskas Date: Tue, 31 Jul 2012 22:27:54 +0300 Subject: [PATCH 42/88] Added GeomHistogram class --- pandas/tools/rplot.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/pandas/tools/rplot.py b/pandas/tools/rplot.py index c5958918cacea..4165ec3552ae8 100644 --- a/pandas/tools/rplot.py +++ b/pandas/tools/rplot.py @@ -305,6 +305,21 @@ def work(self, fig=None, ax=None): ax.scatter(x, y, **self.kwds) return fig, ax +class GeomHistogram(Layer): + def __init__(self, bins=10, colour='grey'): + self.bins = bins + self.colour = colour + Layer.__init__(self) + + def work(self, fig=None, ax=None): + if ax is None: + if fig is None: + return fig, ax + else: + ax = fig.gca() + x = self.data[self.aes['x']] + ax.hist(x, self.bins, facecolor=self.colour) + return fig, ax class GeomDensity2D(Layer): def work(self, fig=None, ax=None): From dc23870115f0a29712df89a7b17837846ed81b70 Mon Sep 17 00:00:00 2001 From: Vytautas 
Jancauskas Date: Tue, 31 Jul 2012 22:52:25 +0300 Subject: [PATCH 43/88] Added GeomDensity class --- pandas/tools/rplot.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/pandas/tools/rplot.py b/pandas/tools/rplot.py index 4165ec3552ae8..67c19957106cb 100644 --- a/pandas/tools/rplot.py +++ b/pandas/tools/rplot.py @@ -321,6 +321,20 @@ def work(self, fig=None, ax=None): ax.hist(x, self.bins, facecolor=self.colour) return fig, ax +class GeomDensity(Layer): + def work(self, fig=None, ax=None): + if ax is None: + if fig is None: + return fig, ax + else: + ax = fig.gca() + from scipy.stats import gaussian_kde + x = self.data[self.aes['x']] + gkde = gaussian_kde(x) + ind = np.linspace(x.min(), x.max(), 200) + ax.plot(ind, gkde.evaluate(ind)) + return fig, ax + class GeomDensity2D(Layer): def work(self, fig=None, ax=None): """Render the layer on a matplotlib axis. From e82ed054793693d461b96a0b26eb2565f2fb6747 Mon Sep 17 00:00:00 2001 From: Vytautas Jancauskas Date: Wed, 1 Aug 2012 15:17:26 +0300 Subject: [PATCH 44/88] Docstring updates --- pandas/tools/rplot.py | 107 ++++++++++++++++++++++-------------------- 1 file changed, 56 insertions(+), 51 deletions(-) diff --git a/pandas/tools/rplot.py b/pandas/tools/rplot.py index 67c19957106cb..f4d69a6ed8b7f 100644 --- a/pandas/tools/rplot.py +++ b/pandas/tools/rplot.py @@ -7,64 +7,27 @@ # # TODO: -# * Make sure trellis display works when two or one grouping variable is specified -# * Enable labelling for legend display -# * Expand RPlot class +# * Make sure legends work properly # def scale_random_colour(column): - """Random colour from a string or other hashable value. + """Creates a function that assigns each value in a DataFrame + column a random colour from RGB space. Parameters: ----------- - name: A string value to use as a seed to the random number generator. + column: string, a column name Returns: -------- - (r, g, b): Where r, g, b are random numbers in the range (0, 1). 
+ a function of two arguments that takes a data set and row number, returns + a list of three elements, representing an RGB colour """ def scaler(data, index): random.seed(data[column].iget(index)) return [random.random() for _ in range(3)] return scaler -def filter_column(frame, column, filter_column, filter_value): - """Select only those values from column that have a specified value in another column. - - Parameters: - ----------- - frame: pandas data frame object. - column: column name from which to select values. - filter_column: column used to filter. - filter_value: only those rows with this value in filter_column will be selected. - - Returns: - -------- - numpy array with filtered values. - """ - n = len(frame) - vcol = frame[column] - fcol = frame[filter_column] - result = [] - for v, f in zip(vcol, fcol): - if f == filter_value: - result.append(v) - return np.array(result) - -def parse_facets(facet): - """Parse facets formula of the form 'lhs ~ rhs'. - - Parameters: - ----------- - facets: facets formula. - - Returns: - -------- - A list with LHS and RHS column names. - """ - lhs, rhs = [col.strip() for col in facet.split('~')] - return (lhs, rhs) - def scale_size(column, min_size=1.0, max_size=80.0): """Creates a function that converts between a data attribute to point size. @@ -166,7 +129,8 @@ def scale_shape(column): Returns: -------- - a function of two arguments + a function of two arguments, takes DataFrame and row index, return string with + matplotlib marker character """ shapes = ['o', 'D', 'h', 'H', '_', '8', 'p', '+', '.', 's', '*', 'd', '^', '<', '>', 'v', '|', 'x'] def scaler(data, index): @@ -184,7 +148,8 @@ def scale_constant(constant): Returns: -------- - a two argument function + a two argument function, takes DataFrame and row index, + returns specified value """ def scaler(data, index): return constant @@ -192,6 +157,15 @@ def scaler(data, index): def default_aes(x=None, y=None): """Create the default aesthetics dictionary. 
+ + Parameters: + ----------- + x: string, DataFrame column name + y: string, DataFrame column name + + Returns: + -------- + a dictionary with aesthetics bindings """ return { 'x' : x, @@ -202,8 +176,21 @@ def default_aes(x=None, y=None): 'alpha' : scale_constant(1.0), } -def aes(x=None, y=None, size=None, colour=None, shape=None, alpha=None): +def make_aes(x=None, y=None, size=None, colour=None, shape=None, alpha=None): """Create an empty aesthetics dictionary. + + Parameters: + ----------- + x: string, DataFrame column name + y: string, DataFrame column name + size: function, binding for size attribute of Geoms + colour: function, binding for colour attribute of Geoms + shape: function, binding for shape attribute of Geoms + alpha: function, binding for alpha attribute of Geoms + + Returns: + -------- + a dictionary with aesthetics bindings """ return { 'x' : x, @@ -218,15 +205,33 @@ class Layer: """ Layer object representing a single plot layer. """ - def __init__(self, data=None, aesthetics=None): + def __init__(self, data=None, aes=None): + """Initialize layer object. + + Parameters: + ----------- + data: pandas DataFrame instance + aes: aesthetics dictionary with bindings + """ self.data = data - if aesthetics is None: - self.aes = aes() + if aes is None: + self.aes = make_aes() else: - self.aes = aesthetics + self.aes = aes def work(self, fig=None, ax=None): - pass + """Do the drawing (usually) work. 
+ + Parameters: + ----------- + fig: matplotlib figure + ax: matplotlib axis object + + Returns: + -------- + a tuple with the same figure and axis instances + """ + return fig, ax class GeomPoint(Layer): def work(self, fig=None, ax=None): From f2e4fbe308ccf3277895d5facf7f78fbe26c23fd Mon Sep 17 00:00:00 2001 From: Vytautas Jancauskas Date: Wed, 1 Aug 2012 17:21:51 +0300 Subject: [PATCH 45/88] Docstring updates --- pandas/tools/rplot.py | 131 +++++++++++++++++++++++++++++++----------- 1 file changed, 96 insertions(+), 35 deletions(-) diff --git a/pandas/tools/rplot.py b/pandas/tools/rplot.py index f4d69a6ed8b7f..4242c7884222c 100644 --- a/pandas/tools/rplot.py +++ b/pandas/tools/rplot.py @@ -270,13 +270,35 @@ def work(self, fig=None, ax=None): return fig, ax class GeomPolyFit(Layer): + """ + Draw a polynomial fit of specified degree. + """ def __init__(self, degree, lw=2.0, colour='grey'): + """Initialize GeomPolyFit object. + + Parameters: + ----------- + degree: an integer, polynomial degree + lw: line width + colour: matplotlib colour + """ self.degree = degree self.lw = lw self.colour = colour Layer.__init__(self) def work(self, fig=None, ax=None): + """Draw the polynomial fit on matplotlib figure or axis + + Parameters: + ----------- + fig: matplotlib figure + ax: matplotlib axis + + Returns: + -------- + a tuple with figure and axis objects + """ if ax is None: if fig is None: return fig, ax @@ -295,11 +317,35 @@ def work(self, fig=None, ax=None): return fig, ax class GeomScatter(Layer): - def __init__(self, **kwds): - self.kwds = kwds + """ + An efficient scatter plot, use this instead of GeomPoint for speed. + """ + def __init__(self, marker='o', colour='lightblue', alpha=1.0): + """Initialize GeomScatter instance. 
+ + Parameters: + ----------- + marker: matplotlib marker string + colour: matplotlib colour + alpha: matplotlib alpha + """ + self.marker = marker + self.colour = colour + self.alpha = alpha Layer.__init__(self) def work(self, fig=None, ax=None): + """Draw a scatter plot on matplotlib figure or axis + + Parameters: + ----------- + fig: matplotlib figure + ax: matplotlib axis + + Returns: + -------- + a tuple with figure and axis objects + """ if ax is None: if fig is None: return fig, ax @@ -307,16 +353,37 @@ def work(self, fig=None, ax=None): ax = fig.gca() x = self.data[self.aes['x']] y = self.data[self.aes['y']] - ax.scatter(x, y, **self.kwds) + ax.scatter(x, y, marker=self.marker, c=self.colour, alpha=self.alpha) return fig, ax class GeomHistogram(Layer): - def __init__(self, bins=10, colour='grey'): + """ + An efficient histogram, use this instead of GeomBar for speed. + """ + def __init__(self, bins=10, colour='lightblue'): + """Initialize GeomHistogram instance. + + Parameters: + ----------- + bins: integer, number of histogram bins + colour: matplotlib colour + """ self.bins = bins self.colour = colour Layer.__init__(self) def work(self, fig=None, ax=None): + """Draw a histogram on matplotlib figure or axis + + Parameters: + ----------- + fig: matplotlib figure + ax: matplotlib axis + + Returns: + -------- + a tuple with figure and axis objects + """ if ax is None: if fig is None: return fig, ax @@ -327,7 +394,22 @@ def work(self, fig=None, ax=None): return fig, ax class GeomDensity(Layer): + """ + A kernel density estimation plot. + """ def work(self, fig=None, ax=None): + """Draw a one dimensional kernel density plot. + You can specify either a figure or an axis to draw on. 
+ + Parameters: + ----------- + fig: matplotlib figure object + ax: matplotlib axis object to draw on + + Returns: + -------- + fig, ax: matplotlib figure and axis objects + """ if ax is None: if fig is None: return fig, ax @@ -342,7 +424,7 @@ def work(self, fig=None, ax=None): class GeomDensity2D(Layer): def work(self, fig=None, ax=None): - """Render the layer on a matplotlib axis. + """Draw a two dimensional kernel density plot. You can specify either a figure or an axis to draw on. Parameters: @@ -374,35 +456,6 @@ def work(self, fig=None, ax=None): ax.contour(Z, extent=[x_min, x_max, y_min, y_max]) return fig, ax -def display_grouped(grouped_data, x, y, fig): - """A test routine to display grouped data. - - Parameters: - ----------- - grouped_data: data frame grouped by df.groupby pandas routine - fig: matplotlib figure - - Returns: - -------- - Nothing - """ - shingle1 = set([]) - shingle2 = set([]) - # Fill shingles. - for name, group in grouped_data: - if type(name) is type(()): - shingle1.add(name[0]) - shingle2.add(name[1]) - else: - shingle1.add(name) - rows = len(shingle1) - cols = len(shingle2) - subplot_nr = 1 - for name, group, in grouped_data: - ax = fig.add_subplot(rows, cols, subplot_nr) - ax.scatter(group[x], group[y]) - subplot_nr += 1 - class TrellisGrid(Layer): def __init__(self, by): """Initialize TreelisGrid instance. @@ -536,6 +589,15 @@ def work_grid(grid, fig): return axes def adjust_subplots(fig, axes, trellis): + """Adjust the subtplots on matplotlib figure with the + fact that we have a trellis plot in mind. 
+ + Parameters: + ----------- + fig: matplotlib figure + axes: a two dimensional grid of matplotlib axes + trellis: TrellisGrid object + """ # Flatten the axes grid axes = [ax for row in axes for ax in row] min_x = min([ax.get_xlim()[0] for ax in axes]) @@ -571,7 +633,6 @@ def adjust_subplots(fig, axes, trellis): else: axis.table(cellText=[[label1]], loc='top', cellLoc='center', cellColours=[['lightgrey']]) fig.subplots_adjust(wspace=0.05, hspace=0.2) - return ax, fig class RPlot: """ From 8036c70de5f804aee0386a2bd67eeee0a37a942a Mon Sep 17 00:00:00 2001 From: Vytautas Jancauskas Date: Wed, 1 Aug 2012 17:24:21 +0300 Subject: [PATCH 46/88] Enabled x axis label for histograms --- pandas/tools/rplot.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tools/rplot.py b/pandas/tools/rplot.py index 4242c7884222c..48be5325adff0 100644 --- a/pandas/tools/rplot.py +++ b/pandas/tools/rplot.py @@ -391,6 +391,7 @@ def work(self, fig=None, ax=None): ax = fig.gca() x = self.data[self.aes['x']] ax.hist(x, self.bins, facecolor=self.colour) + ax.set_xlabel(self.aes['x']) return fig, ax class GeomDensity(Layer): From ca34b54cad8173b13b6180bd3e30cb32e5d51b4e Mon Sep 17 00:00:00 2001 From: Vytautas Jancauskas Date: Wed, 1 Aug 2012 19:59:34 +0300 Subject: [PATCH 47/88] Finished updating docstrings --- pandas/tools/rplot.py | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/pandas/tools/rplot.py b/pandas/tools/rplot.py index 48be5325adff0..3f0aed757b179 100644 --- a/pandas/tools/rplot.py +++ b/pandas/tools/rplot.py @@ -640,15 +640,35 @@ class RPlot: The main plot object. Add layers to an instance of this object to create a plot. """ def __init__(self, data, x=None, y=None): + """Initialize RPlot instance. 
+ + Parameters: + ----------- + data: pandas DataFrame instance + x: string, DataFrame column name + y: string, DataFrame column name + """ self.layers = [Layer(data, default_aes(x=x, y=y))] trellised = False def __add__(self, other): + """Add a layer to RPlot instance. + + Parameters: + ----------- + other: Layer instance + """ if not isinstance(other, Layer): raise TypeError("The operand on the right side of + must be a Layer instance") self.layers.append(other) - def show(self, fig=None): + def render(self, fig=None): + """Render all the layers on a matplotlib figure. + + Parameters: + ----------- + fig: matplotlib figure + """ if fig is None: fig = plt.gcf() # Look for the last TrellisGrid instance in the layer list From 5bb7e4edef1b9c082cab9a1f5ad142709606b1b9 Mon Sep 17 00:00:00 2001 From: Vytautas Jancauskas Date: Thu, 2 Aug 2012 22:41:58 +0300 Subject: [PATCH 48/88] Allow users to specify non-callable objects in make_aes --- pandas/tools/rplot.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/pandas/tools/rplot.py b/pandas/tools/rplot.py index 3f0aed757b179..bd5a4a0abf76a 100644 --- a/pandas/tools/rplot.py +++ b/pandas/tools/rplot.py @@ -192,6 +192,14 @@ def make_aes(x=None, y=None, size=None, colour=None, shape=None, alpha=None): -------- a dictionary with aesthetics bindings """ + if not hasattr(size, '__call__'): + size = scale_constant(size) + if not hasattr(colour, '__call__'): + colour = scale_constant(colour) + if not hasattr(shape, '__call__'): + shape = scale_constant(shape) + if not hasattr(alpha, '__call__'): + alpha = scale_constant(alpha) return { 'x' : x, 'y' : y, From 0312253b3f9915f2c0c4d9d471f2f37c894d79e4 Mon Sep 17 00:00:00 2001 From: Vytautas Jancauskas Date: Fri, 3 Aug 2012 00:35:22 +0300 Subject: [PATCH 49/88] Implemented scale_shape as a class --- pandas/tools/rplot.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/pandas/tools/rplot.py b/pandas/tools/rplot.py index bd5a4a0abf76a..7060efe8add88 100644 --- 
a/pandas/tools/rplot.py +++ b/pandas/tools/rplot.py @@ -120,6 +120,19 @@ def scaler(data, index): b2 + (b3 - b2) * x_scaled) return scaler +class ScaleShape: + def __init__(self, column): + self.column = column + self.shapes = ['o', 'D', 'h', 'H', '_', '8', 'p', '+', '.', 's', '*', 'd', '^', '<', '>', 'v', '|', 'x'] + self.legend = set([]) + + def __call__(self, data, index): + values = list(set(data[self.column])) + x = data[self.column].iget(index) + legend = "%s = %s" % (self.column, str(x)) + self.legend.add(legend) + return self.shapes[values.index(x)] + def scale_shape(column): """Create a function that converts between a categorical value and a scatter plot shape. From a6fed3f76cb53e8c74115d5d9944d2f22962f58d Mon Sep 17 00:00:00 2001 From: Vytautas Jancauskas Date: Sun, 5 Aug 2012 23:44:22 +0300 Subject: [PATCH 50/88] Added dictionary_union function --- pandas/tools/rplot.py | 52 ++++++++++++++++++++++++++++++++++--------- 1 file changed, 42 insertions(+), 10 deletions(-) diff --git a/pandas/tools/rplot.py b/pandas/tools/rplot.py index 7060efe8add88..f456791952970 100644 --- a/pandas/tools/rplot.py +++ b/pandas/tools/rplot.py @@ -120,17 +120,18 @@ def scaler(data, index): b2 + (b3 - b2) * x_scaled) return scaler -class ScaleShape: +class Scale: + pass + +class ScaleShape(Scale): def __init__(self, column): self.column = column self.shapes = ['o', 'D', 'h', 'H', '_', '8', 'p', '+', '.', 's', '*', 'd', '^', '<', '>', 'v', '|', 'x'] - self.legend = set([]) + self.legends = set([]) def __call__(self, data, index): values = list(set(data[self.column])) x = data[self.column].iget(index) - legend = "%s = %s" % (self.column, str(x)) - self.legend.add(legend) return self.shapes[values.index(x)] def scale_shape(column): @@ -273,6 +274,7 @@ def work(self, fig=None, ax=None): return fig, ax else: ax = fig.gca() + legend = {} for index in range(len(self.data)): row = self.data.irow(index) x = row[self.aes['x']] @@ -281,14 +283,22 @@ def work(self, fig=None, ax=None): 
colour_scaler = self.aes['colour'] shape_scaler = self.aes['shape'] alpha = self.aes['alpha'] - ax.scatter(x, y, - s=size_scaler(self.data, index), - c=colour_scaler(self.data, index), - marker=shape_scaler(self.data, index), - alpha=alpha(self.data, index)) + size_value = size_scaler(self.data, index) + colour_value = colour_scaler(self.data, index) + marker_value = shape_scaler(self.data, index) + alpha_value = alpha(self.data, index) + patch = ax.scatter(x, y, + s=size_value, + c=colour_value, + marker=marker_value, + alpha=alpha_value) + if colour_scaler.categorical: + legend[(colour_value, marker_value)] = patch + else: + legend[(marker_value)] = patch ax.set_xlabel(self.aes['x']) ax.set_ylabel(self.aes['y']) - return fig, ax + return fig, ax, legend class GeomPolyFit(Layer): """ @@ -544,6 +554,28 @@ def trellis(self, layers): trellised_layers.append(trellised) return trellised_layers +def dictionary_union(dict1, dict2): + """Take two dictionaries, return dictionary union. + + Parameters: + ----------- + dict1: Python dictionary + dict2: Python dictionary + + Returns: + -------- + A union of the dictionaries. It assumes that values + with the same keys are identical. + """ + keys1 = dict1.keys() + keys2 = dict2.keys() + result = {} + for key1 in keys1: + result[key1] = dict1[key1] + for key2 in keys2: + result[key2] = dict2[key2] + return result + def merge_aes(layer1, layer2): """Merges the aesthetics dictionaries for the two layers. Look up sequence_layers function. 
Which layer is first and which From 14b85d3a8f27a1a1bfa625a1b844f16a74edbee3 Mon Sep 17 00:00:00 2001 From: Vytautas Jancauskas Date: Sun, 5 Aug 2012 23:54:14 +0300 Subject: [PATCH 51/88] Work towards displaying legends properly --- pandas/tools/rplot.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pandas/tools/rplot.py b/pandas/tools/rplot.py index f456791952970..3923d7217c57a 100644 --- a/pandas/tools/rplot.py +++ b/pandas/tools/rplot.py @@ -240,6 +240,7 @@ def __init__(self, data=None, aes=None): self.aes = make_aes() else: self.aes = aes + self.legend = {} def work(self, fig=None, ax=None): """Do the drawing (usually) work. @@ -274,7 +275,6 @@ def work(self, fig=None, ax=None): return fig, ax else: ax = fig.gca() - legend = {} for index in range(len(self.data)): row = self.data.irow(index) x = row[self.aes['x']] @@ -293,12 +293,12 @@ def work(self, fig=None, ax=None): marker=marker_value, alpha=alpha_value) if colour_scaler.categorical: - legend[(colour_value, marker_value)] = patch + self.legend[(colour_value, marker_value)] = patch else: - legend[(marker_value)] = patch + self.legend[(marker_value)] = patch ax.set_xlabel(self.aes['x']) ax.set_ylabel(self.aes['y']) - return fig, ax, legend + return fig, ax class GeomPolyFit(Layer): """ @@ -642,7 +642,7 @@ def work_grid(grid, fig): grid[row][col].work(ax=axes[row][col]) return axes -def adjust_subplots(fig, axes, trellis): +def adjust_subplots(fig, axes, trellis, layers): """Adjust the subtplots on matplotlib figure with the fact that we have a trellis plot in mind. 
From 4534742c040914f629d9021707da66578ac26883 Mon Sep 17 00:00:00 2001 From: Vytautas Jancauskas Date: Mon, 6 Aug 2012 13:28:15 +0300 Subject: [PATCH 52/88] Implemented legend display for scatter plots --- pandas/tools/rplot.py | 26 +++++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) diff --git a/pandas/tools/rplot.py b/pandas/tools/rplot.py index 3923d7217c57a..3fc24ee53264d 100644 --- a/pandas/tools/rplot.py +++ b/pandas/tools/rplot.py @@ -293,9 +293,9 @@ def work(self, fig=None, ax=None): marker=marker_value, alpha=alpha_value) if colour_scaler.categorical: - self.legend[(colour_value, marker_value)] = patch + self.legend[(colour_scaler.column, colour_value, shape_scaler.column, marker_value)] = patch else: - self.legend[(marker_value)] = patch + self.legend[(shape_scaler.column, marker_value)] = patch ax.set_xlabel(self.aes['x']) ax.set_ylabel(self.aes['y']) return fig, ax @@ -651,6 +651,7 @@ def adjust_subplots(fig, axes, trellis, layers): fig: matplotlib figure axes: a two dimensional grid of matplotlib axes trellis: TrellisGrid object + layers: last grid of layers in the plot """ # Flatten the axes grid axes = [ax for row in axes for ax in row] @@ -686,6 +687,25 @@ def adjust_subplots(fig, axes, trellis, layers): cellColours=[['lightgrey'], ['lightgrey']]) else: axis.table(cellText=[[label1]], loc='top', cellLoc='center', cellColours=[['lightgrey']]) + # Flatten the layer grid + layers = [layer for row in layers for layer in row] + legend = {} + for layer in layers: + legend = dictionary_union(legend, layer.legend) + patches = [] + labels = [] + for key in legend.keys(): + value = legend[key] + patches.append(value) + if len(key) == 1: + col, val = keys + labels.append("%s = %s" % (col, str(val))) + elif len(key) == 2: + col1, val1, col2, val2 = key + labels.append("%s = %s, %s = %s" % (col1, str(val1), col2, str(val2))) + else: + raise ValueError("Maximum 2 categorical attributes to display a lengend of") + fig.legend(patches, 
labels, loc='upper right') fig.subplots_adjust(wspace=0.05, hspace=0.2) class RPlot: @@ -749,6 +769,6 @@ def render(self, fig=None): new_layers = sequence_grids(new_layers) axes_grids = [work_grid(grid, fig) for grid in new_layers] axes_grid = axes_grids[-1] - adjust_subplots(fig, axes_grid, last_trellis) + adjust_subplots(fig, axes_grid, last_trellis, new_layers[-1]) # And we're done return fig \ No newline at end of file From 900b2ca60d706fc937228c537593f5c2e628af40 Mon Sep 17 00:00:00 2001 From: Vytautas Jancauskas Date: Mon, 6 Aug 2012 16:33:36 +0300 Subject: [PATCH 53/88] More progress towards correct legend display --- pandas/tools/rplot.py | 44 +++++++++++++++++++++++++++---------------- 1 file changed, 28 insertions(+), 16 deletions(-) diff --git a/pandas/tools/rplot.py b/pandas/tools/rplot.py index 3fc24ee53264d..d7012eabc9a9b 100644 --- a/pandas/tools/rplot.py +++ b/pandas/tools/rplot.py @@ -126,8 +126,9 @@ class Scale: class ScaleShape(Scale): def __init__(self, column): self.column = column - self.shapes = ['o', 'D', 'h', 'H', '_', '8', 'p', '+', '.', 's', '*', 'd', '^', '<', '>', 'v', '|', 'x'] + self.shapes = ['o', '+', '8', 'p', 'D', '.', 's', '*', 'd', '^', '<', '>', 'v', '|', 'x'] self.legends = set([]) + self.categorical = True def __call__(self, data, index): values = list(set(data[self.column])) @@ -153,6 +154,14 @@ def scaler(data, index): return shapes[values.index(x)] return scaler +class ScaleConstant(Scale): + def __init__(self, value): + self.value = value + self.categorical = False + + def __call__(self, data, index): + return self.value + def scale_constant(constant): """Create a function that always returns a specified constant value. 
@@ -184,10 +193,10 @@ def default_aes(x=None, y=None): return { 'x' : x, 'y' : y, - 'size' : scale_constant(40.0), - 'colour' : scale_constant('grey'), - 'shape' : scale_constant('o'), - 'alpha' : scale_constant(1.0), + 'size' : ScaleConstant(40.0), + 'colour' : ScaleConstant('grey'), + 'shape' : ScaleConstant('o'), + 'alpha' : ScaleConstant(1.0), } def make_aes(x=None, y=None, size=None, colour=None, shape=None, alpha=None): @@ -207,13 +216,13 @@ def make_aes(x=None, y=None, size=None, colour=None, shape=None, alpha=None): a dictionary with aesthetics bindings """ if not hasattr(size, '__call__'): - size = scale_constant(size) + size = ScaleConstant(size) if not hasattr(colour, '__call__'): - colour = scale_constant(colour) + colour = ScaleConstant(colour) if not hasattr(shape, '__call__'): - shape = scale_constant(shape) + shape = ScaleConstant(shape) if not hasattr(alpha, '__call__'): - alpha = scale_constant(alpha) + alpha = ScaleConstant(alpha) return { 'x' : x, 'y' : y, @@ -292,10 +301,12 @@ def work(self, fig=None, ax=None): c=colour_value, marker=marker_value, alpha=alpha_value) + label = [] if colour_scaler.categorical: - self.legend[(colour_scaler.column, colour_value, shape_scaler.column, marker_value)] = patch - else: - self.legend[(shape_scaler.column, marker_value)] = patch + label += [colour_scaler.column, row[contour_scaler.column]] + elif shape_scaler.categorical: + label += [shape_scaler.column, row[shape_scaler.column]] + self.legend[tuple(label)] = patch ax.set_xlabel(self.aes['x']) ax.set_ylabel(self.aes['y']) return fig, ax @@ -697,15 +708,16 @@ def adjust_subplots(fig, axes, trellis, layers): for key in legend.keys(): value = legend[key] patches.append(value) - if len(key) == 1: - col, val = keys + if len(key) == 2: + col, val = key labels.append("%s = %s" % (col, str(val))) - elif len(key) == 2: + elif len(key) == 4: col1, val1, col2, val2 = key labels.append("%s = %s, %s = %s" % (col1, str(val1), col2, str(val2))) else: raise 
ValueError("Maximum 2 categorical attributes to display a lengend of") - fig.legend(patches, labels, loc='upper right') + if len(legend): + fig.legend(patches, labels, loc='upper right') fig.subplots_adjust(wspace=0.05, hspace=0.2) class RPlot: From ca8f3f8339704cff916d63de5e50df1c16809251 Mon Sep 17 00:00:00 2001 From: Vytautas Jancauskas Date: Mon, 6 Aug 2012 21:58:12 +0300 Subject: [PATCH 54/88] ScaleShape fixes --- pandas/tools/rplot.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pandas/tools/rplot.py b/pandas/tools/rplot.py index d7012eabc9a9b..244b9834fa8aa 100644 --- a/pandas/tools/rplot.py +++ b/pandas/tools/rplot.py @@ -126,12 +126,14 @@ class Scale: class ScaleShape(Scale): def __init__(self, column): self.column = column - self.shapes = ['o', '+', '8', 'p', 'D', '.', 's', '*', 'd', '^', '<', '>', 'v', '|', 'x'] + self.shapes = ['o', '+', 's', '*', '^', '<', '>', 'v', '|', 'x'] self.legends = set([]) self.categorical = True def __call__(self, data, index): - values = list(set(data[self.column])) + values = sorted(list(set(data[self.column]))) + if len(values) != len(self.shapes): + raise ValueError("Too many different values of the categorical attribute for ScaleShape") x = data[self.column].iget(index) return self.shapes[values.index(x)] From 149a8f30091496b35f41c03b9e2bc2947a1ebb9b Mon Sep 17 00:00:00 2001 From: Vytautas Jancauskas Date: Mon, 6 Aug 2012 22:03:15 +0300 Subject: [PATCH 55/88] Fix a mistake with ScaleShape --- pandas/tools/rplot.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tools/rplot.py b/pandas/tools/rplot.py index 244b9834fa8aa..7db9417cedd34 100644 --- a/pandas/tools/rplot.py +++ b/pandas/tools/rplot.py @@ -132,7 +132,7 @@ def __init__(self, column): def __call__(self, data, index): values = sorted(list(set(data[self.column]))) - if len(values) != len(self.shapes): + if len(values) > len(self.shapes): raise ValueError("Too many different values of the categorical attribute for 
ScaleShape") x = data[self.column].iget(index) return self.shapes[values.index(x)] From 47e5f8f11bf58b733c109146d920969993bed984 Mon Sep 17 00:00:00 2001 From: Vytautas Jancauskas Date: Tue, 7 Aug 2012 21:40:44 +0300 Subject: [PATCH 56/88] Make sure the legend is nicely sorted --- pandas/tools/rplot.py | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/pandas/tools/rplot.py b/pandas/tools/rplot.py index 7db9417cedd34..742fe1c23ffe8 100644 --- a/pandas/tools/rplot.py +++ b/pandas/tools/rplot.py @@ -137,6 +137,15 @@ def __call__(self, data, index): x = data[self.column].iget(index) return self.shapes[values.index(x)] +class ScaleRandomColour(Scale): + def __init__(self, column): + self.column = column + self.categorical = True + + def __call__(self, data, index): + random.seed(data[self.column].iget(index)) + return [random.random() for _ in range(3)] + def scale_shape(column): """Create a function that converts between a categorical value and a scatter plot shape. 
@@ -305,8 +314,8 @@ def work(self, fig=None, ax=None): alpha=alpha_value) label = [] if colour_scaler.categorical: - label += [colour_scaler.column, row[contour_scaler.column]] - elif shape_scaler.categorical: + label += [colour_scaler.column, row[colour_scaler.column]] + if shape_scaler.categorical: label += [shape_scaler.column, row[shape_scaler.column]] self.legend[tuple(label)] = patch ax.set_xlabel(self.aes['x']) @@ -707,15 +716,15 @@ def adjust_subplots(fig, axes, trellis, layers): legend = dictionary_union(legend, layer.legend) patches = [] labels = [] - for key in legend.keys(): + for key in sorted(legend.keys(), key=lambda tup: (tup[1], tup[3])): value = legend[key] patches.append(value) if len(key) == 2: col, val = key - labels.append("%s = %s" % (col, str(val))) + labels.append("%s" % str(val)) elif len(key) == 4: col1, val1, col2, val2 = key - labels.append("%s = %s, %s = %s" % (col1, str(val1), col2, str(val2))) + labels.append("%s, %s" % (str(val1), str(val2))) else: raise ValueError("Maximum 2 categorical attributes to display a lengend of") if len(legend): From 6880b19528f073f99d6a9b10422e9244824da902 Mon Sep 17 00:00:00 2001 From: Vytautas Jancauskas Date: Wed, 8 Aug 2012 13:14:48 +0300 Subject: [PATCH 57/88] Added test_rplot.py file --- pandas/tests/test_rplot.py | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 pandas/tests/test_rplot.py diff --git a/pandas/tests/test_rplot.py b/pandas/tests/test_rplot.py new file mode 100644 index 0000000000000..1a439b84da0ba --- /dev/null +++ b/pandas/tests/test_rplot.py @@ -0,0 +1,5 @@ +import unittest +import pandas.tools.rplot as rplot + +class TestUtilityFunctions(unittest.TestCase): + pass \ No newline at end of file From 2f24a26df884bcfd52e2496b12f058982137c27e Mon Sep 17 00:00:00 2001 From: Vytautas Jancauskas Date: Wed, 8 Aug 2012 13:20:16 +0300 Subject: [PATCH 58/88] Added docstrings to ScaleConstant class --- pandas/tools/rplot.py | 34 +++++++++++++++++++--------------- 1 file changed, 19 
insertions(+), 15 deletions(-) diff --git a/pandas/tools/rplot.py b/pandas/tools/rplot.py index 742fe1c23ffe8..06b6bc4783bb8 100644 --- a/pandas/tools/rplot.py +++ b/pandas/tools/rplot.py @@ -166,28 +166,32 @@ def scaler(data, index): return scaler class ScaleConstant(Scale): + """ + Constant returning scale. Usually used automatically. + """ def __init__(self, value): + """Initialize ScaleConstant instance. + + Parameters: + ----------- + value: any Python value to be returned when called + """ self.value = value self.categorical = False def __call__(self, data, index): - return self.value - -def scale_constant(constant): - """Create a function that always returns a specified constant value. + """Return the constant value. - Parameters: - ----------- - constant: a Python object to be returned + Parameters: + ----------- + data: pandas DataFrame + index: pandas DataFrame row index - Returns: - -------- - a two argument function, takes DataFrame and row index, - returns specified value - """ - def scaler(data, index): - return constant - return scaler + Returns: + -------- + A constant value specified during initialisation + """ + return self.value def default_aes(x=None, y=None): """Create the default aesthetics dictionary. From 097d4b5b280e879ef97811fbb6d84643ae54829d Mon Sep 17 00:00:00 2001 From: Vytautas Jancauskas Date: Wed, 8 Aug 2012 13:27:41 +0300 Subject: [PATCH 59/88] Finished implementing ScaleShape class, complete with docstrings --- pandas/tools/rplot.py | 40 +++++++++++++++++++++------------------- 1 file changed, 21 insertions(+), 19 deletions(-) diff --git a/pandas/tools/rplot.py b/pandas/tools/rplot.py index 06b6bc4783bb8..d89664668dad9 100644 --- a/pandas/tools/rplot.py +++ b/pandas/tools/rplot.py @@ -124,13 +124,34 @@ class Scale: pass class ScaleShape(Scale): + """ + Provides a mapping between matplotlib marker shapes + and attribute values. + """ def __init__(self, column): + """Initialize ScaleShape instance. 
+ + Parameters: + ----------- + column: string, pandas DataFrame column name + """ self.column = column self.shapes = ['o', '+', 's', '*', '^', '<', '>', 'v', '|', 'x'] self.legends = set([]) self.categorical = True def __call__(self, data, index): + """Returns a matplotlib marker identifier. + + Parameters: + ----------- + data: pandas DataFrame + index: pandas DataFrame row index + + Returns: + -------- + a matplotlib marker identifier + """ values = sorted(list(set(data[self.column]))) if len(values) > len(self.shapes): raise ValueError("Too many different values of the categorical attribute for ScaleShape") @@ -146,25 +167,6 @@ def __call__(self, data, index): random.seed(data[self.column].iget(index)) return [random.random() for _ in range(3)] -def scale_shape(column): - """Create a function that converts between a categorical value and a scatter plot shape. - - Parameters: - ----------- - column: string, a column name to use - - Returns: - -------- - a function of two arguments, takes DataFrame and row index, return string with - matplotlib marker character - """ - shapes = ['o', 'D', 'h', 'H', '_', '8', 'p', '+', '.', 's', '*', 'd', '^', '<', '>', 'v', '|', 'x'] - def scaler(data, index): - values = list(set(data[column])) - x = data[column].iget(index) - return shapes[values.index(x)] - return scaler - class ScaleConstant(Scale): """ Constant returning scale. Usually used automatically. 
From 3fbc253402db6dff52af74d842978670801256a0 Mon Sep 17 00:00:00 2001 From: Vytautas Jancauskas Date: Wed, 8 Aug 2012 14:33:14 +0300 Subject: [PATCH 60/88] Implemented ScaleSize as a class --- pandas/tools/rplot.py | 91 ++++++++++++++++++++++++------------------- 1 file changed, 52 insertions(+), 39 deletions(-) diff --git a/pandas/tools/rplot.py b/pandas/tools/rplot.py index d89664668dad9..1bb9cc52426a4 100644 --- a/pandas/tools/rplot.py +++ b/pandas/tools/rplot.py @@ -10,45 +10,6 @@ # * Make sure legends work properly # -def scale_random_colour(column): - """Creates a function that assigns each value in a DataFrame - column a random colour from RGB space. - - Parameters: - ----------- - column: string, a column name - - Returns: - -------- - a function of two arguments that takes a data set and row number, returns - a list of three elements, representing an RGB colour - """ - def scaler(data, index): - random.seed(data[column].iget(index)) - return [random.random() for _ in range(3)] - return scaler - -def scale_size(column, min_size=1.0, max_size=80.0): - """Creates a function that converts between a data attribute to point size. - - Parameters: - ----------- - column: string, a column name - categorical: boolean, true if the column contains categorical data - min_size: float, minimum point size - max_size: float, maximum point size - - Returns: - -------- - a function of two arguments that takes a data set and a row number, returns float - """ - def scaler(data, index): - x = data[column].iget(index) - a = float(min(data[column])) - b = float(max(data[column])) - return min_size + ((x - a) / (b - a)) * (max_size - min_size) - return scaler - def scale_gradient(column, categorical, colour1=(0.0, 0.0, 0.0), colour2=(1.0, 0.7, 0.8)): """Create a function that converts between a data attribute value to a point in colour space between two specified colours. 
@@ -123,6 +84,41 @@ def scaler(data, index): class Scale: pass +class ScaleSize(Scale): + """ + Provide a mapping between a DataFrame column and matplotlib + scatter plot shape size. + """ + def __init__(self, column, min_size=5.0, max_size=100.0, transform=lambda x: x): + """Initialize ScaleSize instance. + + Parameters: + ----------- + column: string, a column name + min_size: float, minimum point size + max_size: float, maximum point size + transform: a one argument function of form float -> float (e.g. lambda x: log(x)) + """ + self.column = column + self.min_size = min_size + self.max_size = max_size + self.transform = transform + self.categorical = False + + def __call__(self, data, index): + """Return matplotlib scatter plot marker shape size. + + Parameters: + ----------- + data: pandas DataFrame + index: pandas DataFrame row index + """ + x = data[column].iget(index) + a = float(min(data[column])) + b = float(max(data[column])) + return self.transform(self.min_size + ((x - a) / (b - a)) * + (self.max_size - self.min_size)) + class ScaleShape(Scale): """ Provides a mapping between matplotlib marker shapes @@ -159,11 +155,28 @@ def __call__(self, data, index): return self.shapes[values.index(x)] class ScaleRandomColour(Scale): + """ + Maps a random colour to a DataFrame attribute. + """ def __init__(self, column): + """Initialize ScaleRandomColour instance. + + Parameters: + ----------- + column: string, pandas DataFrame column name + """ self.column = column self.categorical = True def __call__(self, data, index): + """Return a tuple of three floats, representing + an RGB colour. 
+ + Parameters: + ----------- + data: pandas DataFrame + index: pandas DataFrame row index + """ random.seed(data[self.column].iget(index)) return [random.random() for _ in range(3)] From 83541cb8f5dcd7acd1efe79541529a871151087d Mon Sep 17 00:00:00 2001 From: Vytautas Jancauskas Date: Wed, 8 Aug 2012 15:40:36 +0300 Subject: [PATCH 61/88] Implemented ScaleGradient as a class --- pandas/tools/rplot.py | 82 +++++++++++++++++++++++++++---------------- 1 file changed, 51 insertions(+), 31 deletions(-) diff --git a/pandas/tools/rplot.py b/pandas/tools/rplot.py index 1bb9cc52426a4..39178f0d359f4 100644 --- a/pandas/tools/rplot.py +++ b/pandas/tools/rplot.py @@ -10,37 +10,6 @@ # * Make sure legends work properly # -def scale_gradient(column, categorical, colour1=(0.0, 0.0, 0.0), colour2=(1.0, 0.7, 0.8)): - """Create a function that converts between a data attribute value to a - point in colour space between two specified colours. - - Parameters: - ----------- - column: string, a column name - categorical: boolean, true if the column contains categorical data - colour1: a tuple with three float values specifying rgb components - colour2: a tuple with three float values specifying rgb components - - Returns: - -------- - a function of two arguments that takes a data set and a row number, returns a - tuple with three float values with rgb component values. - """ - def scaler(data, index): - if categorical: - pass - else: - x = data[column].iget(index) - a = min(data[column]) - b = max(data[column]) - r1, g1, b1 = colour1 - r2, g2, b2 = colour2 - x_scaled = (x - a) / (b - a) - return (r1 + (r2 - r1) * x_scaled, - g1 + (g2 - g1) * x_scaled, - b1 + (b2 - b1) * x_scaled) - return scaler - def scale_gradient2(column, categorical, colour1=(0.0, 0.0, 0.0), colour2=(1.0, 0.7, 0.8), colour3=(0.2, 1.0, 0.5)): """Create a function that converts between a data attribute value to a point in colour space between three specified colours. 
@@ -82,8 +51,59 @@ def scaler(data, index): return scaler class Scale: + """ + Base class for mapping between graphical and data attributes. + """ pass +class ScaleGradient(Scale): + """ + A mapping between a data attribute value and a + point in colour space between two specified colours. + """ + def __init__(self, column, colour1, colour2): + """Initialize ScaleGradient instance. + + Parameters: + ----------- + column: string, pandas DataFrame column name + colour1: tuple, 3 element tuple with float values representing an RGB colour + colour2: tuple, 3 element tuple with float values representing an RGB colour + """ + self.column = column + self.colour1 = colour1 + self.colour2 = colour2 + self.categorical = False + + def __call__(self, data, index): + """Return a colour corresponding to data attribute value. + + Parameters: + ----------- + data: pandas DataFrame + index: pandas DataFrame row index + + Returns: + -------- + A three element tuple representing an RGB somewhere between colour1 and colour2 + """ + x = data[column].iget(index) + a = min(data[column]) + b = max(data[column]) + r1, g1, b1 = self.colour1 + r2, g2, b2 = self.colour2 + x_scaled = (x - a) / (b - a) + return (r1 + (r2 - r1) * x_scaled, + g1 + (g2 - g1) * x_scaled, + b1 + (b2 - b1) * x_scaled) + +class ScaleGradient2(Scale): + def __init__(self, column, colour1, colour2, colour3): + pass + + def __call__(self, data, index): + pass + class ScaleSize(Scale): """ Provide a mapping between a DataFrame column and matplotlib From 796bd5a882a6a1acf48b4b1ecce03b7f0d76f17a Mon Sep 17 00:00:00 2001 From: Vytautas Jancauskas Date: Wed, 8 Aug 2012 15:46:56 +0300 Subject: [PATCH 62/88] Implemented ScaleGradient2 as a class --- pandas/tools/rplot.py | 89 +++++++++++++++++++++++-------------------- 1 file changed, 47 insertions(+), 42 deletions(-) diff --git a/pandas/tools/rplot.py b/pandas/tools/rplot.py index 39178f0d359f4..ee1b39b93e9bf 100644 --- a/pandas/tools/rplot.py +++ b/pandas/tools/rplot.py @@ 
-10,46 +10,6 @@ # * Make sure legends work properly # -def scale_gradient2(column, categorical, colour1=(0.0, 0.0, 0.0), colour2=(1.0, 0.7, 0.8), colour3=(0.2, 1.0, 0.5)): - """Create a function that converts between a data attribute value to a - point in colour space between three specified colours. - - Parameters: - ----------- - column: string, a column name - categorical: boolean, true if the column contains categorical data - colour1: a tuple with three float values specifying rgb components - colour2: a tuple with three float values specifying rgb components - colour3: a tuple with three float values specifying rgb components - - Returns: - -------- - a function of two arguments that takes a data set and a row number, returns a - tuple with three float values with rgb component values. - """ - def scaler(data, index): - if categorical: - pass - else: - x = data[column].iget(index) - a = min(data[column]) - b = max(data[column]) - r1, g1, b1 = colour1 - r2, g2, b2 = colour2 - r3, g3, b3 = colour3 - x_scaled = (x - a) / (b - a) - if x_scaled < 0.5: - x_scaled *= 2.0 - return (r1 + (r2 - r1) * x_scaled, - g1 + (g2 - g1) * x_scaled, - b1 + (b2 - b1) * x_scaled) - else: - x_scaled = (x_scaled - 0.5) * 2.0 - return (r2 + (r3 - r2) * x_scaled, - g2 + (g3 - g2) * x_scaled, - b2 + (b3 - b2) * x_scaled) - return scaler - class Scale: """ Base class for mapping between graphical and data attributes. @@ -98,11 +58,56 @@ def __call__(self, data, index): b1 + (b2 - b1) * x_scaled) class ScaleGradient2(Scale): + """ + Create a mapping between a data attribute value and a + point in colour space in a line of three specified colours. + """ def __init__(self, column, colour1, colour2, colour3): - pass + """Initialize ScaleGradient2 instance. 
+ + Parameters: + ----------- + column: string, pandas DataFrame column name + colour1: tuple, 3 element tuple with float values representing an RGB colour + colour2: tuple, 3 element tuple with float values representing an RGB colour + colour3: tuple, 3 element tuple with float values representing an RGB colour + """ + self.column = column + self.colour1 = colour1 + self.colour2 = colour2 + self.colour3 = colour3 + self.categorical = False def __call__(self, data, index): - pass + """Return a colour corresponding to data attribute value. + + Parameters: + ----------- + data: pandas DataFrame + index: pandas DataFrame row index + + Returns: + -------- + A three element tuple representing an RGB somewhere along the line + of colour1, colour2 and colour3 + """ + x = data[column].iget(index) + a = min(data[column]) + b = max(data[column]) + r1, g1, b1 = colour1 + r2, g2, b2 = colour2 + r3, g3, b3 = colour3 + x_scaled = (x - a) / (b - a) + if x_scaled < 0.5: + x_scaled *= 2.0 + return (r1 + (r2 - r1) * x_scaled, + g1 + (g2 - g1) * x_scaled, + b1 + (b2 - b1) * x_scaled) + else: + x_scaled = (x_scaled - 0.5) * 2.0 + return (r2 + (r3 - r2) * x_scaled, + g2 + (g3 - g2) * x_scaled, + b2 + (b3 - b2) * x_scaled) class ScaleSize(Scale): """ From f96732b8b1441b3a15b6bb65c553f044c932b1d7 Mon Sep 17 00:00:00 2001 From: Vytautas Jancauskas Date: Wed, 8 Aug 2012 15:52:57 +0300 Subject: [PATCH 63/88] Simplify the Layer initialization method --- pandas/tools/rplot.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/pandas/tools/rplot.py b/pandas/tools/rplot.py index ee1b39b93e9bf..12723d10ea74a 100644 --- a/pandas/tools/rplot.py +++ b/pandas/tools/rplot.py @@ -291,7 +291,7 @@ class Layer: """ Layer object representing a single plot layer. """ - def __init__(self, data=None, aes=None): + def __init__(self, data=None, **kwds): """Initialize layer object. 
Parameters: @@ -300,10 +300,7 @@ def __init__(self, data=None, aes=None): aes: aesthetics dictionary with bindings """ self.data = data - if aes is None: - self.aes = make_aes() - else: - self.aes = aes + self.aes = make_aes(**kwds) self.legend = {} def work(self, fig=None, ax=None): From 97ebee645f300719f19a66538a7828e0c3623b36 Mon Sep 17 00:00:00 2001 From: Vytautas Jancauskas Date: Fri, 10 Aug 2012 01:22:32 +0300 Subject: [PATCH 64/88] Fix a problem with aes dictionaries --- pandas/tests/test_rplot.py | 2 +- pandas/tools/rplot.py | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/pandas/tests/test_rplot.py b/pandas/tests/test_rplot.py index 1a439b84da0ba..6e7aed2349a05 100644 --- a/pandas/tests/test_rplot.py +++ b/pandas/tests/test_rplot.py @@ -2,4 +2,4 @@ import pandas.tools.rplot as rplot class TestUtilityFunctions(unittest.TestCase): - pass \ No newline at end of file + pass diff --git a/pandas/tools/rplot.py b/pandas/tools/rplot.py index 12723d10ea74a..f493373c4aa39 100644 --- a/pandas/tools/rplot.py +++ b/pandas/tools/rplot.py @@ -270,13 +270,13 @@ def make_aes(x=None, y=None, size=None, colour=None, shape=None, alpha=None): -------- a dictionary with aesthetics bindings """ - if not hasattr(size, '__call__'): + if not hasattr(size, '__call__') and size is not None: size = ScaleConstant(size) - if not hasattr(colour, '__call__'): + if not hasattr(colour, '__call__') and colour is not None: colour = ScaleConstant(colour) - if not hasattr(shape, '__call__'): + if not hasattr(shape, '__call__') and shape is not None: shape = ScaleConstant(shape) - if not hasattr(alpha, '__call__'): + if not hasattr(alpha, '__call__') and alpha is not None: alpha = ScaleConstant(alpha) return { 'x' : x, @@ -785,7 +785,7 @@ def __init__(self, data, x=None, y=None): x: string, DataFrame column name y: string, DataFrame column name """ - self.layers = [Layer(data, default_aes(x=x, y=y))] + self.layers = [Layer(data, **default_aes(x=x, y=y))] trellised = 
False def __add__(self, other): From 407528944bacf3be768e505331450fa9a871f84d Mon Sep 17 00:00:00 2001 From: Vytautas Jancauskas Date: Fri, 10 Aug 2012 20:42:07 +0300 Subject: [PATCH 65/88] Added a ScaleGradient test class --- pandas/tests/test_rplot.py | 31 +++++++++++++++++++++++++++++++ pandas/tools/rplot.py | 6 +++--- 2 files changed, 34 insertions(+), 3 deletions(-) diff --git a/pandas/tests/test_rplot.py b/pandas/tests/test_rplot.py index 6e7aed2349a05..ab9d09778fdb2 100644 --- a/pandas/tests/test_rplot.py +++ b/pandas/tests/test_rplot.py @@ -1,5 +1,36 @@ import unittest import pandas.tools.rplot as rplot +from pandas import read_csv +import os + +def curpath(): + pth, _ = os.path.split(os.path.abspath(__file__)) + return pth class TestUtilityFunctions(unittest.TestCase): pass + +class TestScaleGradient(unittest.TestCase): + def setUp(self): + path = os.path.join(curpath(), 'data/iris.csv') + self.data = read_csv(path, sep=',') + self.gradient = rplot.ScaleGradient("SepalLength", colour1=(0.2, 0.3, 0.4), colour2=(0.8, 0.7, 0.6)) + + def test_gradient(self): + for index in range(len(self.data)): + row = self.data.irow(index) + r, g, b = self.gradient(self.data, index) + r1, g1, b1 = self.gradient.colour1 + r2, g2, b2 = self.gradient.colour2 + self.assertGreaterEqual(r, r1) + self.assertGreaterEqual(g, g1) + self.assertGreaterEqual(b, b1) + self.assertLessEqual(r, r2) + self.assertLessEqual(g, g2) + self.assertLessEqual(b, b2) + +class TestScaleGradient2(unittest.TestCase): + pass + +if __name__ == '__main__': + unittest.main() \ No newline at end of file diff --git a/pandas/tools/rplot.py b/pandas/tools/rplot.py index f493373c4aa39..e396bf8ba6dd4 100644 --- a/pandas/tools/rplot.py +++ b/pandas/tools/rplot.py @@ -47,9 +47,9 @@ def __call__(self, data, index): -------- A three element tuple representing an RGB somewhere between colour1 and colour2 """ - x = data[column].iget(index) - a = min(data[column]) - b = max(data[column]) + x = 
data[self.column].iget(index) + a = min(data[self.column]) + b = max(data[self.column]) r1, g1, b1 = self.colour1 r2, g2, b2 = self.colour2 x_scaled = (x - a) / (b - a) From 114c07c9e49736ac4be50c12fc738f7663642462 Mon Sep 17 00:00:00 2001 From: Vytautas Jancauskas Date: Fri, 10 Aug 2012 21:40:37 +0300 Subject: [PATCH 66/88] Implemented a test class for ScaleGradient2, re-implemented tests for ScaleGradient --- pandas/tests/test_rplot.py | 52 +++++++++++++++++++++++++++++++++----- pandas/tools/rplot.py | 12 ++++----- 2 files changed, 51 insertions(+), 13 deletions(-) diff --git a/pandas/tests/test_rplot.py b/pandas/tests/test_rplot.py index ab9d09778fdb2..6c9098718d129 100644 --- a/pandas/tests/test_rplot.py +++ b/pandas/tests/test_rplot.py @@ -7,6 +7,24 @@ def curpath(): pth, _ = os.path.split(os.path.abspath(__file__)) return pth +def between(a, b, x): + """Check if x is in the somewhere between a and b. + + Parameters: + ----------- + a: float, interval start + b: float, interval end + x: float, value to test for + + Returns: + -------- + True if x is between a and b, False otherwise + """ + if a < b: + return x >= a and x <= b + else: + return x <= a and x >= b + class TestUtilityFunctions(unittest.TestCase): pass @@ -22,15 +40,35 @@ def test_gradient(self): r, g, b = self.gradient(self.data, index) r1, g1, b1 = self.gradient.colour1 r2, g2, b2 = self.gradient.colour2 - self.assertGreaterEqual(r, r1) - self.assertGreaterEqual(g, g1) - self.assertGreaterEqual(b, b1) - self.assertLessEqual(r, r2) - self.assertLessEqual(g, g2) - self.assertLessEqual(b, b2) + self.assertTrue(between(r1, r2, r)) + self.assertTrue(between(g1, g2, g)) + self.assertTrue(between(b1, b2, b)) class TestScaleGradient2(unittest.TestCase): - pass + def setUp(self): + path = os.path.join(curpath(), 'data/iris.csv') + self.data = read_csv(path, sep=',') + self.gradient = rplot.ScaleGradient2("SepalLength", colour1=(0.2, 0.3, 0.4), colour2=(0.8, 0.7, 0.6), colour3=(0.5, 0.5, 0.5)) + + def 
test_gradient2(self): + for index in range(len(self.data)): + row = self.data.irow(index) + r, g, b = self.gradient(self.data, index) + r1, g1, b1 = self.gradient.colour1 + r2, g2, b2 = self.gradient.colour2 + r3, g3, b3 = self.gradient.colour3 + value = row[self.gradient.column] + a_ = min(self.data[self.gradient.column]) + b_ = max(self.data[self.gradient.column]) + scaled = (value - a_) / (b_ - a_) + if scaled < 0.5: + self.assertTrue(between(r1, r2, r)) + self.assertTrue(between(g1, g2, g)) + self.assertTrue(between(b1, b2, b)) + else: + self.assertTrue(between(r2, r3, r)) + self.assertTrue(between(g2, g3, g)) + self.assertTrue(between(b2, b3, b)) if __name__ == '__main__': unittest.main() \ No newline at end of file diff --git a/pandas/tools/rplot.py b/pandas/tools/rplot.py index e396bf8ba6dd4..887457e779107 100644 --- a/pandas/tools/rplot.py +++ b/pandas/tools/rplot.py @@ -91,12 +91,12 @@ def __call__(self, data, index): A three element tuple representing an RGB somewhere along the line of colour1, colour2 and colour3 """ - x = data[column].iget(index) - a = min(data[column]) - b = max(data[column]) - r1, g1, b1 = colour1 - r2, g2, b2 = colour2 - r3, g3, b3 = colour3 + x = data[self.column].iget(index) + a = min(data[self.column]) + b = max(data[self.column]) + r1, g1, b1 = self.colour1 + r2, g2, b2 = self.colour2 + r3, g3, b3 = self.colour3 x_scaled = (x - a) / (b - a) if x_scaled < 0.5: x_scaled *= 2.0 From 9979de72c7de3eeba09510038553cb592d072901 Mon Sep 17 00:00:00 2001 From: Vytautas Jancauskas Date: Fri, 10 Aug 2012 22:03:27 +0300 Subject: [PATCH 67/88] Added test_make_aes1 method --- pandas/tests/test_rplot.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/pandas/tests/test_rplot.py b/pandas/tests/test_rplot.py index 6c9098718d129..fee8bcb3c0fcb 100644 --- a/pandas/tests/test_rplot.py +++ b/pandas/tests/test_rplot.py @@ -26,7 +26,17 @@ def between(a, b, x): return x <= a and x >= b class 
TestUtilityFunctions(unittest.TestCase): - pass + """ + Tests for RPlot utility functions. + """ + def test_make_aes1(self): + aes = rplot.make_aes() + self.assertTrue(aes['x'] is None) + self.assertTrue(aes['y'] is None) + self.assertTrue(aes['size'] is None) + self.assertTrue(aes['colour'] is None) + self.assertTrue(aes['shape'] is None) + self.assertTrue(aes['alpha'] is None) class TestScaleGradient(unittest.TestCase): def setUp(self): From b46174cc57b90f0d8373c488df5d3821ed71047d Mon Sep 17 00:00:00 2001 From: Vytautas Jancauskas Date: Fri, 10 Aug 2012 22:18:31 +0300 Subject: [PATCH 68/88] Added type security checks to make_aes --- pandas/tools/rplot.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/pandas/tools/rplot.py b/pandas/tools/rplot.py index 887457e779107..a20892b1b7e68 100644 --- a/pandas/tools/rplot.py +++ b/pandas/tools/rplot.py @@ -278,6 +278,22 @@ def make_aes(x=None, y=None, size=None, colour=None, shape=None, alpha=None): shape = ScaleConstant(shape) if not hasattr(alpha, '__call__') and alpha is not None: alpha = ScaleConstant(alpha) + if any([isinstance(size, scale) for scale in [ScaleConstant, ScaleSize]]): + pass + else: + raise ValueError('size mapping should be done through ScaleConstant or ScaleSize') + if any([isinstance(colour, scale) for scale in [ScaleConstant, ScaleGradient, ScaleGradient2]]): + pass + else: + raise ValueError('colour mapping should be done through ScaleConstant, ScaleGradient or ScaleGradient2') + if any([isinstance(shape, scale) for scale in [ScaleConstant, ScaleShape]]): + pass + else: + raise ValueError('shape mapping should be done through ScaleConstant or ScaleShape') + if any([isinstance(alpha, scale) for scale in [ScaleConstant]]): + pass + else: + raise ValueError('alpha mapping should be done through ScaleConstant') return { 'x' : x, 'y' : y, From 1b2ac0444a17268a3cf73f2ff3a1047c1dc15800 Mon Sep 17 00:00:00 2001 From: Vytautas Jancauskas Date: Fri, 10 Aug 2012 23:11:17 +0300 Subject: 
[PATCH 69/88] Added test_make_aes2 method, fixed a bug with make_aes --- pandas/tests/test_rplot.py | 11 +++++++++++ pandas/tools/rplot.py | 8 ++++---- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/pandas/tests/test_rplot.py b/pandas/tests/test_rplot.py index fee8bcb3c0fcb..df3e9d58814ee 100644 --- a/pandas/tests/test_rplot.py +++ b/pandas/tests/test_rplot.py @@ -37,6 +37,17 @@ def test_make_aes1(self): self.assertTrue(aes['colour'] is None) self.assertTrue(aes['shape'] is None) self.assertTrue(aes['alpha'] is None) + self.assertTrue(type(aes) is dict) + + def test_make_aes2(self): + with self.assertRaises(ValueError): + rplot.make_aes(size=rplot.ScaleShape('test')) + with self.assertRaises(ValueError): + rplot.make_aes(colour=rplot.ScaleShape('test')) + with self.assertRaises(ValueError): + rplot.make_aes(shape=rplot.ScaleSize('test')) + with self.assertRaises(ValueError): + rplot.make_aes(alpha=rplot.ScaleShape('test')) class TestScaleGradient(unittest.TestCase): def setUp(self): diff --git a/pandas/tools/rplot.py b/pandas/tools/rplot.py index a20892b1b7e68..27da4cf6f8595 100644 --- a/pandas/tools/rplot.py +++ b/pandas/tools/rplot.py @@ -278,19 +278,19 @@ def make_aes(x=None, y=None, size=None, colour=None, shape=None, alpha=None): shape = ScaleConstant(shape) if not hasattr(alpha, '__call__') and alpha is not None: alpha = ScaleConstant(alpha) - if any([isinstance(size, scale) for scale in [ScaleConstant, ScaleSize]]): + if any([isinstance(size, scale) for scale in [ScaleConstant, ScaleSize]]) or size is None: pass else: raise ValueError('size mapping should be done through ScaleConstant or ScaleSize') - if any([isinstance(colour, scale) for scale in [ScaleConstant, ScaleGradient, ScaleGradient2]]): + if any([isinstance(colour, scale) for scale in [ScaleConstant, ScaleGradient, ScaleGradient2]]) or colour is None: pass else: raise ValueError('colour mapping should be done through ScaleConstant, ScaleGradient or ScaleGradient2') - if 
any([isinstance(shape, scale) for scale in [ScaleConstant, ScaleShape]]): + if any([isinstance(shape, scale) for scale in [ScaleConstant, ScaleShape]]) or shape is None: pass else: raise ValueError('shape mapping should be done through ScaleConstant or ScaleShape') - if any([isinstance(alpha, scale) for scale in [ScaleConstant]]): + if any([isinstance(alpha, scale) for scale in [ScaleConstant]]) or alpha is None: pass else: raise ValueError('alpha mapping should be done through ScaleConstant') From e57af7e71ca6bb2f2e4058ba9856ba499ec778c9 Mon Sep 17 00:00:00 2001 From: Vytautas Jancauskas Date: Fri, 10 Aug 2012 23:28:29 +0300 Subject: [PATCH 70/88] Added test_dictionary_union method to TestUtilityFunctions --- pandas/tests/test_rplot.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/pandas/tests/test_rplot.py b/pandas/tests/test_rplot.py index df3e9d58814ee..335e8cdcc83ad 100644 --- a/pandas/tests/test_rplot.py +++ b/pandas/tests/test_rplot.py @@ -49,6 +49,20 @@ def test_make_aes2(self): with self.assertRaises(ValueError): rplot.make_aes(alpha=rplot.ScaleShape('test')) + def test_dictionary_union(self): + dict1 = {1 : 1, 2 : 2, 3 : 3} + dict2 = {1 : 1, 2 : 2, 4 : 4} + union = rplot.dictionary_union(dict1, dict2) + self.assertEqual(len(union), 4) + keys = union.keys() + self.assertTrue(1 in keys) + self.assertTrue(2 in keys) + self.assertTrue(3 in keys) + self.assertTrue(4 in keys) + self.assertTrue(rplot.dictionary_union(dict1, {}) == dict1) + self.assertTrue(rplot.dictionary_union({}, dict1) == dict1) + self.assertTrue(rplot.dictionary_union({}, {}) == {}) + class TestScaleGradient(unittest.TestCase): def setUp(self): path = os.path.join(curpath(), 'data/iris.csv') From 67bfe032c0437b3f73aecafe9a0c1843bf7e8178 Mon Sep 17 00:00:00 2001 From: Vytautas Jancauskas Date: Sat, 11 Aug 2012 00:42:50 +0300 Subject: [PATCH 71/88] Added test_merge_aes method --- pandas/tests/test_rplot.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git 
a/pandas/tests/test_rplot.py b/pandas/tests/test_rplot.py index 335e8cdcc83ad..67e04eb8257e4 100644 --- a/pandas/tests/test_rplot.py +++ b/pandas/tests/test_rplot.py @@ -63,6 +63,17 @@ def test_dictionary_union(self): self.assertTrue(rplot.dictionary_union({}, dict1) == dict1) self.assertTrue(rplot.dictionary_union({}, {}) == {}) + def test_merge_aes(self): + layer1 = rplot.Layer(size=rplot.ScaleSize('test')) + layer2 = rplot.Layer(shape=rplot.ScaleShape('test')) + rplot.merge_aes(layer1, layer2) + self.assertTrue(isinstance(layer2.aes['size'], rplot.ScaleSize)) + self.assertTrue(isinstance(layer2.aes['shape'], rplot.ScaleShape)) + self.assertTrue(layer2.aes['size'] == layer1.aes['size']) + for key in layer2.aes.keys(): + if key != 'size' and key != 'shape': + self.assertTrue(layer2.aes[key] is None) + class TestScaleGradient(unittest.TestCase): def setUp(self): path = os.path.join(curpath(), 'data/iris.csv') From 3be9987a0253c73a4e0881d878300c9f5626daa8 Mon Sep 17 00:00:00 2001 From: Vytautas Jancauskas Date: Mon, 13 Aug 2012 21:02:10 +0300 Subject: [PATCH 72/88] Added TestScaleRandomColour class --- pandas/tests/test_rplot.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/pandas/tests/test_rplot.py b/pandas/tests/test_rplot.py index 67e04eb8257e4..693f0e36bee35 100644 --- a/pandas/tests/test_rplot.py +++ b/pandas/tests/test_rplot.py @@ -116,5 +116,23 @@ def test_gradient2(self): self.assertTrue(between(g2, g3, g)) self.assertTrue(between(b2, b3, b)) +class TestScaleRandomColour(unittest.TestCase): + def setUp(self): + path = os.path.join(curpath(), 'data/iris.csv') + self.data = read_csv(path, sep=',') + self.colour = rplot.ScaleRandomColour('SepalLength') + + def test_random_colour(self): + for index in range(len(self.data)): + colour = self.colour(self.data, index) + self.assertEqual(len(colour), 3) + r, g, b = colour + self.assertGreaterEqual(r, 0.0) + self.assertGreaterEqual(g, 0.0) + self.assertGreaterEqual(b, 0.0) + 
self.assertLessEqual(r, 1.0) + self.assertLessEqual(g, 1.0) + self.assertLessEqual(b, 1.0) + if __name__ == '__main__': unittest.main() \ No newline at end of file From 5662b73b29c80a41a91b0b1ce9b6090909b35bed Mon Sep 17 00:00:00 2001 From: Vytautas Jancauskas Date: Mon, 13 Aug 2012 21:09:07 +0300 Subject: [PATCH 73/88] Added TestScaleConstant class --- pandas/tests/test_rplot.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/pandas/tests/test_rplot.py b/pandas/tests/test_rplot.py index 693f0e36bee35..06e75442f67f1 100644 --- a/pandas/tests/test_rplot.py +++ b/pandas/tests/test_rplot.py @@ -132,7 +132,14 @@ def test_random_colour(self): self.assertGreaterEqual(b, 0.0) self.assertLessEqual(r, 1.0) self.assertLessEqual(g, 1.0) - self.assertLessEqual(b, 1.0) + self.assertLessEqual(b, 1.0) + +class ScaleConstant(unittest.TestCase): + def test_scale_constant(self): + scale = rplot.ScaleConstant(1.0) + self.assertEqual(scale(None, None), 1.0) + scale = rplot.ScaleConstant("test") + self.assertEqual(scale(None, None), "test") if __name__ == '__main__': unittest.main() \ No newline at end of file From f240a25c007f08b8eacf01d493be5402a735a878 Mon Sep 17 00:00:00 2001 From: Vytautas Jancauskas Date: Tue, 14 Aug 2012 00:17:28 +0300 Subject: [PATCH 74/88] Added TestScaleSize class --- pandas/tests/test_rplot.py | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/pandas/tests/test_rplot.py b/pandas/tests/test_rplot.py index 06e75442f67f1..3c3a81b463f60 100644 --- a/pandas/tests/test_rplot.py +++ b/pandas/tests/test_rplot.py @@ -134,12 +134,29 @@ def test_random_colour(self): self.assertLessEqual(g, 1.0) self.assertLessEqual(b, 1.0) -class ScaleConstant(unittest.TestCase): +class TestScaleConstant(unittest.TestCase): def test_scale_constant(self): scale = rplot.ScaleConstant(1.0) self.assertEqual(scale(None, None), 1.0) scale = rplot.ScaleConstant("test") self.assertEqual(scale(None, None), "test") +class 
TestScaleSize(unittest.TestCase): + def setUp(self): + path = os.path.join(curpath(), 'data/iris.csv') + self.data = read_csv(path, sep=',') + self.scale1 = rplot.ScaleShape('Name') + self.scale2 = rplot.ScaleShape('PetalLength') + + def test_scale_size(self): + for index in range(len(self.data)): + marker = self.scale1(self.data, index) + self.assertTrue(marker in ['o', '+', 's', '*', '^', '<', '>', 'v', '|', 'x']) + + def test_scale_overflow(self): + with self.assertRaises(ValueError): + for index in range(len(self.data)): + self.scale2(self.data, index) + if __name__ == '__main__': unittest.main() \ No newline at end of file From 97c000ddd83ec0f92d040a3a574d1d997759f4ee Mon Sep 17 00:00:00 2001 From: Vytautas Jancauskas Date: Tue, 14 Aug 2012 01:22:42 +0300 Subject: [PATCH 75/88] Added test_sequence_layers method --- pandas/tests/test_rplot.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/pandas/tests/test_rplot.py b/pandas/tests/test_rplot.py index 3c3a81b463f60..a9d51f47b90dc 100644 --- a/pandas/tests/test_rplot.py +++ b/pandas/tests/test_rplot.py @@ -29,6 +29,10 @@ class TestUtilityFunctions(unittest.TestCase): """ Tests for RPlot utility functions. 
""" + def setUp(self): + path = os.path.join(curpath(), 'data/iris.csv') + self.data = read_csv(path, sep=',') + def test_make_aes1(self): aes = rplot.make_aes() self.assertTrue(aes['x'] is None) @@ -74,6 +78,19 @@ def test_merge_aes(self): if key != 'size' and key != 'shape': self.assertTrue(layer2.aes[key] is None) + def test_sequence_layers(self): + layer1 = rplot.Layer(self.data) + layer2 = rplot.GeomPoint(x='SepalLength', y='SepalWidth', size=rplot.ScaleSize('PetalLength')) + layer3 = rplot.GeomPolyFit(2) + result = rplot.sequence_layers([layer1, layer2, layer3]) + self.assertEqual(len(result), 3) + last = result[-1] + self.assertEqual(last.aes['x'], 'SepalLength') + self.assertEqual(last.aes['y'], 'SepalWidth') + self.assertTrue(isinstance(last.aes['size'], rplot.ScaleSize)) + self.assertTrue(self.data is last.data) + self.assertTrue(rplot.sequence_layers([layer1])[0] is layer1) + class TestScaleGradient(unittest.TestCase): def setUp(self): path = os.path.join(curpath(), 'data/iris.csv') From 27710c6095a368ef2e489b9fd775061d42b540fe Mon Sep 17 00:00:00 2001 From: Vytautas Jancauskas Date: Tue, 14 Aug 2012 01:46:45 +0300 Subject: [PATCH 76/88] Added TestTrellis class --- pandas/tests/data/tips.csv | 245 +++++++++++++++++++++++++++++++++++++ pandas/tests/test_rplot.py | 12 ++ 2 files changed, 257 insertions(+) create mode 100644 pandas/tests/data/tips.csv diff --git a/pandas/tests/data/tips.csv b/pandas/tests/data/tips.csv new file mode 100644 index 0000000000000..856a65a69e647 --- /dev/null +++ b/pandas/tests/data/tips.csv @@ -0,0 +1,245 @@ +total_bill,tip,sex,smoker,day,time,size +16.99,1.01,Female,No,Sun,Dinner,2 +10.34,1.66,Male,No,Sun,Dinner,3 +21.01,3.5,Male,No,Sun,Dinner,3 +23.68,3.31,Male,No,Sun,Dinner,2 +24.59,3.61,Female,No,Sun,Dinner,4 +25.29,4.71,Male,No,Sun,Dinner,4 +8.77,2.0,Male,No,Sun,Dinner,2 +26.88,3.12,Male,No,Sun,Dinner,4 +15.04,1.96,Male,No,Sun,Dinner,2 +14.78,3.23,Male,No,Sun,Dinner,2 +10.27,1.71,Male,No,Sun,Dinner,2 
+35.26,5.0,Female,No,Sun,Dinner,4 +15.42,1.57,Male,No,Sun,Dinner,2 +18.43,3.0,Male,No,Sun,Dinner,4 +14.83,3.02,Female,No,Sun,Dinner,2 +21.58,3.92,Male,No,Sun,Dinner,2 +10.33,1.67,Female,No,Sun,Dinner,3 +16.29,3.71,Male,No,Sun,Dinner,3 +16.97,3.5,Female,No,Sun,Dinner,3 +20.65,3.35,Male,No,Sat,Dinner,3 +17.92,4.08,Male,No,Sat,Dinner,2 +20.29,2.75,Female,No,Sat,Dinner,2 +15.77,2.23,Female,No,Sat,Dinner,2 +39.42,7.58,Male,No,Sat,Dinner,4 +19.82,3.18,Male,No,Sat,Dinner,2 +17.81,2.34,Male,No,Sat,Dinner,4 +13.37,2.0,Male,No,Sat,Dinner,2 +12.69,2.0,Male,No,Sat,Dinner,2 +21.7,4.3,Male,No,Sat,Dinner,2 +19.65,3.0,Female,No,Sat,Dinner,2 +9.55,1.45,Male,No,Sat,Dinner,2 +18.35,2.5,Male,No,Sat,Dinner,4 +15.06,3.0,Female,No,Sat,Dinner,2 +20.69,2.45,Female,No,Sat,Dinner,4 +17.78,3.27,Male,No,Sat,Dinner,2 +24.06,3.6,Male,No,Sat,Dinner,3 +16.31,2.0,Male,No,Sat,Dinner,3 +16.93,3.07,Female,No,Sat,Dinner,3 +18.69,2.31,Male,No,Sat,Dinner,3 +31.27,5.0,Male,No,Sat,Dinner,3 +16.04,2.24,Male,No,Sat,Dinner,3 +17.46,2.54,Male,No,Sun,Dinner,2 +13.94,3.06,Male,No,Sun,Dinner,2 +9.68,1.32,Male,No,Sun,Dinner,2 +30.4,5.6,Male,No,Sun,Dinner,4 +18.29,3.0,Male,No,Sun,Dinner,2 +22.23,5.0,Male,No,Sun,Dinner,2 +32.4,6.0,Male,No,Sun,Dinner,4 +28.55,2.05,Male,No,Sun,Dinner,3 +18.04,3.0,Male,No,Sun,Dinner,2 +12.54,2.5,Male,No,Sun,Dinner,2 +10.29,2.6,Female,No,Sun,Dinner,2 +34.81,5.2,Female,No,Sun,Dinner,4 +9.94,1.56,Male,No,Sun,Dinner,2 +25.56,4.34,Male,No,Sun,Dinner,4 +19.49,3.51,Male,No,Sun,Dinner,2 +38.01,3.0,Male,Yes,Sat,Dinner,4 +26.41,1.5,Female,No,Sat,Dinner,2 +11.24,1.76,Male,Yes,Sat,Dinner,2 +48.27,6.73,Male,No,Sat,Dinner,4 +20.29,3.21,Male,Yes,Sat,Dinner,2 +13.81,2.0,Male,Yes,Sat,Dinner,2 +11.02,1.98,Male,Yes,Sat,Dinner,2 +18.29,3.76,Male,Yes,Sat,Dinner,4 +17.59,2.64,Male,No,Sat,Dinner,3 +20.08,3.15,Male,No,Sat,Dinner,3 +16.45,2.47,Female,No,Sat,Dinner,2 +3.07,1.0,Female,Yes,Sat,Dinner,1 +20.23,2.01,Male,No,Sat,Dinner,2 +15.01,2.09,Male,Yes,Sat,Dinner,2 +12.02,1.97,Male,No,Sat,Dinner,2 
+17.07,3.0,Female,No,Sat,Dinner,3 +26.86,3.14,Female,Yes,Sat,Dinner,2 +25.28,5.0,Female,Yes,Sat,Dinner,2 +14.73,2.2,Female,No,Sat,Dinner,2 +10.51,1.25,Male,No,Sat,Dinner,2 +17.92,3.08,Male,Yes,Sat,Dinner,2 +27.2,4.0,Male,No,Thur,Lunch,4 +22.76,3.0,Male,No,Thur,Lunch,2 +17.29,2.71,Male,No,Thur,Lunch,2 +19.44,3.0,Male,Yes,Thur,Lunch,2 +16.66,3.4,Male,No,Thur,Lunch,2 +10.07,1.83,Female,No,Thur,Lunch,1 +32.68,5.0,Male,Yes,Thur,Lunch,2 +15.98,2.03,Male,No,Thur,Lunch,2 +34.83,5.17,Female,No,Thur,Lunch,4 +13.03,2.0,Male,No,Thur,Lunch,2 +18.28,4.0,Male,No,Thur,Lunch,2 +24.71,5.85,Male,No,Thur,Lunch,2 +21.16,3.0,Male,No,Thur,Lunch,2 +28.97,3.0,Male,Yes,Fri,Dinner,2 +22.49,3.5,Male,No,Fri,Dinner,2 +5.75,1.0,Female,Yes,Fri,Dinner,2 +16.32,4.3,Female,Yes,Fri,Dinner,2 +22.75,3.25,Female,No,Fri,Dinner,2 +40.17,4.73,Male,Yes,Fri,Dinner,4 +27.28,4.0,Male,Yes,Fri,Dinner,2 +12.03,1.5,Male,Yes,Fri,Dinner,2 +21.01,3.0,Male,Yes,Fri,Dinner,2 +12.46,1.5,Male,No,Fri,Dinner,2 +11.35,2.5,Female,Yes,Fri,Dinner,2 +15.38,3.0,Female,Yes,Fri,Dinner,2 +44.3,2.5,Female,Yes,Sat,Dinner,3 +22.42,3.48,Female,Yes,Sat,Dinner,2 +20.92,4.08,Female,No,Sat,Dinner,2 +15.36,1.64,Male,Yes,Sat,Dinner,2 +20.49,4.06,Male,Yes,Sat,Dinner,2 +25.21,4.29,Male,Yes,Sat,Dinner,2 +18.24,3.76,Male,No,Sat,Dinner,2 +14.31,4.0,Female,Yes,Sat,Dinner,2 +14.0,3.0,Male,No,Sat,Dinner,2 +7.25,1.0,Female,No,Sat,Dinner,1 +38.07,4.0,Male,No,Sun,Dinner,3 +23.95,2.55,Male,No,Sun,Dinner,2 +25.71,4.0,Female,No,Sun,Dinner,3 +17.31,3.5,Female,No,Sun,Dinner,2 +29.93,5.07,Male,No,Sun,Dinner,4 +10.65,1.5,Female,No,Thur,Lunch,2 +12.43,1.8,Female,No,Thur,Lunch,2 +24.08,2.92,Female,No,Thur,Lunch,4 +11.69,2.31,Male,No,Thur,Lunch,2 +13.42,1.68,Female,No,Thur,Lunch,2 +14.26,2.5,Male,No,Thur,Lunch,2 +15.95,2.0,Male,No,Thur,Lunch,2 +12.48,2.52,Female,No,Thur,Lunch,2 +29.8,4.2,Female,No,Thur,Lunch,6 +8.52,1.48,Male,No,Thur,Lunch,2 +14.52,2.0,Female,No,Thur,Lunch,2 +11.38,2.0,Female,No,Thur,Lunch,2 +22.82,2.18,Male,No,Thur,Lunch,3 
+19.08,1.5,Male,No,Thur,Lunch,2 +20.27,2.83,Female,No,Thur,Lunch,2 +11.17,1.5,Female,No,Thur,Lunch,2 +12.26,2.0,Female,No,Thur,Lunch,2 +18.26,3.25,Female,No,Thur,Lunch,2 +8.51,1.25,Female,No,Thur,Lunch,2 +10.33,2.0,Female,No,Thur,Lunch,2 +14.15,2.0,Female,No,Thur,Lunch,2 +16.0,2.0,Male,Yes,Thur,Lunch,2 +13.16,2.75,Female,No,Thur,Lunch,2 +17.47,3.5,Female,No,Thur,Lunch,2 +34.3,6.7,Male,No,Thur,Lunch,6 +41.19,5.0,Male,No,Thur,Lunch,5 +27.05,5.0,Female,No,Thur,Lunch,6 +16.43,2.3,Female,No,Thur,Lunch,2 +8.35,1.5,Female,No,Thur,Lunch,2 +18.64,1.36,Female,No,Thur,Lunch,3 +11.87,1.63,Female,No,Thur,Lunch,2 +9.78,1.73,Male,No,Thur,Lunch,2 +7.51,2.0,Male,No,Thur,Lunch,2 +14.07,2.5,Male,No,Sun,Dinner,2 +13.13,2.0,Male,No,Sun,Dinner,2 +17.26,2.74,Male,No,Sun,Dinner,3 +24.55,2.0,Male,No,Sun,Dinner,4 +19.77,2.0,Male,No,Sun,Dinner,4 +29.85,5.14,Female,No,Sun,Dinner,5 +48.17,5.0,Male,No,Sun,Dinner,6 +25.0,3.75,Female,No,Sun,Dinner,4 +13.39,2.61,Female,No,Sun,Dinner,2 +16.49,2.0,Male,No,Sun,Dinner,4 +21.5,3.5,Male,No,Sun,Dinner,4 +12.66,2.5,Male,No,Sun,Dinner,2 +16.21,2.0,Female,No,Sun,Dinner,3 +13.81,2.0,Male,No,Sun,Dinner,2 +17.51,3.0,Female,Yes,Sun,Dinner,2 +24.52,3.48,Male,No,Sun,Dinner,3 +20.76,2.24,Male,No,Sun,Dinner,2 +31.71,4.5,Male,No,Sun,Dinner,4 +10.59,1.61,Female,Yes,Sat,Dinner,2 +10.63,2.0,Female,Yes,Sat,Dinner,2 +50.81,10.0,Male,Yes,Sat,Dinner,3 +15.81,3.16,Male,Yes,Sat,Dinner,2 +7.25,5.15,Male,Yes,Sun,Dinner,2 +31.85,3.18,Male,Yes,Sun,Dinner,2 +16.82,4.0,Male,Yes,Sun,Dinner,2 +32.9,3.11,Male,Yes,Sun,Dinner,2 +17.89,2.0,Male,Yes,Sun,Dinner,2 +14.48,2.0,Male,Yes,Sun,Dinner,2 +9.6,4.0,Female,Yes,Sun,Dinner,2 +34.63,3.55,Male,Yes,Sun,Dinner,2 +34.65,3.68,Male,Yes,Sun,Dinner,4 +23.33,5.65,Male,Yes,Sun,Dinner,2 +45.35,3.5,Male,Yes,Sun,Dinner,3 +23.17,6.5,Male,Yes,Sun,Dinner,4 +40.55,3.0,Male,Yes,Sun,Dinner,2 +20.69,5.0,Male,No,Sun,Dinner,5 +20.9,3.5,Female,Yes,Sun,Dinner,3 +30.46,2.0,Male,Yes,Sun,Dinner,5 +18.15,3.5,Female,Yes,Sun,Dinner,3 +23.1,4.0,Male,Yes,Sun,Dinner,3 
+15.69,1.5,Male,Yes,Sun,Dinner,2 +19.81,4.19,Female,Yes,Thur,Lunch,2 +28.44,2.56,Male,Yes,Thur,Lunch,2 +15.48,2.02,Male,Yes,Thur,Lunch,2 +16.58,4.0,Male,Yes,Thur,Lunch,2 +7.56,1.44,Male,No,Thur,Lunch,2 +10.34,2.0,Male,Yes,Thur,Lunch,2 +43.11,5.0,Female,Yes,Thur,Lunch,4 +13.0,2.0,Female,Yes,Thur,Lunch,2 +13.51,2.0,Male,Yes,Thur,Lunch,2 +18.71,4.0,Male,Yes,Thur,Lunch,3 +12.74,2.01,Female,Yes,Thur,Lunch,2 +13.0,2.0,Female,Yes,Thur,Lunch,2 +16.4,2.5,Female,Yes,Thur,Lunch,2 +20.53,4.0,Male,Yes,Thur,Lunch,4 +16.47,3.23,Female,Yes,Thur,Lunch,3 +26.59,3.41,Male,Yes,Sat,Dinner,3 +38.73,3.0,Male,Yes,Sat,Dinner,4 +24.27,2.03,Male,Yes,Sat,Dinner,2 +12.76,2.23,Female,Yes,Sat,Dinner,2 +30.06,2.0,Male,Yes,Sat,Dinner,3 +25.89,5.16,Male,Yes,Sat,Dinner,4 +48.33,9.0,Male,No,Sat,Dinner,4 +13.27,2.5,Female,Yes,Sat,Dinner,2 +28.17,6.5,Female,Yes,Sat,Dinner,3 +12.9,1.1,Female,Yes,Sat,Dinner,2 +28.15,3.0,Male,Yes,Sat,Dinner,5 +11.59,1.5,Male,Yes,Sat,Dinner,2 +7.74,1.44,Male,Yes,Sat,Dinner,2 +30.14,3.09,Female,Yes,Sat,Dinner,4 +12.16,2.2,Male,Yes,Fri,Lunch,2 +13.42,3.48,Female,Yes,Fri,Lunch,2 +8.58,1.92,Male,Yes,Fri,Lunch,1 +15.98,3.0,Female,No,Fri,Lunch,3 +13.42,1.58,Male,Yes,Fri,Lunch,2 +16.27,2.5,Female,Yes,Fri,Lunch,2 +10.09,2.0,Female,Yes,Fri,Lunch,2 +20.45,3.0,Male,No,Sat,Dinner,4 +13.28,2.72,Male,No,Sat,Dinner,2 +22.12,2.88,Female,Yes,Sat,Dinner,2 +24.01,2.0,Male,Yes,Sat,Dinner,4 +15.69,3.0,Male,Yes,Sat,Dinner,3 +11.61,3.39,Male,No,Sat,Dinner,2 +10.77,1.47,Male,No,Sat,Dinner,2 +15.53,3.0,Male,Yes,Sat,Dinner,2 +10.07,1.25,Male,No,Sat,Dinner,2 +12.6,1.0,Male,Yes,Sat,Dinner,2 +32.83,1.17,Male,Yes,Sat,Dinner,2 +35.83,4.67,Female,No,Sat,Dinner,3 +29.03,5.92,Male,No,Sat,Dinner,3 +27.18,2.0,Female,Yes,Sat,Dinner,2 +22.67,2.0,Male,Yes,Sat,Dinner,2 +17.82,1.75,Male,No,Sat,Dinner,2 +18.78,3.0,Female,No,Thur,Dinner,2 diff --git a/pandas/tests/test_rplot.py b/pandas/tests/test_rplot.py index a9d51f47b90dc..7a8cf6cf3d4cf 100644 --- a/pandas/tests/test_rplot.py +++ b/pandas/tests/test_rplot.py @@ 
-91,6 +91,18 @@ def test_sequence_layers(self): self.assertTrue(self.data is last.data) self.assertTrue(rplot.sequence_layers([layer1])[0] is layer1) +class TestTrellis(unittest.TestCase): + def setUp(self): + path = os.path.join(curpath(), 'data/tips.csv') + self.data = read_csv(path, sep=',') + layer1 = rplot.Layer(self.data) + layer2 = rplot.GeomPoint(x='total_bill', y='tip') + layer3 = rplot.GeomPolyFit(2) + self.layers = [layer1, layer2, layer3] + self.trellis1 = rplot.TrellisGrid(['sex', 'smoker']) + self.trellis2 = rplot.TrellisGrid(['sex', '.']) + self.trellis3 = rplot.TrellisGrid(['.', 'smoker']) + class TestScaleGradient(unittest.TestCase): def setUp(self): path = os.path.join(curpath(), 'data/iris.csv') From c592ab38c22f82d90cf4d775372a46c1d3741b47 Mon Sep 17 00:00:00 2001 From: Vytautas Jancauskas Date: Tue, 14 Aug 2012 22:22:57 +0300 Subject: [PATCH 77/88] Added test_grid_sizes method --- pandas/tests/test_rplot.py | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/pandas/tests/test_rplot.py b/pandas/tests/test_rplot.py index 7a8cf6cf3d4cf..506c566a557d0 100644 --- a/pandas/tests/test_rplot.py +++ b/pandas/tests/test_rplot.py @@ -98,10 +98,36 @@ def setUp(self): layer1 = rplot.Layer(self.data) layer2 = rplot.GeomPoint(x='total_bill', y='tip') layer3 = rplot.GeomPolyFit(2) - self.layers = [layer1, layer2, layer3] + self.layers = rplot.sequence_layers([layer1, layer2, layer3]) self.trellis1 = rplot.TrellisGrid(['sex', 'smoker']) self.trellis2 = rplot.TrellisGrid(['sex', '.']) self.trellis3 = rplot.TrellisGrid(['.', 'smoker']) + self.trellised1 = self.trellis1.trellis(self.layers) + self.trellised2 = self.trellis2.trellis(self.layers) + self.trellised3 = self.trellis3.trellis(self.layers) + + def test_grid_sizes(self): + self.assertEqual(len(self.trellised1), 3) + self.assertEqual(len(self.trellised2), 3) + self.assertEqual(len(self.trellised3), 3) + self.assertEqual(len(self.trellised1[0]), 2) + 
self.assertEqual(len(self.trellised1[0][0]), 2) + self.assertEqual(len(self.trellised2[0]), 2) + self.assertEqual(len(self.trellised2[0][0]), 1) + self.assertEqual(len(self.trellised3[0]), 1) + self.assertEqual(len(self.trellised3[0][0]), 2) + self.assertEqual(len(self.trellised1[1]), 2) + self.assertEqual(len(self.trellised1[1][0]), 2) + self.assertEqual(len(self.trellised2[1]), 2) + self.assertEqual(len(self.trellised2[1][0]), 1) + self.assertEqual(len(self.trellised3[1]), 1) + self.assertEqual(len(self.trellised3[1][0]), 2) + self.assertEqual(len(self.trellised1[2]), 2) + self.assertEqual(len(self.trellised1[2][0]), 2) + self.assertEqual(len(self.trellised2[2]), 2) + self.assertEqual(len(self.trellised2[2][0]), 1) + self.assertEqual(len(self.trellised3[2]), 1) + self.assertEqual(len(self.trellised3[2][0]), 2) class TestScaleGradient(unittest.TestCase): def setUp(self): From 6ddf053065a3ca2def2e5410fe3526f5f801af0e Mon Sep 17 00:00:00 2001 From: Vytautas Jancauskas Date: Tue, 14 Aug 2012 22:35:49 +0300 Subject: [PATCH 78/88] Added test_trellis_cols_rows method --- pandas/tests/test_rplot.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/pandas/tests/test_rplot.py b/pandas/tests/test_rplot.py index 506c566a557d0..ae7e5dd948f7c 100644 --- a/pandas/tests/test_rplot.py +++ b/pandas/tests/test_rplot.py @@ -129,6 +129,14 @@ def test_grid_sizes(self): self.assertEqual(len(self.trellised3[2]), 1) self.assertEqual(len(self.trellised3[2][0]), 2) + def test_trellis_cols_rows(self): + self.assertEqual(self.trellis1.cols, 2) + self.assertEqual(self.trellis1.rows, 2) + self.assertEqual(self.trellis2.cols, 1) + self.assertEqual(self.trellis2.rows, 2) + self.assertEqual(self.trellis3.cols, 2) + self.assertEqual(self.trellis3.rows, 1) + class TestScaleGradient(unittest.TestCase): def setUp(self): path = os.path.join(curpath(), 'data/iris.csv') From 1e75cabc9e11bc6c929b230158528cea863f0c7c Mon Sep 17 00:00:00 2001 From: Vytautas Jancauskas Date: Tue, 14 Aug 2012 
23:26:26 +0300 Subject: [PATCH 79/88] Added TestRPlot1 class --- pandas/tests/test_rplot.py | 16 ++++++++++++++++ pandas/tools/rplot.py | 4 ++-- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/pandas/tests/test_rplot.py b/pandas/tests/test_rplot.py index ae7e5dd948f7c..692b577f8c3f3 100644 --- a/pandas/tests/test_rplot.py +++ b/pandas/tests/test_rplot.py @@ -2,6 +2,8 @@ import pandas.tools.rplot as rplot from pandas import read_csv import os +import matplotlib.pyplot as plt +import pdb def curpath(): pth, _ = os.path.split(os.path.abspath(__file__)) @@ -221,5 +223,19 @@ def test_scale_overflow(self): for index in range(len(self.data)): self.scale2(self.data, index) +class TestRPlot1(unittest.TestCase): + def setUp(self): + path = os.path.join(curpath(), 'data/tips.csv') + self.data = read_csv(path, sep=',') + self.plot = rplot.RPlot(self.data, x='tip', y='total_bill') + self.plot + rplot.TrellisGrid(['sex', 'smoker']) + self.plot + rplot.GeomPoint(colour=rplot.ScaleRandomColour('day'), shape=rplot.ScaleShape('size')) + self.fig = plt.gcf() + self.plot.render(self.fig) + pdb.set_trace() + + def test_subplots(self): + self.assertEqual(len(self.fig.axes), 4) + if __name__ == '__main__': unittest.main() \ No newline at end of file diff --git a/pandas/tools/rplot.py b/pandas/tools/rplot.py index 27da4cf6f8595..e9ac6b985e8f1 100644 --- a/pandas/tools/rplot.py +++ b/pandas/tools/rplot.py @@ -282,10 +282,10 @@ def make_aes(x=None, y=None, size=None, colour=None, shape=None, alpha=None): pass else: raise ValueError('size mapping should be done through ScaleConstant or ScaleSize') - if any([isinstance(colour, scale) for scale in [ScaleConstant, ScaleGradient, ScaleGradient2]]) or colour is None: + if any([isinstance(colour, scale) for scale in [ScaleConstant, ScaleGradient, ScaleGradient2, ScaleRandomColour]]) or colour is None: pass else: - raise ValueError('colour mapping should be done through ScaleConstant, ScaleGradient or ScaleGradient2') + raise 
ValueError('colour mapping should be done through ScaleConstant, ScaleRandomColour, ScaleGradient or ScaleGradient2') if any([isinstance(shape, scale) for scale in [ScaleConstant, ScaleShape]]) or shape is None: pass else: From 8d5319f3c2b5d7b6847ab4bea259916d95ebcddb Mon Sep 17 00:00:00 2001 From: Vytautas Jancauskas Date: Tue, 14 Aug 2012 23:30:13 +0300 Subject: [PATCH 80/88] Added TestRPlot2 and TestRPlot3 --- pandas/tests/test_rplot.py | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/pandas/tests/test_rplot.py b/pandas/tests/test_rplot.py index 692b577f8c3f3..9a019758b7c45 100644 --- a/pandas/tests/test_rplot.py +++ b/pandas/tests/test_rplot.py @@ -232,10 +232,35 @@ def setUp(self): self.plot + rplot.GeomPoint(colour=rplot.ScaleRandomColour('day'), shape=rplot.ScaleShape('size')) self.fig = plt.gcf() self.plot.render(self.fig) - pdb.set_trace() def test_subplots(self): - self.assertEqual(len(self.fig.axes), 4) + pass + +class TestRPlot2(unittest.TestCase): + def setUp(self): + path = os.path.join(curpath(), 'data/tips.csv') + self.data = read_csv(path, sep=',') + self.plot = rplot.RPlot(self.data, x='tip', y='total_bill') + self.plot + rplot.TrellisGrid(['.', 'smoker']) + self.plot + rplot.GeomPoint(colour=rplot.ScaleRandomColour('day'), shape=rplot.ScaleShape('size')) + self.fig = plt.gcf() + self.plot.render(self.fig) + + def test_subplots(self): + pass + +class TestRPlot3(unittest.TestCase): + def setUp(self): + path = os.path.join(curpath(), 'data/tips.csv') + self.data = read_csv(path, sep=',') + self.plot = rplot.RPlot(self.data, x='tip', y='total_bill') + self.plot + rplot.TrellisGrid(['sex', '.']) + self.plot + rplot.GeomPoint(colour=rplot.ScaleRandomColour('day'), shape=rplot.ScaleShape('size')) + self.fig = plt.gcf() + self.plot.render(self.fig) + + def test_subplots(self): + pass if __name__ == '__main__': unittest.main() \ No newline at end of file From 73efc7c5f62f1b204afc0d71949e34aecabcb780 Mon Sep 
17 00:00:00 2001 From: Vytautas Jancauskas Date: Wed, 15 Aug 2012 00:36:39 +0300 Subject: [PATCH 81/88] Reorganize TestRPlot --- pandas/tests/test_rplot.py | 22 +++++++--------------- 1 file changed, 7 insertions(+), 15 deletions(-) diff --git a/pandas/tests/test_rplot.py b/pandas/tests/test_rplot.py index 9a019758b7c45..7a9589b7bb2a1 100644 --- a/pandas/tests/test_rplot.py +++ b/pandas/tests/test_rplot.py @@ -223,9 +223,10 @@ def test_scale_overflow(self): for index in range(len(self.data)): self.scale2(self.data, index) -class TestRPlot1(unittest.TestCase): - def setUp(self): +class TestRPlot(unittest.TestCase): + def test_rplot1(self): path = os.path.join(curpath(), 'data/tips.csv') + plt.figure() self.data = read_csv(path, sep=',') self.plot = rplot.RPlot(self.data, x='tip', y='total_bill') self.plot + rplot.TrellisGrid(['sex', 'smoker']) @@ -233,12 +234,9 @@ def setUp(self): self.fig = plt.gcf() self.plot.render(self.fig) - def test_subplots(self): - pass - -class TestRPlot2(unittest.TestCase): - def setUp(self): + def test_rplot2(self): path = os.path.join(curpath(), 'data/tips.csv') + plt.figure() self.data = read_csv(path, sep=',') self.plot = rplot.RPlot(self.data, x='tip', y='total_bill') self.plot + rplot.TrellisGrid(['.', 'smoker']) @@ -246,12 +244,9 @@ def setUp(self): self.fig = plt.gcf() self.plot.render(self.fig) - def test_subplots(self): - pass - -class TestRPlot3(unittest.TestCase): - def setUp(self): + def test_rplot3(self): path = os.path.join(curpath(), 'data/tips.csv') + plt.figure() self.data = read_csv(path, sep=',') self.plot = rplot.RPlot(self.data, x='tip', y='total_bill') self.plot + rplot.TrellisGrid(['sex', '.']) @@ -259,8 +254,5 @@ def setUp(self): self.fig = plt.gcf() self.plot.render(self.fig) - def test_subplots(self): - pass - if __name__ == '__main__': unittest.main() \ No newline at end of file From 07f1ce013a31fcae1fc339b00a890af15d965140 Mon Sep 17 00:00:00 2001 From: Vytautas Jancauskas Date: Wed, 15 Aug 2012 01:49:21 +0300 
Subject: [PATCH 82/88] Added the iris data set example to tests --- pandas/tests/test_rplot.py | 11 +++++++++++ pandas/tools/rplot.py | 6 +++--- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/pandas/tests/test_rplot.py b/pandas/tests/test_rplot.py index 7a9589b7bb2a1..9681eafca9dc4 100644 --- a/pandas/tests/test_rplot.py +++ b/pandas/tests/test_rplot.py @@ -254,5 +254,16 @@ def test_rplot3(self): self.fig = plt.gcf() self.plot.render(self.fig) + def test_rplot_iris(self): + path = os.path.join(curpath(), 'data/iris.csv') + plt.figure() + self.data = read_csv(path, sep=',') + plot = rplot.RPlot(self.data, x='SepalLength', y='SepalWidth') + plot + rplot.GeomPoint(colour=rplot.ScaleGradient('PetalLength', colour1=(0.0, 1.0, 0.5), colour2=(1.0, 0.0, 0.5)), + size=rplot.ScaleSize('PetalWidth', min_size=10.0, max_size=200.0), + shape=rplot.ScaleShape('Name')) + self.fig = plt.gcf() + plot.render(self.fig) + if __name__ == '__main__': unittest.main() \ No newline at end of file diff --git a/pandas/tools/rplot.py b/pandas/tools/rplot.py index e9ac6b985e8f1..6fa037ccf7c7f 100644 --- a/pandas/tools/rplot.py +++ b/pandas/tools/rplot.py @@ -138,9 +138,9 @@ def __call__(self, data, index): data: pandas DataFrame index: pandas DataFrame row index """ - x = data[column].iget(index) - a = float(min(data[column])) - b = float(max(data[column])) + x = data[self.column].iget(index) + a = float(min(data[self.column])) + b = float(max(data[self.column])) return self.transform(self.min_size + ((x - a) / (b - a)) * (self.max_size - self.min_size)) From eaf347228cfe47f84435fb7a5bcec4d33df8beac Mon Sep 17 00:00:00 2001 From: Vytautas Jancauskas Date: Wed, 15 Aug 2012 02:16:14 +0300 Subject: [PATCH 83/88] Make sure legend displays with non-trellised plots and fix a bug with legend sorting --- pandas/tools/rplot.py | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/pandas/tools/rplot.py b/pandas/tools/rplot.py index 
6fa037ccf7c7f..a7f76174f7225 100644 --- a/pandas/tools/rplot.py +++ b/pandas/tools/rplot.py @@ -773,7 +773,11 @@ def adjust_subplots(fig, axes, trellis, layers): legend = dictionary_union(legend, layer.legend) patches = [] labels = [] - for key in sorted(legend.keys(), key=lambda tup: (tup[1], tup[3])): + if len(legend.keys()[0]) == 2: + key_function = lambda tup: (tup[1]) + else: + key_function = lambda tup: (tup[1], tup[3]) + for key in sorted(legend.keys(), key=key_function): value = legend[key] patches.append(value) if len(key) == 2: @@ -834,6 +838,28 @@ def render(self, fig=None): new_layers = sequence_layers(self.layers) for layer in new_layers: layer.work(fig=fig) + legend = {} + for layer in new_layers: + legend = dictionary_union(legend, layer.legend) + patches = [] + labels = [] + if len(legend.keys()[0]) == 2: + key_function = lambda tup: (tup[1]) + else: + key_function = lambda tup: (tup[1], tup[3]) + for key in sorted(legend.keys(), key=key_function): + value = legend[key] + patches.append(value) + if len(key) == 2: + col, val = key + labels.append("%s" % str(val)) + elif len(key) == 4: + col1, val1, col2, val2 = key + labels.append("%s, %s" % (str(val1), str(val2))) + else: + raise ValueError("Maximum 2 categorical attributes to display a lengend of") + if len(legend): + fig.legend(patches, labels, loc='upper right') else: # We have a trellised plot. 
# First let's remove all other TrellisGrid instances from the layer list, From d83e4920586099421cad762fb03d5640d5404ab1 Mon Sep 17 00:00:00 2001 From: Vytautas Jancauskas Date: Fri, 17 Aug 2012 10:44:35 +0300 Subject: [PATCH 84/88] Changed + to add --- pandas/tests/test_rplot.py | 16 ++++++++-------- pandas/tools/rplot.py | 8 ++++---- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/pandas/tests/test_rplot.py b/pandas/tests/test_rplot.py index 9681eafca9dc4..f21296f9a952b 100644 --- a/pandas/tests/test_rplot.py +++ b/pandas/tests/test_rplot.py @@ -229,8 +229,8 @@ def test_rplot1(self): plt.figure() self.data = read_csv(path, sep=',') self.plot = rplot.RPlot(self.data, x='tip', y='total_bill') - self.plot + rplot.TrellisGrid(['sex', 'smoker']) - self.plot + rplot.GeomPoint(colour=rplot.ScaleRandomColour('day'), shape=rplot.ScaleShape('size')) + self.plot.add(rplot.TrellisGrid(['sex', 'smoker'])) + self.plot.add(rplot.GeomPoint(colour=rplot.ScaleRandomColour('day'), shape=rplot.ScaleShape('size'))) self.fig = plt.gcf() self.plot.render(self.fig) @@ -239,8 +239,8 @@ def test_rplot2(self): plt.figure() self.data = read_csv(path, sep=',') self.plot = rplot.RPlot(self.data, x='tip', y='total_bill') - self.plot + rplot.TrellisGrid(['.', 'smoker']) - self.plot + rplot.GeomPoint(colour=rplot.ScaleRandomColour('day'), shape=rplot.ScaleShape('size')) + self.plot.add(rplot.TrellisGrid(['.', 'smoker'])) + self.plot.add(rplot.GeomPoint(colour=rplot.ScaleRandomColour('day'), shape=rplot.ScaleShape('size'))) self.fig = plt.gcf() self.plot.render(self.fig) @@ -249,8 +249,8 @@ def test_rplot3(self): plt.figure() self.data = read_csv(path, sep=',') self.plot = rplot.RPlot(self.data, x='tip', y='total_bill') - self.plot + rplot.TrellisGrid(['sex', '.']) - self.plot + rplot.GeomPoint(colour=rplot.ScaleRandomColour('day'), shape=rplot.ScaleShape('size')) + self.plot.add(rplot.TrellisGrid(['sex', '.'])) + self.plot.add(rplot.GeomPoint(colour=rplot.ScaleRandomColour('day'), 
shape=rplot.ScaleShape('size'))) self.fig = plt.gcf() self.plot.render(self.fig) @@ -259,9 +259,9 @@ def test_rplot_iris(self): plt.figure() self.data = read_csv(path, sep=',') plot = rplot.RPlot(self.data, x='SepalLength', y='SepalWidth') - plot + rplot.GeomPoint(colour=rplot.ScaleGradient('PetalLength', colour1=(0.0, 1.0, 0.5), colour2=(1.0, 0.0, 0.5)), + plot.add(rplot.GeomPoint(colour=rplot.ScaleGradient('PetalLength', colour1=(0.0, 1.0, 0.5), colour2=(1.0, 0.0, 0.5)), size=rplot.ScaleSize('PetalWidth', min_size=10.0, max_size=200.0), - shape=rplot.ScaleShape('Name')) + shape=rplot.ScaleShape('Name'))) self.fig = plt.gcf() plot.render(self.fig) diff --git a/pandas/tools/rplot.py b/pandas/tools/rplot.py index a7f76174f7225..713d298958d64 100644 --- a/pandas/tools/rplot.py +++ b/pandas/tools/rplot.py @@ -808,16 +808,16 @@ def __init__(self, data, x=None, y=None): self.layers = [Layer(data, **default_aes(x=x, y=y))] trellised = False - def __add__(self, other): + def add(self, layer): """Add a layer to RPlot instance. Parameters: ----------- - other: Layer instance + layer: Layer instance """ - if not isinstance(other, Layer): + if not isinstance(layer, Layer): raise TypeError("The operand on the right side of + must be a Layer instance") - self.layers.append(other) + self.layers.append(layer) def render(self, fig=None): """Render all the layers on a matplotlib figure. 
From b0b3dd403f69a3a596a1b4077597a746bdd541bd Mon Sep 17 00:00:00 2001 From: Vytautas Jancauskas Date: Fri, 17 Aug 2012 17:03:35 +0300 Subject: [PATCH 85/88] fix the case when there is no legend --- pandas/tools/rplot.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/pandas/tools/rplot.py b/pandas/tools/rplot.py index 713d298958d64..31b6dda3aae3a 100644 --- a/pandas/tools/rplot.py +++ b/pandas/tools/rplot.py @@ -773,7 +773,9 @@ def adjust_subplots(fig, axes, trellis, layers): legend = dictionary_union(legend, layer.legend) patches = [] labels = [] - if len(legend.keys()[0]) == 2: + if len(legend.keys()) == 0: + key_function = lambda tup: tup + elif len(legend.keys()[0]) == 2: key_function = lambda tup: (tup[1]) else: key_function = lambda tup: (tup[1], tup[3]) @@ -843,7 +845,9 @@ def render(self, fig=None): legend = dictionary_union(legend, layer.legend) patches = [] labels = [] - if len(legend.keys()[0]) == 2: + if len(legend.keys()) == 0: + key_function = lambda tup: tup + elif len(legend.keys()[0]) == 2: key_function = lambda tup: (tup[1]) else: key_function = lambda tup: (tup[1], tup[3]) From 54a68d7d15b7ee0bf97a10c42a765b1dd9654771 Mon Sep 17 00:00:00 2001 From: vytas Date: Wed, 3 Oct 2012 21:31:43 +0300 Subject: [PATCH 86/88] Added doc stubs --- doc/data/tips.csv | 245 +++++++++++++++++++++++++++++++++++++++++++ doc/source/index.rst | 1 + doc/source/rplot.rst | 44 ++++++++ 3 files changed, 290 insertions(+) create mode 100644 doc/data/tips.csv create mode 100644 doc/source/rplot.rst diff --git a/doc/data/tips.csv b/doc/data/tips.csv new file mode 100644 index 0000000000000..c4558cce4ce36 --- /dev/null +++ b/doc/data/tips.csv @@ -0,0 +1,245 @@ +obs,totbill,tip,sex,smoker,day,time,size +1,16.99, 1.01,F,No,Sun,Night,2 +2,10.34, 1.66,M,No,Sun,Night,3 +3,21.01, 3.50,M,No,Sun,Night,3 +4,23.68, 3.31,M,No,Sun,Night,2 +5,24.59, 3.61,F,No,Sun,Night,4 +6,25.29, 4.71,M,No,Sun,Night,4 +7, 8.77, 2.00,M,No,Sun,Night,2 +8,26.88, 
3.12,M,No,Sun,Night,4 +9,15.04, 1.96,M,No,Sun,Night,2 +10,14.78, 3.23,M,No,Sun,Night,2 +11,10.27, 1.71,M,No,Sun,Night,2 +12,35.26, 5.00,F,No,Sun,Night,4 +13,15.42, 1.57,M,No,Sun,Night,2 +14,18.43, 3.00,M,No,Sun,Night,4 +15,14.83, 3.02,F,No,Sun,Night,2 +16,21.58, 3.92,M,No,Sun,Night,2 +17,10.33, 1.67,F,No,Sun,Night,3 +18,16.29, 3.71,M,No,Sun,Night,3 +19,16.97, 3.50,F,No,Sun,Night,3 +20,20.65, 3.35,M,No,Sat,Night,3 +21,17.92, 4.08,M,No,Sat,Night,2 +22,20.29, 2.75,F,No,Sat,Night,2 +23,15.77, 2.23,F,No,Sat,Night,2 +24,39.42, 7.58,M,No,Sat,Night,4 +25,19.82, 3.18,M,No,Sat,Night,2 +26,17.81, 2.34,M,No,Sat,Night,4 +27,13.37, 2.00,M,No,Sat,Night,2 +28,12.69, 2.00,M,No,Sat,Night,2 +29,21.70, 4.30,M,No,Sat,Night,2 +30,19.65, 3.00,F,No,Sat,Night,2 +31, 9.55, 1.45,M,No,Sat,Night,2 +32,18.35, 2.50,M,No,Sat,Night,4 +33,15.06, 3.00,F,No,Sat,Night,2 +34,20.69, 2.45,F,No,Sat,Night,4 +35,17.78, 3.27,M,No,Sat,Night,2 +36,24.06, 3.60,M,No,Sat,Night,3 +37,16.31, 2.00,M,No,Sat,Night,3 +38,16.93, 3.07,F,No,Sat,Night,3 +39,18.69, 2.31,M,No,Sat,Night,3 +40,31.27, 5.00,M,No,Sat,Night,3 +41,16.04, 2.24,M,No,Sat,Night,3 +42,17.46, 2.54,M,No,Sun,Night,2 +43,13.94, 3.06,M,No,Sun,Night,2 +44, 9.68, 1.32,M,No,Sun,Night,2 +45,30.40, 5.60,M,No,Sun,Night,4 +46,18.29, 3.00,M,No,Sun,Night,2 +47,22.23, 5.00,M,No,Sun,Night,2 +48,32.40, 6.00,M,No,Sun,Night,4 +49,28.55, 2.05,M,No,Sun,Night,3 +50,18.04, 3.00,M,No,Sun,Night,2 +51,12.54, 2.50,M,No,Sun,Night,2 +52,10.29, 2.60,F,No,Sun,Night,2 +53,34.81, 5.20,F,No,Sun,Night,4 +54, 9.94, 1.56,M,No,Sun,Night,2 +55,25.56, 4.34,M,No,Sun,Night,4 +56,19.49, 3.51,M,No,Sun,Night,2 +57,38.01, 3.00,M,Yes,Sat,Night,4 +58,26.41, 1.50,F,No,Sat,Night,2 +59,11.24, 1.76,M,Yes,Sat,Night,2 +60,48.27, 6.73,M,No,Sat,Night,4 +61,20.29, 3.21,M,Yes,Sat,Night,2 +62,13.81, 2.00,M,Yes,Sat,Night,2 +63,11.02, 1.98,M,Yes,Sat,Night,2 +64,18.29, 3.76,M,Yes,Sat,Night,4 +65,17.59, 2.64,M,No,Sat,Night,3 +66,20.08, 3.15,M,No,Sat,Night,3 +67,16.45, 2.47,F,No,Sat,Night,2 +68, 3.07, 
1.00,F,Yes,Sat,Night,1 +69,20.23, 2.01,M,No,Sat,Night,2 +70,15.01, 2.09,M,Yes,Sat,Night,2 +71,12.02, 1.97,M,No,Sat,Night,2 +72,17.07, 3.00,F,No,Sat,Night,3 +73,26.86, 3.14,F,Yes,Sat,Night,2 +74,25.28, 5.00,F,Yes,Sat,Night,2 +75,14.73, 2.20,F,No,Sat,Night,2 +76,10.51, 1.25,M,No,Sat,Night,2 +77,17.92, 3.08,M,Yes,Sat,Night,2 +78,27.20, 4.00,M,No,Thu,Day,4 +79,22.76, 3.00,M,No,Thu,Day,2 +80,17.29, 2.71,M,No,Thu,Day,2 +81,19.44, 3.00,M,Yes,Thu,Day,2 +82,16.66, 3.40,M,No,Thu,Day,2 +83,10.07, 1.83,F,No,Thu,Day,1 +84,32.68, 5.00,M,Yes,Thu,Day,2 +85,15.98, 2.03,M,No,Thu,Day,2 +86,34.83, 5.17,F,No,Thu,Day,4 +87,13.03, 2.00,M,No,Thu,Day,2 +88,18.28, 4.00,M,No,Thu,Day,2 +89,24.71, 5.85,M,No,Thu,Day,2 +90,21.16, 3.00,M,No,Thu,Day,2 +91,28.97, 3.00,M,Yes,Fri,Night,2 +92,22.49, 3.50,M,No,Fri,Night,2 +93, 5.75, 1.00,F,Yes,Fri,Night,2 +94,16.32, 4.30,F,Yes,Fri,Night,2 +95,22.75, 3.25,F,No,Fri,Night,2 +96,40.17, 4.73,M,Yes,Fri,Night,4 +97,27.28, 4.00,M,Yes,Fri,Night,2 +98,12.03, 1.50,M,Yes,Fri,Night,2 +99,21.01, 3.00,M,Yes,Fri,Night,2 +100,12.46, 1.50,M,No,Fri,Night,2 +101,11.35, 2.50,F,Yes,Fri,Night,2 +102,15.38, 3.00,F,Yes,Fri,Night,2 +103,44.30, 2.50,F,Yes,Sat,Night,3 +104,22.42, 3.48,F,Yes,Sat,Night,2 +105,20.92, 4.08,F,No,Sat,Night,2 +106,15.36, 1.64,M,Yes,Sat,Night,2 +107,20.49, 4.06,M,Yes,Sat,Night,2 +108,25.21, 4.29,M,Yes,Sat,Night,2 +109,18.24, 3.76,M,No,Sat,Night,2 +110,14.31, 4.00,F,Yes,Sat,Night,2 +111,14.00, 3.00,M,No,Sat,Night,2 +112, 7.25, 1.00,F,No,Sat,Night,1 +113,38.07, 4.00,M,No,Sun,Night,3 +114,23.95, 2.55,M,No,Sun,Night,2 +115,25.71, 4.00,F,No,Sun,Night,3 +116,17.31, 3.50,F,No,Sun,Night,2 +117,29.93, 5.07,M,No,Sun,Night,4 +118,10.65, 1.50,F,No,Thu,Day,2 +119,12.43, 1.80,F,No,Thu,Day,2 +120,24.08, 2.92,F,No,Thu,Day,4 +121,11.69, 2.31,M,No,Thu,Day,2 +122,13.42, 1.68,F,No,Thu,Day,2 +123,14.26, 2.50,M,No,Thu,Day,2 +124,15.95, 2.00,M,No,Thu,Day,2 +125,12.48, 2.52,F,No,Thu,Day,2 +126,29.80, 4.20,F,No,Thu,Day,6 +127, 8.52, 1.48,M,No,Thu,Day,2 +128,14.52, 
2.00,F,No,Thu,Day,2 +129,11.38, 2.00,F,No,Thu,Day,2 +130,22.82, 2.18,M,No,Thu,Day,3 +131,19.08, 1.50,M,No,Thu,Day,2 +132,20.27, 2.83,F,No,Thu,Day,2 +133,11.17, 1.50,F,No,Thu,Day,2 +134,12.26, 2.00,F,No,Thu,Day,2 +135,18.26, 3.25,F,No,Thu,Day,2 +136, 8.51, 1.25,F,No,Thu,Day,2 +137,10.33, 2.00,F,No,Thu,Day,2 +138,14.15, 2.00,F,No,Thu,Day,2 +139,16.00, 2.00,M,Yes,Thu,Day,2 +140,13.16, 2.75,F,No,Thu,Day,2 +141,17.47, 3.50,F,No,Thu,Day,2 +142,34.30, 6.70,M,No,Thu,Day,6 +143,41.19, 5.00,M,No,Thu,Day,5 +144,27.05, 5.00,F,No,Thu,Day,6 +145,16.43, 2.30,F,No,Thu,Day,2 +146, 8.35, 1.50,F,No,Thu,Day,2 +147,18.64, 1.36,F,No,Thu,Day,3 +148,11.87, 1.63,F,No,Thu,Day,2 +149, 9.78, 1.73,M,No,Thu,Day,2 +150, 7.51, 2.00,M,No,Thu,Day,2 +151,14.07, 2.50,M,No,Sun,Night,2 +152,13.13, 2.00,M,No,Sun,Night,2 +153,17.26, 2.74,M,No,Sun,Night,3 +154,24.55, 2.00,M,No,Sun,Night,4 +155,19.77, 2.00,M,No,Sun,Night,4 +156,29.85, 5.14,F,No,Sun,Night,5 +157,48.17, 5.00,M,No,Sun,Night,6 +158,25.00, 3.75,F,No,Sun,Night,4 +159,13.39, 2.61,F,No,Sun,Night,2 +160,16.49, 2.00,M,No,Sun,Night,4 +161,21.50, 3.50,M,No,Sun,Night,4 +162,12.66, 2.50,M,No,Sun,Night,2 +163,16.21, 2.00,F,No,Sun,Night,3 +164,13.81, 2.00,M,No,Sun,Night,2 +165,17.51, 3.00,F,Yes,Sun,Night,2 +166,24.52, 3.48,M,No,Sun,Night,3 +167,20.76, 2.24,M,No,Sun,Night,2 +168,31.71, 4.50,M,No,Sun,Night,4 +169,10.59, 1.61,F,Yes,Sat,Night,2 +170,10.63, 2.00,F,Yes,Sat,Night,2 +171,50.81,10.00,M,Yes,Sat,Night,3 +172,15.81, 3.16,M,Yes,Sat,Night,2 +173, 7.25, 5.15,M,Yes,Sun,Night,2 +174,31.85, 3.18,M,Yes,Sun,Night,2 +175,16.82, 4.00,M,Yes,Sun,Night,2 +176,32.90, 3.11,M,Yes,Sun,Night,2 +177,17.89, 2.00,M,Yes,Sun,Night,2 +178,14.48, 2.00,M,Yes,Sun,Night,2 +179, 9.60, 4.00,F,Yes,Sun,Night,2 +180,34.63, 3.55,M,Yes,Sun,Night,2 +181,34.65, 3.68,M,Yes,Sun,Night,4 +182,23.33, 5.65,M,Yes,Sun,Night,2 +183,45.35, 3.50,M,Yes,Sun,Night,3 +184,23.17, 6.50,M,Yes,Sun,Night,4 +185,40.55, 3.00,M,Yes,Sun,Night,2 +186,20.69, 5.00,M,No,Sun,Night,5 +187,20.90, 
3.50,F,Yes,Sun,Night,3 +188,30.46, 2.00,M,Yes,Sun,Night,5 +189,18.15, 3.50,F,Yes,Sun,Night,3 +190,23.10, 4.00,M,Yes,Sun,Night,3 +191,15.69, 1.50,M,Yes,Sun,Night,2 +192,19.81, 4.19,F,Yes,Thu,Day,2 +193,28.44, 2.56,M,Yes,Thu,Day,2 +194,15.48, 2.02,M,Yes,Thu,Day,2 +195,16.58, 4.00,M,Yes,Thu,Day,2 +196, 7.56, 1.44,M,No,Thu,Day,2 +197,10.34, 2.00,M,Yes,Thu,Day,2 +198,43.11, 5.00,F,Yes,Thu,Day,4 +199,13.00, 2.00,F,Yes,Thu,Day,2 +200,13.51, 2.00,M,Yes,Thu,Day,2 +201,18.71, 4.00,M,Yes,Thu,Day,3 +202,12.74, 2.01,F,Yes,Thu,Day,2 +203,13.00, 2.00,F,Yes,Thu,Day,2 +204,16.40, 2.50,F,Yes,Thu,Day,2 +205,20.53, 4.00,M,Yes,Thu,Day,4 +206,16.47, 3.23,F,Yes,Thu,Day,3 +207,26.59, 3.41,M,Yes,Sat,Night,3 +208,38.73, 3.00,M,Yes,Sat,Night,4 +209,24.27, 2.03,M,Yes,Sat,Night,2 +210,12.76, 2.23,F,Yes,Sat,Night,2 +211,30.06, 2.00,M,Yes,Sat,Night,3 +212,25.89, 5.16,M,Yes,Sat,Night,4 +213,48.33, 9.00,M,No,Sat,Night,4 +214,13.27, 2.50,F,Yes,Sat,Night,2 +215,28.17, 6.50,F,Yes,Sat,Night,3 +216,12.90, 1.10,F,Yes,Sat,Night,2 +217,28.15, 3.00,M,Yes,Sat,Night,5 +218,11.59, 1.50,M,Yes,Sat,Night,2 +219, 7.74, 1.44,M,Yes,Sat,Night,2 +220,30.14, 3.09,F,Yes,Sat,Night,4 +221,12.16, 2.20,M,Yes,Fri,Day,2 +222,13.42, 3.48,F,Yes,Fri,Day,2 +223, 8.58, 1.92,M,Yes,Fri,Day,1 +224,15.98, 3.00,F,No,Fri,Day,3 +225,13.42, 1.58,M,Yes,Fri,Day,2 +226,16.27, 2.50,F,Yes,Fri,Day,2 +227,10.09, 2.00,F,Yes,Fri,Day,2 +228,20.45, 3.00,M,No,Sat,Night,4 +229,13.28, 2.72,M,No,Sat,Night,2 +230,22.12, 2.88,F,Yes,Sat,Night,2 +231,24.01, 2.00,M,Yes,Sat,Night,4 +232,15.69, 3.00,M,Yes,Sat,Night,3 +233,11.61, 3.39,M,No,Sat,Night,2 +234,10.77, 1.47,M,No,Sat,Night,2 +235,15.53, 3.00,M,Yes,Sat,Night,2 +236,10.07, 1.25,M,No,Sat,Night,2 +237,12.60, 1.00,M,Yes,Sat,Night,2 +238,32.83, 1.17,M,Yes,Sat,Night,2 +239,35.83, 4.67,F,No,Sat,Night,3 +240,29.03, 5.92,M,No,Sat,Night,3 +241,27.18, 2.00,F,Yes,Sat,Night,2 +242,22.67, 2.00,M,Yes,Sat,Night,2 +243,17.82, 1.75,M,No,Sat,Night,2 +244,18.78, 3.00,F,No,Thu,Night,2 diff --git a/doc/source/index.rst 
b/doc/source/index.rst index b4b9231b6d34d..4ef6f1b105dd7 100644 --- a/doc/source/index.rst +++ b/doc/source/index.rst @@ -122,6 +122,7 @@ See the package overview for more detail about what's in the library. reshaping timeseries visualization + rplot io sparse gotchas diff --git a/doc/source/rplot.rst b/doc/source/rplot.rst new file mode 100644 index 0000000000000..784369d34b8b8 --- /dev/null +++ b/doc/source/rplot.rst @@ -0,0 +1,44 @@ +.. currentmodule:: pandas +.. _rplot: + +.. ipython:: python + :suppress: + + import numpy as np + np.random.seed(123456) + from pandas import * + import pandas.util.testing as tm + randn = np.random.randn + np.set_printoptions(precision=4, suppress=True) + import matplotlib.pyplot as plt + tips_data = read_csv('data/tips.csv') + from pandas import read_csv + from pandas.tools.plotting import radviz + import pandas.tools.rplot as rplot + plt.close('all') + +************************** +Trellis plotting interface +************************** + +.. ipython:: python + + plt.figure() + + plot = rplot.RPlot(tips_data, x='totbill', y='tip') + plot.add(rplot.TrellisGrid(['sex', 'smoker'])) + plot.add(rplot.GeomHistogram()) + + @savefig rplot1_tips.png width=6in + plot.render(plt.gcf()) + +.. 
ipython:: python + + plt.figure() + + plot = rplot.RPlot(tips_data, x='totbill', y='tip') + plot.add(rplot.TrellisGrid(['sex', 'smoker'])) + plot.add(rplot.GeomDensity()) + + @savefig rplot2_tips.png width=6in + plot.render(plt.gcf()) \ No newline at end of file From cfefe13e42336bf1af3fe48f7f42210376212e60 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vytautas=20Jan=C4=8Dauskas?= Date: Thu, 4 Oct 2012 00:34:54 +0300 Subject: [PATCH 87/88] Added rplot examples to doc --- doc/source/rplot.rst | 70 ++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 67 insertions(+), 3 deletions(-) diff --git a/doc/source/rplot.rst b/doc/source/rplot.rst index 784369d34b8b8..e03d78f579ee3 100644 --- a/doc/source/rplot.rst +++ b/doc/source/rplot.rst @@ -12,6 +12,7 @@ np.set_printoptions(precision=4, suppress=True) import matplotlib.pyplot as plt tips_data = read_csv('data/tips.csv') + iris_data = read_csv('data/iris.data') from pandas import read_csv from pandas.tools.plotting import radviz import pandas.tools.rplot as rplot @@ -21,6 +22,8 @@ Trellis plotting interface ************************** +RPlot is a flexible API for producing Trellis plots. These plots allow you to arrange data in a rectangular grid by values of certain attributes. + .. ipython:: python plt.figure() @@ -29,7 +32,7 @@ Trellis plotting interface plot.add(rplot.TrellisGrid(['sex', 'smoker'])) plot.add(rplot.GeomHistogram()) - @savefig rplot1_tips.png width=6in + @savefig rplot1_tips.png width=8in plot.render(plt.gcf()) .. ipython:: python @@ -40,5 +43,66 @@ Trellis plotting interface plot.add(rplot.TrellisGrid(['sex', 'smoker'])) plot.add(rplot.GeomDensity()) - @savefig rplot2_tips.png width=6in - plot.render(plt.gcf()) \ No newline at end of file + @savefig rplot2_tips.png width=8in + plot.render(plt.gcf()) + +.. 
ipython:: python + + plt.figure() + + plot = rplot.RPlot(tips_data, x='totbill', y='tip') + plot.add(rplot.TrellisGrid(['sex', 'smoker'])) + plot.add(rplot.GeomScatter()) + plot.add(rplot.GeomPolyFit(degree=2)) + + @savefig rplot3_tips.png width=8in + plot.render(plt.gcf()) + +.. ipython:: python + + plt.figure() + + plot = rplot.RPlot(tips_data, x='totbill', y='tip') + plot.add(rplot.TrellisGrid(['sex', 'smoker'])) + plot.add(rplot.GeomScatter()) + plot.add(rplot.GeomDensity2D()) + + @savefig rplot4_tips.png width=8in + plot.render(plt.gcf()) + +.. ipython:: python + + plt.figure() + + plot = rplot.RPlot(tips_data, x='totbill', y='tip') + plot.add(rplot.TrellisGrid(['sex', '.'])) + plot.add(rplot.GeomHistogram()) + + @savefig rplot5_tips.png width=8in + plot.render(plt.gcf()) + +.. ipython:: python + + plt.figure() + + plot = rplot.RPlot(tips_data, x='totbill', y='tip') + plot.add(rplot.TrellisGrid(['.', 'smoker'])) + plot.add(rplot.GeomHistogram()) + + @savefig rplot6_tips.png width=8in + plot.render(plt.gcf()) + +.. 
ipython:: python + + plt.figure() + + plot = rplot.RPlot(tips_data, x='totbill', y='tip') + plot.add(rplot.TrellisGrid(['.', 'smoker'])) + plot.add(rplot.GeomHistogram()) + + plot = rplot.RPlot(tips_data, x='tip', y='totbill') + plot.add(rplot.TrellisGrid(['sex', 'smoker'])) + plot.add(rplot.GeomPoint(size=80.0, colour=rplot.ScaleRandomColour('day'), shape=rplot.ScaleShape('size'), alpha=1.0)) + + @savefig rplot7_tips.png width=8in + plot.render(plt.gcf()) From 4e1952333f1ced28b0ca9206c0fb576904ac2e17 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vytautas=20Jan=C4=8Dauskas?= Date: Thu, 4 Oct 2012 01:27:04 +0300 Subject: [PATCH 88/88] Some more documentation --- doc/source/rplot.rst | 52 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/doc/source/rplot.rst b/doc/source/rplot.rst index e03d78f579ee3..7153d5323c805 100644 --- a/doc/source/rplot.rst +++ b/doc/source/rplot.rst @@ -22,6 +22,10 @@ Trellis plotting interface ************************** +-------- +Examples +-------- + RPlot is a flexible API for producing Trellis plots. These plots allow you to arrange data in a rectangular grid by values of certain attributes. .. ipython:: python @@ -35,6 +39,8 @@ RPlot is a flexible API for producing Trellis plots. These plots allow you to ar @savefig rplot1_tips.png width=8in plot.render(plt.gcf()) +In the example above, data from the tips data set is arranged by the attributes 'sex' and 'smoker'. Since both of those attributes can take on one of two values, the resulting grid has two columns and two rows. A histogram is displayed for each cell of the grid. + .. ipython:: python plt.figure() @@ -46,6 +52,8 @@ RPlot is a flexible API for producing Trellis plots. These plots allow you to ar @savefig rplot2_tips.png width=8in plot.render(plt.gcf()) +Example above is the same as previous except the plot is set to kernel density estimation. This shows how easy it is to have different plots for the same Trellis structure. + .. 
ipython:: python plt.figure() @@ -58,6 +66,8 @@ RPlot is a flexible API for producing Trellis plots. These plots allow you to ar @savefig rplot3_tips.png width=8in plot.render(plt.gcf()) +The plot above shows that it is possible to have two or more plots for the same data displayed on the same Trellis grid cell. + .. ipython:: python plt.figure() @@ -70,6 +80,8 @@ RPlot is a flexible API for producing Trellis plots. These plots allow you to ar @savefig rplot4_tips.png width=8in plot.render(plt.gcf()) +Above is a similar plot but with 2D kernel density estimation plot superimposed. + .. ipython:: python plt.figure() @@ -81,6 +93,8 @@ RPlot is a flexible API for producing Trellis plots. These plots allow you to ar @savefig rplot5_tips.png width=8in plot.render(plt.gcf()) +It is possible to only use one attribute for grouping data. The example above only uses 'sex' attribute. If the second grouping attribute is not specified, the plots will be arranged in a column. + .. ipython:: python plt.figure() @@ -92,6 +106,8 @@ RPlot is a flexible API for producing Trellis plots. These plots allow you to ar @savefig rplot6_tips.png width=8in plot.render(plt.gcf()) +If the first grouping attribute is not specified the plots will be arranged in a row. + .. ipython:: python plt.figure() @@ -106,3 +122,39 @@ RPlot is a flexible API for producing Trellis plots. These plots allow you to ar @savefig rplot7_tips.png width=8in plot.render(plt.gcf()) + +As shown above, scatter plots are also possible. Scatter plots allow you to map various data attributes to graphical properties of the plot. In the example above the colour and shape of the scatter plot graphical objects are mapped to 'day' and 'size' attributes respectively. You use scale objects to specify these mappings. The list of scale classes is given below with initialization arguments for quick reference. 
+ +------ +Scales +------ + +:: + + ScaleGradient(column, colour1, colour2) + +This one allows you to map an attribute (specified by parameter column) value to the colour of a graphical object. The larger the value of the attribute the closer the colour will be to colour2, the smaller the value, the closer it will be to colour1. + +:: + + ScaleGradient2(column, colour1, colour2, colour3) + +The same as ScaleGradient but interpolates linearly between three colours instead of two. + +:: + + ScaleSize(column, min_size, max_size, transform) + +Map attribute value to size of the graphical object. Parameter min_size (default 5.0) is the minimum size of the graphical object, max_size (default 100.0) is the maximum size and transform is a one argument function that will be used to transform the attribute value (defaults to lambda x: x). + +:: + + ScaleShape(column) + +Map the shape of the object to attribute value. The attribute has to be categorical. + +:: + + ScaleRandomColour(column) + +Assign a random colour to a value of categorical attribute specified by column.