Skip to content

Commit

Permalink
feat: design improvements on the correlations section
Browse files: browse the repository at this point in the history
  • Loading branch information
ricardodcpereira authored and aquemy committed Dec 21, 2022
1 parent 0debcb7 commit e5cd8cf
Show file tree
Hide file tree
Showing 4 changed files with 29 additions and 110 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
class HTMLCorrelationTable(CorrelationTable):
def render(self) -> str:
correlation_matrix_html = self.content["correlation_matrix"].to_html(
classes="correlation-table table table-striped"
classes="correlation-table table table-striped", float_format="{:.3f}".format
)
return templates.template("correlation_table.html").render(
**self.content, correlation_matrix_html=correlation_matrix_html
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ body {
border-top: hidden;
}

.row.spacing, [id^=correlations_tab] {
.row.spacing {
padding: 2em 1em;
}

Expand Down Expand Up @@ -249,6 +249,7 @@ table.freq.mini {
}
.img-responsive{
max-width: 99%;
min-width: 99%;
}
.footer-text{
padding:20px;
Expand Down
112 changes: 19 additions & 93 deletions src/pandas_profiling/report/structure/correlations.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,9 @@

from pandas_profiling.config import Settings
from pandas_profiling.report.presentation.core import (
HTML,
Collapse,
Container,
CorrelationTable,
Image,
ToggleButton,
)
from pandas_profiling.report.presentation.core.renderable import Renderable
from pandas_profiling.visualisation import plot
Expand All @@ -25,62 +22,19 @@ def get_correlation_items(config: Settings, summary: dict) -> Optional[Renderabl
"""
items: List[Renderable] = []

pearson_description = (
"The Pearson's correlation coefficient (<em>r</em>) is a measure of linear correlation "
"between two variables. It's value lies between -1 and +1, -1 indicating total negative "
"linear correlation, 0 indicating no linear correlation and 1 indicating total positive "
"linear correlation. Furthermore, <em>r</em> is invariant under separate changes in location "
"and scale of the two variables, implying that for a linear function the angle to the "
"x-axis does not affect <em>r</em>.<br /><br />To calculate <em>r</em> for two "
"variables <em>X</em> and <em>Y</em>, one divides the covariance of <em>X</em> and "
"<em>Y</em> by the product of their standard deviations. "
)
spearman_description = """The Spearman's rank correlation coefficient (<em>ρ</em>) is a measure of monotonic
correlation between two variables, and is therefore better in catching nonlinear monotonic correlations than
Pearson's <em>r</em>. It's value lies between -1 and +1, -1 indicating total negative monotonic correlation,
0 indicating no monotonic correlation and 1 indicating total positive monotonic correlation.<br /><br />To
calculate <em>ρ</em> for two variables <em>X</em> and <em>Y</em>, one divides the covariance of the rank
variables of <em>X</em> and <em>Y</em> by the product of their standard deviations. """

kendall_description = """Similarly to Spearman's rank correlation coefficient, the Kendall rank correlation
coefficient (<em>τ</em>) measures ordinal association between two variables. It's value lies between -1 and +1,
-1 indicating total negative correlation, 0 indicating no correlation and 1 indicating total positive correlation.
<br /><br />To calculate <em>τ</em> for two variables <em>X</em> and <em>Y</em>, one determines the number of
concordant and discordant pairs of observations. <em>τ</em> is given by the number of concordant pairs minus the
discordant pairs divided by the total number of pairs."""

phi_k_description = """Phik (φk) is a new and practical correlation coefficient that works consistently between categorical, ordinal and interval variables, captures non-linear dependency and reverts to the Pearson correlation coefficient in case
of a bivariate normal input distribution. There is extensive documentation available <a href='https://phik.readthedocs.io/en/latest/index.html'>here</a>."""

cramers_description = """Cramér's V is an association measure for nominal random variables. The coefficient ranges from 0 to 1, with 0 indicating independence and 1 indicating perfect association.
The empirical estimators used for Cramér's V have been proved to be biased, even for large samples.
We use a bias-corrected measure that has been proposed by Bergsma in 2013 that can be found <a href='http://stats.lse.ac.uk/bergsma/pdf/cramerV3.pdf'>here</a>."""

auto_description = """
The auto setting is an interpretable pairwise
column metric of the following mapping:
<ul>
<li> Categorical-Categorical : Cramer's V, <strong> [0,1] </strong> <br /> </li>
<li> Numerical-Categorical : Cramer's V, <strong> [0,1] </strong> (using a discretized numerical column) <br /> </li>
<li> Numerical-Numerical : Spearman's ρ, <strong> [-1,1] </strong> <br /> </li>
</ul>
The number of bins used in the discretization for the Numerical-Categorical column pair can be changed
using config.correlations["auto"].n_bins. The number of bins affects the granularity of the association you wish to measure. <br><br>
This configuration uses the recommended metric for each pair of columns."""

key_to_data = {
"pearson": (-1, "Pearson's r", pearson_description),
"spearman": (-1, "Spearman's ρ", spearman_description),
"kendall": (-1, "Kendall's τ", kendall_description),
"phi_k": (0, "Phik (φk)", phi_k_description),
"cramers": (0, "Cramér's V (φc)", cramers_description),
"auto": (-1, "Auto", auto_description),
"pearson": (-1, "Pearson's r"),
"spearman": (-1, "Spearman's ρ"),
"kendall": (-1, "Kendall's τ"),
"phi_k": (0, "Phik (φk)"),
"cramers": (0, "Cramér's V (φc)"),
"auto": (-1, "Auto"),
}

image_format = config.plot.image_format

for key, item in summary["correlations"].items():
vmin, name, description = key_to_data[key]
vmin, name = key_to_data[key]

if isinstance(item, list):
diagrams: List[Renderable] = []
Expand All @@ -95,19 +49,12 @@ def get_correlation_items(config: Settings, summary: dict) -> Optional[Renderabl
)
diagrams.append(diagram)

desc = HTML(
f'<div style="padding:20px" class="text-muted">{description}</div>',
anchor_id=f"{key}_html",
classes="correlation-description",
name=name,
)

diagrams_with_desc = Container(
diagrams + [desc],
diagrams_grid = Container(
diagrams,
anchor_id=f"{key}_diagram_with_desc",
name="Heatmap" if config.correlation_table else name,
sequence_type="batch_grid",
batch_size=len(config.html.style._labels) + 1,
batch_size=len(config.html.style._labels),
)

if config.correlation_table:
Expand All @@ -129,36 +76,23 @@ def get_correlation_items(config: Settings, summary: dict) -> Optional[Renderabl
)

diagrams_tables_tab = Container(
[diagrams_with_desc, tables_tab],
[diagrams_grid, tables_tab],
anchor_id=f"{key}_diagram_table",
name=name,
sequence_type="tabs",
)

items.append(diagrams_tables_tab)
else:
items.append(diagrams_with_desc)
items.append(diagrams_grid)
else:
diagram = Image(
plot.correlation_matrix(config, item, vmin=vmin),
image_format=image_format,
alt=name,
anchor_id=f"{key}_diagram",
name=name,
classes="correlation-diagram",
)

desc = HTML(
f'<div style="padding:20px" class="text-muted"><h3>{name}</h3>{description}</div>',
anchor_id=f"{key}_html",
classes="correlation-description",
)

diagram_with_desc = Container(
[diagram, desc],
anchor_id=f"{key}_diagram_with_desc",
name="Heatmap" if config.correlation_table else name,
sequence_type="grid",
classes="correlation-diagram",
)

if config.correlation_table:
Expand All @@ -167,32 +101,24 @@ def get_correlation_items(config: Settings, summary: dict) -> Optional[Renderabl
)

diagram_table_tabs = Container(
[diagram_with_desc, table],
[diagram, table],
anchor_id=f"{key}_diagram_table",
name=name,
sequence_type="tabs",
)

items.append(diagram_table_tabs)
else:
items.append(diagram_with_desc)
items.append(diagram)

corr = Container(
items,
sequence_type="tabs",
name="Correlations Tab",
name="Correlations",
anchor_id="correlations_tab",
)

if len(items) > 0:
btn = ToggleButton(
"Show correlation descriptions",
anchor_id="toggle-correlation-description",
name="Show correlation descriptions",
)

return Collapse(
name="Correlations", anchor_id="correlations", button=btn, item=corr
)
else:
return None
return corr

return None
22 changes: 7 additions & 15 deletions tests/unit/test_correlations.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,35 +47,29 @@ def generate_report(correlation_table: bool):
def test_standard_report_with_correlation_table():
report = generate_report(correlation_table=True)
renderable = get_correlation_items(report.config, report.description_set)
for cor_item in renderable.content["item"].content["items"]:
diagram_with_desc, table = cor_item.content["items"]
for cor_item in renderable.content["items"]:
diagram, table = cor_item.content["items"]
assert isinstance(table, CorrelationTable)
diagram, description = diagram_with_desc.content["items"]
assert isinstance(diagram, Image)
assert isinstance(description, HTML)


def test_standard_report_without_correlation_table():
report = generate_report(correlation_table=False)
renderable = get_correlation_items(report.config, report.description_set)
for cor_item in renderable.content["item"].content["items"]:
diagram, description = cor_item.content["items"]
for diagram in renderable.content["items"]:
assert isinstance(diagram, Image)
assert isinstance(description, HTML)


def test_compare_report_with_correlation_table():
report1 = generate_report(correlation_table=True)
report2 = generate_report(correlation_table=True)
comp_report = report1.compare(report2)
renderable = get_correlation_items(comp_report.config, comp_report.description_set)
for cor_items in renderable.content["item"].content["items"]:
diagrams_with_desc, tables = cor_items.content["items"]
for cor_items in renderable.content["items"]:
diagrams, tables = cor_items.content["items"]
for table in tables.content["items"]:
assert isinstance(table, CorrelationTable)
description = diagrams_with_desc.content["items"].pop()
assert isinstance(description, HTML)
for diagram in diagrams_with_desc.content["items"]:
for diagram in diagrams.content["items"]:
assert isinstance(diagram, Image)


Expand All @@ -84,8 +78,6 @@ def test_compare_report_without_correlation_table():
report2 = generate_report(correlation_table=False)
comp_report = report1.compare(report2)
renderable = get_correlation_items(comp_report.config, comp_report.description_set)
for cor_items in renderable.content["item"].content["items"]:
description = cor_items.content["items"].pop()
assert isinstance(description, HTML)
for cor_items in renderable.content["items"]:
for diagram in cor_items.content["items"]:
assert isinstance(diagram, Image)

0 comments on commit e5cd8cf

Please sign in to comment.