From e5cd8cfb4b91f22b3435f9830f516e929c4e8d32 Mon Sep 17 00:00:00 2001 From: ricardodcpereira Date: Wed, 7 Dec 2022 10:54:19 +0000 Subject: [PATCH] feat: design improvements on the correlations section --- .../flavours/html/correlation_table.py | 2 +- .../html/templates/wrapper/assets/style.css | 3 +- .../report/structure/correlations.py | 112 +++--------------- tests/unit/test_correlations.py | 22 ++-- 4 files changed, 29 insertions(+), 110 deletions(-) diff --git a/src/pandas_profiling/report/presentation/flavours/html/correlation_table.py b/src/pandas_profiling/report/presentation/flavours/html/correlation_table.py index cb3307ea7..a78df1820 100644 --- a/src/pandas_profiling/report/presentation/flavours/html/correlation_table.py +++ b/src/pandas_profiling/report/presentation/flavours/html/correlation_table.py @@ -5,7 +5,7 @@ class HTMLCorrelationTable(CorrelationTable): def render(self) -> str: correlation_matrix_html = self.content["correlation_matrix"].to_html( - classes="correlation-table table table-striped" + classes="correlation-table table table-striped", float_format="{:.3f}".format ) return templates.template("correlation_table.html").render( **self.content, correlation_matrix_html=correlation_matrix_html diff --git a/src/pandas_profiling/report/presentation/flavours/html/templates/wrapper/assets/style.css b/src/pandas_profiling/report/presentation/flavours/html/templates/wrapper/assets/style.css index a598d3b43..47aacb881 100644 --- a/src/pandas_profiling/report/presentation/flavours/html/templates/wrapper/assets/style.css +++ b/src/pandas_profiling/report/presentation/flavours/html/templates/wrapper/assets/style.css @@ -18,7 +18,7 @@ body { border-top: hidden; } -.row.spacing, [id^=correlations_tab] { +.row.spacing { padding: 2em 1em; } @@ -249,6 +249,7 @@ table.freq.mini { } .img-responsive{ max-width: 99%; + min-width: 99%; } .footer-text{ padding:20px; diff --git a/src/pandas_profiling/report/structure/correlations.py b/src/pandas_profiling/report/structure/correlations.py index 3160ebad0..8f7189619 100644 --- a/src/pandas_profiling/report/structure/correlations.py +++ b/src/pandas_profiling/report/structure/correlations.py @@ -2,12 +2,9 @@ from pandas_profiling.config import Settings from pandas_profiling.report.presentation.core import ( - HTML, - Collapse, Container, CorrelationTable, Image, - ToggleButton, ) from pandas_profiling.report.presentation.core.renderable import Renderable from pandas_profiling.visualisation import plot @@ -25,62 +22,19 @@ def get_correlation_items(config: Settings, summary: dict) -> Optional[Renderabl """ items: List[Renderable] = [] - pearson_description = ( - "The Pearson's correlation coefficient (r) is a measure of linear correlation " - "between two variables. It's value lies between -1 and +1, -1 indicating total negative " - "linear correlation, 0 indicating no linear correlation and 1 indicating total positive " - "linear correlation. Furthermore, r is invariant under separate changes in location " - "and scale of the two variables, implying that for a linear function the angle to the " - "x-axis does not affect r.

To calculate r for two " - "variables X and Y, one divides the covariance of X and " - "Y by the product of their standard deviations. " - ) - spearman_description = """The Spearman's rank correlation coefficient (ρ) is a measure of monotonic - correlation between two variables, and is therefore better in catching nonlinear monotonic correlations than - Pearson's r. It's value lies between -1 and +1, -1 indicating total negative monotonic correlation, - 0 indicating no monotonic correlation and 1 indicating total positive monotonic correlation.

To - calculate ρ for two variables X and Y, one divides the covariance of the rank - variables of X and Y by the product of their standard deviations. """ - - kendall_description = """Similarly to Spearman's rank correlation coefficient, the Kendall rank correlation - coefficient (τ) measures ordinal association between two variables. It's value lies between -1 and +1, - -1 indicating total negative correlation, 0 indicating no correlation and 1 indicating total positive correlation. -

To calculate τ for two variables X and Y, one determines the number of - concordant and discordant pairs of observations. τ is given by the number of concordant pairs minus the - discordant pairs divided by the total number of pairs.""" - - phi_k_description = """Phik (φk) is a new and practical correlation coefficient that works consistently between categorical, ordinal and interval variables, captures non-linear dependency and reverts to the Pearson correlation coefficient in case - of a bivariate normal input distribution. There is extensive documentation available here.""" - - cramers_description = """Cramér's V is an association measure for nominal random variables. The coefficient ranges from 0 to 1, with 0 indicating independence and 1 indicating perfect association. - The empirical estimators used for Cramér's V have been proved to be biased, even for large samples. - We use a bias-corrected measure that has been proposed by Bergsma in 2013 that can be found here.""" - - auto_description = """ - The auto setting is an interpretable pairwise - column metric of the following mapping: - - The number of bins used in the discretization for the Numerical-Categorical column pair can be changed - using config.correlations["auto"].n_bins. The number of bins affects the granularity of the association you wish to measure.

- This configuration uses the recommended metric for each pair of columns.""" - key_to_data = { - "pearson": (-1, "Pearson's r", pearson_description), - "spearman": (-1, "Spearman's ρ", spearman_description), - "kendall": (-1, "Kendall's τ", kendall_description), - "phi_k": (0, "Phik (φk)", phi_k_description), - "cramers": (0, "Cramér's V (φc)", cramers_description), - "auto": (-1, "Auto", auto_description), + "pearson": (-1, "Pearson's r"), + "spearman": (-1, "Spearman's ρ"), + "kendall": (-1, "Kendall's τ"), + "phi_k": (0, "Phik (φk)"), + "cramers": (0, "Cramér's V (φc)"), + "auto": (-1, "Auto"), } image_format = config.plot.image_format for key, item in summary["correlations"].items(): - vmin, name, description = key_to_data[key] + vmin, name = key_to_data[key] if isinstance(item, list): diagrams: List[Renderable] = [] @@ -95,19 +49,12 @@ def get_correlation_items(config: Settings, summary: dict) -> Optional[Renderabl ) diagrams.append(diagram) - desc = HTML( - f'
{description}
', - anchor_id=f"{key}_html", - classes="correlation-description", - name=name, - ) - - diagrams_with_desc = Container( - diagrams + [desc], + diagrams_grid = Container( + diagrams, anchor_id=f"{key}_diagram_with_desc", name="Heatmap" if config.correlation_table else name, sequence_type="batch_grid", - batch_size=len(config.html.style._labels) + 1, + batch_size=len(config.html.style._labels), ) if config.correlation_table: @@ -129,7 +76,7 @@ def get_correlation_items(config: Settings, summary: dict) -> Optional[Renderabl ) diagrams_tables_tab = Container( - [diagrams_with_desc, tables_tab], + [diagrams_grid, tables_tab], anchor_id=f"{key}_diagram_table", name=name, sequence_type="tabs", @@ -137,28 +84,15 @@ def get_correlation_items(config: Settings, summary: dict) -> Optional[Renderabl items.append(diagrams_tables_tab) else: - items.append(diagrams_with_desc) + items.append(diagrams_grid) else: diagram = Image( plot.correlation_matrix(config, item, vmin=vmin), image_format=image_format, alt=name, anchor_id=f"{key}_diagram", - name=name, - classes="correlation-diagram", - ) - - desc = HTML( - f'

{name}

{description}
', - anchor_id=f"{key}_html", - classes="correlation-description", - ) - - diagram_with_desc = Container( - [diagram, desc], - anchor_id=f"{key}_diagram_with_desc", name="Heatmap" if config.correlation_table else name, - sequence_type="grid", + classes="correlation-diagram", ) if config.correlation_table: @@ -167,7 +101,7 @@ def get_correlation_items(config: Settings, summary: dict) -> Optional[Renderabl ) diagram_table_tabs = Container( - [diagram_with_desc, table], + [diagram, table], anchor_id=f"{key}_diagram_table", name=name, sequence_type="tabs", @@ -175,24 +109,16 @@ def get_correlation_items(config: Settings, summary: dict) -> Optional[Renderabl items.append(diagram_table_tabs) else: - items.append(diagram_with_desc) + items.append(diagram) corr = Container( items, sequence_type="tabs", - name="Correlations Tab", + name="Correlations", anchor_id="correlations_tab", ) if len(items) > 0: - btn = ToggleButton( - "Show correlation descriptions", - anchor_id="toggle-correlation-description", - name="Show correlation descriptions", - ) - - return Collapse( - name="Correlations", anchor_id="correlations", button=btn, item=corr - ) - else: - return None + return corr + + return None diff --git a/tests/unit/test_correlations.py b/tests/unit/test_correlations.py index 73e8fd7b6..a9b655bec 100644 --- a/tests/unit/test_correlations.py +++ b/tests/unit/test_correlations.py @@ -47,21 +47,17 @@ def generate_report(correlation_table: bool): def test_standard_report_with_correlation_table(): report = generate_report(correlation_table=True) renderable = get_correlation_items(report.config, report.description_set) - for cor_item in renderable.content["item"].content["items"]: - diagram_with_desc, table = cor_item.content["items"] + for cor_item in renderable.content["items"]: + diagram, table = cor_item.content["items"] assert isinstance(table, CorrelationTable) - diagram, description = diagram_with_desc.content["items"] assert isinstance(diagram, Image) - assert isinstance(description, HTML) def test_standard_report_without_correlation_table(): report = generate_report(correlation_table=False) renderable = get_correlation_items(report.config, report.description_set) - for cor_item in renderable.content["item"].content["items"]: - diagram, description = cor_item.content["items"] + for diagram in renderable.content["items"]: assert isinstance(diagram, Image) - assert isinstance(description, HTML) def test_compare_report_with_correlation_table(): @@ -69,13 +65,11 @@ def test_compare_report_with_correlation_table(): report2 = generate_report(correlation_table=True) comp_report = report1.compare(report2) renderable = get_correlation_items(comp_report.config, comp_report.description_set) - for cor_items in renderable.content["item"].content["items"]: - diagrams_with_desc, tables = cor_items.content["items"] + for cor_items in renderable.content["items"]: + diagrams, tables = cor_items.content["items"] for table in tables.content["items"]: assert isinstance(table, CorrelationTable) - description = diagrams_with_desc.content["items"].pop() - assert isinstance(description, HTML) - for diagram in diagrams_with_desc.content["items"]: + for diagram in diagrams.content["items"]: assert isinstance(diagram, Image) @@ -84,8 +78,6 @@ def test_compare_report_without_correlation_table(): report2 = generate_report(correlation_table=False) comp_report = report1.compare(report2) renderable = get_correlation_items(comp_report.config, comp_report.description_set) - for cor_items in renderable.content["item"].content["items"]: - description = cor_items.content["items"].pop() - assert isinstance(description, HTML) + for cor_items in renderable.content["items"]: for diagram in cor_items.content["items"]: assert isinstance(diagram, Image)