From e5cd8cfb4b91f22b3435f9830f516e929c4e8d32 Mon Sep 17 00:00:00 2001
From: ricardodcpereira <ricardo.pereira@ydata.ai>
Date: Wed, 7 Dec 2022 10:54:19 +0000
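Subject: [PATCH] feat: design improvements on the correlations section

Remove the per-coefficient description panels and the "Show correlation
descriptions" toggle from the correlations section, so that
get_correlation_items returns the "Correlations" tabs container directly
instead of wrapping it in a Collapse. Render correlation table values
with three decimal places, drop the extra padding rule for
[id^=correlations_tab], and give .img-responsive a min-width of 99%.
Update the unit tests to match the flattened structure.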
---
 .../flavours/html/correlation_table.py        |   3 +-
 .../html/templates/wrapper/assets/style.css   |   3 +-
 .../report/structure/correlations.py          | 112 +++---------------
 tests/unit/test_correlations.py               |  22 ++--
 4 files changed, 30 insertions(+), 110 deletions(-)
diff --git a/src/pandas_profiling/report/presentation/flavours/html/correlation_table.py b/src/pandas_profiling/report/presentation/flavours/html/correlation_table.py
index cb3307ea7..a78df1820 100644
--- a/src/pandas_profiling/report/presentation/flavours/html/correlation_table.py
+++ b/src/pandas_profiling/report/presentation/flavours/html/correlation_table.py
@@ -5,7 +5,8 @@
 class HTMLCorrelationTable(CorrelationTable):
     def render(self) -> str:
         correlation_matrix_html = self.content["correlation_matrix"].to_html(
-            classes="correlation-table table table-striped"
+            classes="correlation-table table table-striped",
+            float_format="{:.3f}".format,
         )
         return templates.template("correlation_table.html").render(
             **self.content, correlation_matrix_html=correlation_matrix_html
diff --git a/src/pandas_profiling/report/presentation/flavours/html/templates/wrapper/assets/style.css b/src/pandas_profiling/report/presentation/flavours/html/templates/wrapper/assets/style.css
index a598d3b43..47aacb881 100644
--- a/src/pandas_profiling/report/presentation/flavours/html/templates/wrapper/assets/style.css
+++ b/src/pandas_profiling/report/presentation/flavours/html/templates/wrapper/assets/style.css
@@ -18,7 +18,7 @@ body {
     border-top: hidden;
 }
 
-.row.spacing, [id^=correlations_tab] {
+.row.spacing {
     padding: 2em 1em;
 }
 
@@ -249,6 +249,7 @@ table.freq.mini {
 }
 .img-responsive{
     max-width: 99%;
+    min-width: 99%;
 }
 .footer-text{
     padding:20px;
diff --git a/src/pandas_profiling/report/structure/correlations.py b/src/pandas_profiling/report/structure/correlations.py
index 3160ebad0..8f7189619 100644
--- a/src/pandas_profiling/report/structure/correlations.py
+++ b/src/pandas_profiling/report/structure/correlations.py
@@ -2,12 +2,9 @@
 
 from pandas_profiling.config import Settings
 from pandas_profiling.report.presentation.core import (
-    HTML,
-    Collapse,
     Container,
     CorrelationTable,
     Image,
-    ToggleButton,
 )
 from pandas_profiling.report.presentation.core.renderable import Renderable
 from pandas_profiling.visualisation import plot
@@ -25,62 +22,19 @@ def get_correlation_items(config: Settings, summary: dict) -> Optional[Renderabl
     """
     items: List[Renderable] = []
 
-    pearson_description = (
-        "The Pearson's correlation coefficient (<em>r</em>) is a measure of linear correlation "
-        "between two variables. It's value lies between -1 and +1, -1 indicating total negative "
-        "linear correlation, 0 indicating no linear correlation and 1 indicating total positive "
-        "linear correlation. Furthermore, <em>r</em> is invariant under separate changes in location "
-        "and scale of the two variables, implying that for a linear function the angle to the "
-        "x-axis does not affect <em>r</em>.<br /><br />To calculate <em>r</em> for two "
-        "variables <em>X</em> and <em>Y</em>, one divides the covariance of <em>X</em> and "
-        "<em>Y</em> by the product of their standard deviations. "
-    )
-    spearman_description = """The Spearman's rank correlation coefficient (<em>ρ</em>) is a measure of monotonic 
-    correlation between two variables, and is therefore better in catching nonlinear monotonic correlations than 
-    Pearson's <em>r</em>. It's value lies between -1 and +1, -1 indicating total negative monotonic correlation, 
-    0 indicating no monotonic correlation and 1 indicating total positive monotonic correlation.<br /><br />To 
-    calculate <em>ρ</em> for two variables <em>X</em> and <em>Y</em>, one divides the covariance of the rank 
-    variables of <em>X</em> and <em>Y</em> by the product of their standard deviations. """
-
-    kendall_description = """Similarly to Spearman's rank correlation coefficient, the Kendall rank correlation 
-    coefficient (<em>τ</em>) measures ordinal association between two variables. It's value lies between -1 and +1, 
-    -1 indicating total negative correlation, 0 indicating no correlation and 1 indicating total positive correlation.
-    <br /><br />To calculate <em>τ</em> for two variables <em>X</em> and <em>Y</em>, one determines the number of 
-    concordant and discordant pairs of observations. <em>τ</em> is given by the number of concordant pairs minus the 
-    discordant pairs divided by the total number of pairs."""
-
-    phi_k_description = """Phik (φk) is a new and practical correlation coefficient that works consistently between categorical, ordinal and interval variables, captures non-linear dependency and reverts to the Pearson correlation coefficient in case
-    of a bivariate normal input distribution. There is extensive documentation available <a href='https://phik.readthedocs.io/en/latest/index.html'>here</a>."""
-
-    cramers_description = """Cramér's V is an association measure for nominal random variables. The coefficient ranges from 0 to 1, with 0 indicating independence and 1 indicating perfect association.
-    The empirical estimators used for Cramér's V have been proved to be biased, even for large samples.
-    We use a bias-corrected measure that has been proposed by Bergsma in 2013 that can be found <a href='http://stats.lse.ac.uk/bergsma/pdf/cramerV3.pdf'>here</a>."""
-
-    auto_description = """
-                            The auto setting is an interpretable pairwise 
-                                column metric of the following mapping:
-                        <ul>
-                            <li> Categorical-Categorical     : Cramer's V, <strong> [0,1] </strong> <br /> </li> 
-                            <li> Numerical-Categorical       : Cramer's V, <strong> [0,1] </strong> (using a discretized numerical column) <br /> </li> 
-                            <li> Numerical-Numerical         : Spearman's ρ, <strong> [-1,1] </strong> <br /> </li> 
-                        </ul>
-                        The number of bins used in the discretization for the Numerical-Categorical column pair can be changed
-                        using config.correlations["auto"].n_bins. The number of bins affects the granularity of the association you wish to measure. <br><br>
-                        This configuration uses the recommended metric for each pair of columns."""
-
     key_to_data = {
-        "pearson": (-1, "Pearson's r", pearson_description),
-        "spearman": (-1, "Spearman's ρ", spearman_description),
-        "kendall": (-1, "Kendall's τ", kendall_description),
-        "phi_k": (0, "Phik (φk)", phi_k_description),
-        "cramers": (0, "Cramér's V (φc)", cramers_description),
-        "auto": (-1, "Auto", auto_description),
+        "pearson": (-1, "Pearson's r"),
+        "spearman": (-1, "Spearman's ρ"),
+        "kendall": (-1, "Kendall's τ"),
+        "phi_k": (0, "Phik (φk)"),
+        "cramers": (0, "Cramér's V (φc)"),
+        "auto": (-1, "Auto"),
     }
 
     image_format = config.plot.image_format
 
     for key, item in summary["correlations"].items():
-        vmin, name, description = key_to_data[key]
+        vmin, name = key_to_data[key]
 
         if isinstance(item, list):
             diagrams: List[Renderable] = []
@@ -95,19 +49,12 @@ def get_correlation_items(config: Settings, summary: dict) -> Optional[Renderabl
                 )
                 diagrams.append(diagram)
 
-            desc = HTML(
-                f'<div style="padding:20px" class="text-muted">{description}</div>',
-                anchor_id=f"{key}_html",
-                classes="correlation-description",
-                name=name,
-            )
-
-            diagrams_with_desc = Container(
-                diagrams + [desc],
+            diagrams_grid = Container(
+                diagrams,
                 anchor_id=f"{key}_diagram_with_desc",
                 name="Heatmap" if config.correlation_table else name,
                 sequence_type="batch_grid",
-                batch_size=len(config.html.style._labels) + 1,
+                batch_size=len(config.html.style._labels),
             )
 
             if config.correlation_table:
@@ -129,7 +76,7 @@ def get_correlation_items(config: Settings, summary: dict) -> Optional[Renderabl
                 )
 
                 diagrams_tables_tab = Container(
-                    [diagrams_with_desc, tables_tab],
+                    [diagrams_grid, tables_tab],
                     anchor_id=f"{key}_diagram_table",
                     name=name,
                     sequence_type="tabs",
@@ -137,28 +84,15 @@ def get_correlation_items(config: Settings, summary: dict) -> Optional[Renderabl
 
                 items.append(diagrams_tables_tab)
             else:
-                items.append(diagrams_with_desc)
+                items.append(diagrams_grid)
         else:
             diagram = Image(
                 plot.correlation_matrix(config, item, vmin=vmin),
                 image_format=image_format,
                 alt=name,
                 anchor_id=f"{key}_diagram",
-                name=name,
-                classes="correlation-diagram",
-            )
-
-            desc = HTML(
-                f'<div style="padding:20px" class="text-muted"><h3>{name}</h3>{description}</div>',
-                anchor_id=f"{key}_html",
-                classes="correlation-description",
-            )
-
-            diagram_with_desc = Container(
-                [diagram, desc],
-                anchor_id=f"{key}_diagram_with_desc",
                 name="Heatmap" if config.correlation_table else name,
-                sequence_type="grid",
+                classes="correlation-diagram",
             )
 
             if config.correlation_table:
@@ -167,7 +101,7 @@ def get_correlation_items(config: Settings, summary: dict) -> Optional[Renderabl
                 )
 
                 diagram_table_tabs = Container(
-                    [diagram_with_desc, table],
+                    [diagram, table],
                     anchor_id=f"{key}_diagram_table",
                     name=name,
                     sequence_type="tabs",
@@ -175,24 +109,16 @@ def get_correlation_items(config: Settings, summary: dict) -> Optional[Renderabl
 
                 items.append(diagram_table_tabs)
             else:
-                items.append(diagram_with_desc)
+                items.append(diagram)
 
     corr = Container(
         items,
         sequence_type="tabs",
-        name="Correlations Tab",
+        name="Correlations",
         anchor_id="correlations_tab",
     )
 
     if len(items) > 0:
-        btn = ToggleButton(
-            "Show correlation descriptions",
-            anchor_id="toggle-correlation-description",
-            name="Show correlation descriptions",
-        )
-
-        return Collapse(
-            name="Correlations", anchor_id="correlations", button=btn, item=corr
-        )
-    else:
-        return None
+        return corr
+
+    return None
diff --git a/tests/unit/test_correlations.py b/tests/unit/test_correlations.py
index 73e8fd7b6..a9b655bec 100644
--- a/tests/unit/test_correlations.py
+++ b/tests/unit/test_correlations.py
@@ -47,21 +47,17 @@ def generate_report(correlation_table: bool):
 def test_standard_report_with_correlation_table():
     report = generate_report(correlation_table=True)
     renderable = get_correlation_items(report.config, report.description_set)
-    for cor_item in renderable.content["item"].content["items"]:
-        diagram_with_desc, table = cor_item.content["items"]
+    for cor_item in renderable.content["items"]:
+        diagram, table = cor_item.content["items"]
         assert isinstance(table, CorrelationTable)
-        diagram, description = diagram_with_desc.content["items"]
         assert isinstance(diagram, Image)
-        assert isinstance(description, HTML)
 
 
 def test_standard_report_without_correlation_table():
     report = generate_report(correlation_table=False)
     renderable = get_correlation_items(report.config, report.description_set)
-    for cor_item in renderable.content["item"].content["items"]:
-        diagram, description = cor_item.content["items"]
+    for diagram in renderable.content["items"]:
         assert isinstance(diagram, Image)
-        assert isinstance(description, HTML)
 
 
 def test_compare_report_with_correlation_table():
@@ -69,13 +65,11 @@ def test_compare_report_with_correlation_table():
     report2 = generate_report(correlation_table=True)
     comp_report = report1.compare(report2)
     renderable = get_correlation_items(comp_report.config, comp_report.description_set)
-    for cor_items in renderable.content["item"].content["items"]:
-        diagrams_with_desc, tables = cor_items.content["items"]
+    for cor_items in renderable.content["items"]:
+        diagrams, tables = cor_items.content["items"]
         for table in tables.content["items"]:
             assert isinstance(table, CorrelationTable)
-        description = diagrams_with_desc.content["items"].pop()
-        assert isinstance(description, HTML)
-        for diagram in diagrams_with_desc.content["items"]:
+        for diagram in diagrams.content["items"]:
             assert isinstance(diagram, Image)
 
 
@@ -84,8 +78,6 @@ def test_compare_report_without_correlation_table():
     report2 = generate_report(correlation_table=False)
     comp_report = report1.compare(report2)
     renderable = get_correlation_items(comp_report.config, comp_report.description_set)
-    for cor_items in renderable.content["item"].content["items"]:
-        description = cor_items.content["items"].pop()
-        assert isinstance(description, HTML)
+    for cor_items in renderable.content["items"]:
         for diagram in cor_items.content["items"]:
             assert isinstance(diagram, Image)