Skip to content

Commit

Permalink
Configuration for rejected variables and warnings
Browse files Browse the repository at this point in the history
- Rejected variables can be disabled (no longer grey or strikethrough)
- Warnings in the overview are automatically collapsed when there are more than 20 items (configurable); they can also be disabled completely by setting the limit to 0.

(#281)
  • Loading branch information
sbrugman committed Jan 14, 2020
1 parent 9ab2122 commit 6c4f9ad
Show file tree
Hide file tree
Showing 8 changed files with 61 additions and 19 deletions.
9 changes: 9 additions & 0 deletions src/pandas_profiling/config_dark.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -86,10 +86,19 @@ n_obs_unique: 5
n_extreme_obs: 5
n_freq_table_max: 10

# Configuration related to the samples area
samples:
head: 10
tail: 10

# Configuration related to the warning overview (top) and per variable warnings
warnings:
# Collapse the warnings overview when it contains more than this many items; set to zero to disable showing warnings
collapse_if_more: 20

# Configuration related to the rejection of variables
reject_variables: True

# When in a Jupyter notebook
notebook:
iframe:
Expand Down
9 changes: 9 additions & 0 deletions src/pandas_profiling/config_default.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -86,10 +86,19 @@ n_obs_unique: 5
n_extreme_obs: 5
n_freq_table_max: 10

# Configuration related to the samples area
samples:
head: 10
tail: 10

# Configuration related to the warning overview (top) and per variable warnings
warnings:
# Collapse the warnings overview when it contains more than this many items; set to zero to disable showing warnings
collapse_if_more: 20

# Configuration related to the rejection of variables
reject_variables: True

# When in a Jupyter notebook
notebook:
iframe:
Expand Down
9 changes: 9 additions & 0 deletions src/pandas_profiling/config_minimal.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -86,10 +86,19 @@ n_obs_unique: 5
n_extreme_obs: 5
n_freq_table_max: 10

# Configuration related to the samples area
samples:
head: 10
tail: 10

# Configuration related to the warning overview (top) and per variable warnings
warnings:
# Collapse the warnings overview when it contains more than this many items; set to zero to disable showing warnings
collapse_if_more: 20

# Configuration related to the rejection of variables
reject_variables: True

# When in a Jupyter notebook
notebook:
iframe:
Expand Down
10 changes: 10 additions & 0 deletions src/pandas_profiling/model/messages.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,16 @@ def __init__(
self.column_name = column_name
self.anchor_id = hash(column_name)

def fmt(self):
    """Return the display label for this message as an HTML fragment.

    The label is the message type name with underscores replaced by
    spaces. For ``HIGH CORRELATION`` messages the label is wrapped in an
    ``<abbr>`` element whose ``title`` lists the correlated fields read
    from ``self.values["fields"]``.

    Returns:
        The label string; for high-correlation messages, an ``<abbr>``
        HTML snippet.
    """
    # TODO: render in template
    # Local import keeps this block self-contained.
    from html import escape

    name = self.message_type.name.replace("_", " ")
    if name == "HIGH CORRELATION":
        # Field names may be non-string labels (e.g. integers), so
        # stringify them before joining.
        fields = [str(field) for field in self.values["fields"]]
        # The names end up inside a double-quoted HTML attribute: escape
        # them so quotes or angle brackets cannot break out of ``title``.
        name = '<abbr title="This variable has a high correlation with {num} fields: {title}">HIGH CORRELATION</abbr>'.format(
            num=len(fields),
            title=escape(", ".join(fields), quote=True),
        )
    return name


def check_table_messages(table: dict) -> List[Message]:
"""Checks the overall dataset for warnings.
Expand Down
3 changes: 2 additions & 1 deletion src/pandas_profiling/report/presentation/core/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,15 @@


class Dataset(ItemRenderer):
def __init__(self, package, date, values, messages, variables, **kwargs):
def __init__(self, package, date, values, messages, collapse_warnings, variables, **kwargs):
super().__init__(
"dataset",
{
"date": date,
"values": values,
"messages": messages,
"variables": variables,
"collapse_warnings": collapse_warnings,
"package": package,
},
**kwargs
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@
</a>
</div>

<div id="reproduction" class="row collapse">
<div id="reproduction" class="row collapse" aria-expanded="false">
<div class="col-sm-12">
<p class="h2">Reproduction info</p>
<table class="table table-condensed stats">
Expand Down Expand Up @@ -84,13 +84,13 @@
{% if messages %}
<div class="col-sm-12 text-right">
<a role="button" data-toggle="collapse" data-target="#warnings"
aria-expanded="true" aria-controls="collapseExample" class="">
aria-expanded="{% if collapse_warnings %}false{% else %}true{% endif %}" aria-controls="collapseExample" class="">
Toggle Warnings
</a>
</div>

<div id="warnings" class="row collapse in" aria-expanded="true">
<div class="col-sm-12" style="padding-left: 1em;">
<div id="warnings" class="row collapse{% if not collapse_warnings %} in{% endif %}" aria-expanded="{% if collapse_warnings %}false{% else %}true{% endif %}">
<div class="col-sm-12" style="padding-left: 1em;">
<p class="h2">Warnings</p>
<table class="table table-condensed list-warnings">
{% for message in messages %}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,5 @@
<a class="anchor" href="#pp_var_{{ message.anchor_id }}"><code>{{ message.column_name }}</code></a> is an unsupported type, check if it needs cleaning or further analysis
</td>
<td>
<span class="label label-warning">Warning</span>
<span class="label label-warning">Rejected</span>
</td>
30 changes: 17 additions & 13 deletions src/pandas_profiling/report/structure/report.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""Generate the report."""

import pandas_profiling.visualisation.plot as plot
from pandas_profiling.config import config
from pandas_profiling.model.base import (
Boolean,
Real,
Expand Down Expand Up @@ -110,22 +111,13 @@ def render_variables_section(dataframe_summary: dict) -> list:
templs = []

for idx, summary in dataframe_summary["variables"].items():
# TODO: move to render
# Common template variables
def fmt_warning(warning):
name = warning.message_type.name.replace("_", " ")
if name == "HIGH CORRELATION":
name = '<abbr title="This variable has a high correlation with {num} fields: {title}">HIGH CORRELATION</abbr>'.format(
num=len(warning.values["fields"]),
title=", ".join(warning.values["fields"]),
)
return name

warnings = [
fmt_warning(warning)
warning.fmt()
for warning in dataframe_summary["messages"]
if warning.column_name == idx
]

warn_fields = [
field
for warning in dataframe_summary["messages"]
Expand All @@ -144,13 +136,18 @@ def fmt_warning(warning):
# Per type template variables
template_variables.update(type_to_func[summary["type"]](template_variables))

if config['reject_variables'].get(bool):
ignore = "ignore" in template_variables
else:
ignore = False

templs.append(
Preview(
template_variables["top"],
template_variables["bottom"],
anchor_id=template_variables["varid"],
name=idx,
ignore="ignore" in template_variables,
ignore=ignore,
)
)

Expand Down Expand Up @@ -190,13 +187,20 @@ def get_report_structure(date, sample: dict, summary: dict) -> Renderable:
The profile report in HTML format
"""

collapse_warnings = config['warnings']['collapse_if_more'].get(int)
if collapse_warnings == 0:
warnings = []
else:
warnings = summary["messages"]

sections = Sequence(
[
Dataset(
package=summary["package"],
date=date,
values=summary["table"],
messages=summary["messages"],
messages=warnings,
collapse_warnings=len(warnings) > collapse_warnings,
variables=summary["variables"],
name="Overview",
anchor_id="overview",
Expand Down

0 comments on commit 6c4f9ad

Please sign in to comment.