From 40da9f192196c5a3d64d1268917dc5617b2f215a Mon Sep 17 00:00:00 2001 From: sbrugman Date: Tue, 14 Jan 2020 01:17:35 +0100 Subject: [PATCH 1/7] Handling for constant (values, length) variables - Constant length warning / message: Inform the user for categoricals with constant length - Variables with constant values are "ignored" (strikethrough and grey) --- src/pandas_profiling/model/messages.py | 21 ++++++++++++++++++- .../report/structure/report.py | 6 +++++- .../structure/variables/render_generic.py | 1 - 3 files changed, 25 insertions(+), 3 deletions(-) diff --git a/src/pandas_profiling/model/messages.py b/src/pandas_profiling/model/messages.py index 13c81ad75..437a715d6 100644 --- a/src/pandas_profiling/model/messages.py +++ b/src/pandas_profiling/model/messages.py @@ -52,6 +52,10 @@ class MessageType(Enum): """This variable is likely a datetime, but treated as categorical.""" UNIQUE = 12 + """This variable has unique values.""" + + CONSTANT_LENGTH = 13 + """This variable has a constant length""" class Message(object): @@ -161,7 +165,7 @@ def check_variable_messages(col: str, description: dict) -> List[Message]: ) # Categorical - if description["type"] in {Variable.TYPE_CAT}: + if description["type"] == Variable.TYPE_CAT: if description["date_warning"]: messages.append( Message(column_name=col, message_type=MessageType.TYPE_DATE, values={}) @@ -180,6 +184,21 @@ def check_variable_messages(col: str, description: dict) -> List[Message]: ) ) + # Constant length + if ( + "composition" in description + and description["composition"]["min_length"] + == description["composition"]["max_length"] + ): + messages.append( + Message( + column_name=col, + message_type=MessageType.CONSTANT_LENGTH, + values=description, + fields={"composition_min_length", "composition_max_length"}, + ) + ) + # Numerical if description["type"] in {Variable.TYPE_NUM}: # Skewness diff --git a/src/pandas_profiling/report/structure/report.py b/src/pandas_profiling/report/structure/report.py index 
e6d799032..20f914aee 100644 --- a/src/pandas_profiling/report/structure/report.py +++ b/src/pandas_profiling/report/structure/report.py @@ -14,6 +14,7 @@ ImagePath, Generic, ) +from pandas_profiling.model.messages import MessageType from pandas_profiling.report.structure.variables import ( render_boolean, render_categorical, @@ -144,13 +145,16 @@ def fmt_warning(warning): # Per type template variables template_variables.update(type_to_func[summary["type"]](template_variables)) + # Ignore these + ignore = summary["type"] == Generic or MessageType.CONST.value in warnings + templs.append( Preview( template_variables["top"], template_variables["bottom"], anchor_id=template_variables["varid"], name=idx, - ignore="ignore" in template_variables, + ignore=ignore, ) ) diff --git a/src/pandas_profiling/report/structure/variables/render_generic.py b/src/pandas_profiling/report/structure/variables/render_generic.py index 2889e6df0..79bc6dca5 100644 --- a/src/pandas_profiling/report/structure/variables/render_generic.py +++ b/src/pandas_profiling/report/structure/variables/render_generic.py @@ -37,7 +37,6 @@ def render_generic(summary): return { "top": Sequence([info, table, HTML("")], sequence_type="grid"), "bottom": None, - "ignore": "ignore", } # Add class Ignore From 6c4f9ad7546a14462c03116ed1b623b6cdaf9bb5 Mon Sep 17 00:00:00 2001 From: sbrugman Date: Tue, 14 Jan 2020 02:01:30 +0100 Subject: [PATCH 2/7] Configuration for rejected variables and warnings - Rejected variables can be disabled (no longer grey or strikethrough) - Warnings in the overview are automatically collapsed if over 20 items (configurable), can also be disabled completely by setting the limit to 0. 
(#281) --- src/pandas_profiling/config_dark.yaml | 9 ++++++ src/pandas_profiling/config_default.yaml | 9 ++++++ src/pandas_profiling/config_minimal.yaml | 9 ++++++ src/pandas_profiling/model/messages.py | 10 +++++++ .../report/presentation/core/dataset.py | 3 +- .../html/templates/overview/overview.html | 8 ++--- .../warnings/warning_unsupported.html | 2 +- .../report/structure/report.py | 30 +++++++++++-------- 8 files changed, 61 insertions(+), 19 deletions(-) diff --git a/src/pandas_profiling/config_dark.yaml b/src/pandas_profiling/config_dark.yaml index 5b2c91111..a124ab675 100644 --- a/src/pandas_profiling/config_dark.yaml +++ b/src/pandas_profiling/config_dark.yaml @@ -86,10 +86,19 @@ n_obs_unique: 5 n_extreme_obs: 5 n_freq_table_max: 10 +# Configuration related to the samples area samples: head: 10 tail: 10 +# Configuration related to the warning overview (top) and per variable warnings +warnings: + # Set to zero to disable showing warnings + collapse_if_more: 20 + +# Configuration related to the rejection of variables +reject_variables: True + # When in a Jupyter notebook notebook: iframe: diff --git a/src/pandas_profiling/config_default.yaml b/src/pandas_profiling/config_default.yaml index fbe9b2a45..3297356ab 100644 --- a/src/pandas_profiling/config_default.yaml +++ b/src/pandas_profiling/config_default.yaml @@ -86,10 +86,19 @@ n_obs_unique: 5 n_extreme_obs: 5 n_freq_table_max: 10 +# Configuration related to the samples area samples: head: 10 tail: 10 +# Configuration related to the warning overview (top) and per variable warnings +warnings: + # Set to zero to disable showing warnings + collapse_if_more: 20 + +# Configuration related to the rejection of variables +reject_variables: True + # When in a Jupyter notebook notebook: iframe: diff --git a/src/pandas_profiling/config_minimal.yaml b/src/pandas_profiling/config_minimal.yaml index b8d589744..290320823 100644 --- a/src/pandas_profiling/config_minimal.yaml +++ b/src/pandas_profiling/config_minimal.yaml 
@@ -86,10 +86,19 @@ n_obs_unique: 5 n_extreme_obs: 5 n_freq_table_max: 10 +# Configuration related to the samples area samples: head: 10 tail: 10 +# Configuration related to the warning overview (top) and per variable warnings +warnings: + # Set to zero to disable showing warnings + collapse_if_more: 20 + +# Configuration related to the rejection of variables +reject_variables: True + # When in a Jupyter notebook notebook: iframe: diff --git a/src/pandas_profiling/model/messages.py b/src/pandas_profiling/model/messages.py index 13c81ad75..46d52563e 100644 --- a/src/pandas_profiling/model/messages.py +++ b/src/pandas_profiling/model/messages.py @@ -73,6 +73,16 @@ def __init__( self.column_name = column_name self.anchor_id = hash(column_name) + def fmt(self): + # TODO: render in template + name = self.message_type.name.replace("_", " ") + if name == "HIGH CORRELATION": + name = 'HIGH CORRELATION'.format( + num=len(self.values["fields"]), + title=", ".join(self.values["fields"]), + ) + return name + def check_table_messages(table: dict) -> List[Message]: """Checks the overall dataset for warnings. 
diff --git a/src/pandas_profiling/report/presentation/core/dataset.py b/src/pandas_profiling/report/presentation/core/dataset.py index 5405b80a8..b40116e23 100644 --- a/src/pandas_profiling/report/presentation/core/dataset.py +++ b/src/pandas_profiling/report/presentation/core/dataset.py @@ -4,7 +4,7 @@ class Dataset(ItemRenderer): - def __init__(self, package, date, values, messages, variables, **kwargs): + def __init__(self, package, date, values, messages, collapse_warnings, variables, **kwargs): super().__init__( "dataset", { @@ -12,6 +12,7 @@ def __init__(self, package, date, values, messages, variables, **kwargs): "values": values, "messages": messages, "variables": variables, + "collapse_warnings": collapse_warnings, "package": package, }, **kwargs diff --git a/src/pandas_profiling/report/presentation/flavours/html/templates/overview/overview.html b/src/pandas_profiling/report/presentation/flavours/html/templates/overview/overview.html index 00d76e758..537036a76 100644 --- a/src/pandas_profiling/report/presentation/flavours/html/templates/overview/overview.html +++ b/src/pandas_profiling/report/presentation/flavours/html/templates/overview/overview.html @@ -55,7 +55,7 @@ -
+