diff --git a/docs/controller/console.html b/docs/controller/console.html index df74ab6c4..dceb455a9 100644 --- a/docs/controller/console.html +++ b/docs/controller/console.html @@ -100,7 +100,7 @@

Module pandas_profiling.controller.console

if not args.silent: import webbrowser - webbrowser.open_new_tab(p.file.name) + webbrowser.open_new_tab(args.output_file)
@@ -144,7 +144,7 @@

Args

if not args.silent: import webbrowser - webbrowser.open_new_tab(p.file.name) + webbrowser.open_new_tab(args.output_file)
diff --git a/docs/index.html b/docs/index.html index 22f1db52b..2e1ac75f5 100644 --- a/docs/index.html +++ b/docs/index.html @@ -157,7 +157,6 @@

Dependencies

.. include:: ../README.md """ -import html import sys import warnings @@ -174,6 +173,7 @@

Dependencies

from pandas_profiling.controller import pandas_decorator import pandas_profiling.view.templates as templates from pandas_profiling.model.describe import describe as describe_df +from pandas_profiling.view.notebook import display_notebook_iframe from pandas_profiling.view.report import to_html @@ -319,50 +319,13 @@

Dependencies

"""Used to output the HTML representation to a Jupyter notebook. When config.notebook.iframe.attribute is "src", this function creates a temporary HTML file in `./tmp/profile_[hash].html` and returns an Iframe pointing to that contents. - When config.notebook.iframe.attribute is "srco", the same HTML is injected in the "srcdoc" attribute of + When config.notebook.iframe.attribute is "srcdoc", the same HTML is injected in the "srcdoc" attribute of the Iframe. Notes: This constructions solves problems with conflicting stylesheets and navigation links. """ - attribute = config["notebook"]["iframe"]["attribute"].get(str) - if attribute == "src": - tmp_file = Path("./ipynb_tmp") / self.get_unique_file_name() - tmp_file.parent.mkdir(exist_ok=True) - self.to_file(tmp_file) - from IPython.lib.display import IFrame, display - - display( - IFrame( - str(tmp_file), - width=config["notebook"]["iframe"]["width"].get(str), - height=config["notebook"]["iframe"]["height"].get(str), - ) - ) - elif attribute: - from IPython.core.display import HTML, display - - iframe = """ - <iframe - width="{width}" - height="{height}" - srcdoc="{src}" - frameborder="0" - allowfullscreen - ></iframe> - """ - iframe = iframe.format( - width=config["notebook"]["iframe"]["width"].get(str), - height=config["notebook"]["iframe"]["height"].get(str), - src=html.escape(self.to_html()), - ) - display(HTML(iframe)) - else: - raise ValueError( - 'Iframe Attribute can be "src" or "srcdoc" (current: {}).'.format( - attribute - ) - ) + display_notebook_iframe(self) def __repr__(self): """Override so that Jupyter Notebook does not print the object.""" @@ -556,50 +519,13 @@

Classes

"""Used to output the HTML representation to a Jupyter notebook. When config.notebook.iframe.attribute is "src", this function creates a temporary HTML file in `./tmp/profile_[hash].html` and returns an Iframe pointing to that contents. - When config.notebook.iframe.attribute is "srco", the same HTML is injected in the "srcdoc" attribute of + When config.notebook.iframe.attribute is "srcdoc", the same HTML is injected in the "srcdoc" attribute of the Iframe. Notes: This constructions solves problems with conflicting stylesheets and navigation links. """ - attribute = config["notebook"]["iframe"]["attribute"].get(str) - if attribute == "src": - tmp_file = Path("./ipynb_tmp") / self.get_unique_file_name() - tmp_file.parent.mkdir(exist_ok=True) - self.to_file(tmp_file) - from IPython.lib.display import IFrame, display - - display( - IFrame( - str(tmp_file), - width=config["notebook"]["iframe"]["width"].get(str), - height=config["notebook"]["iframe"]["height"].get(str), - ) - ) - elif attribute: - from IPython.core.display import HTML, display - - iframe = """ - <iframe - width="{width}" - height="{height}" - srcdoc="{src}" - frameborder="0" - allowfullscreen - ></iframe> - """ - iframe = iframe.format( - width=config["notebook"]["iframe"]["width"].get(str), - height=config["notebook"]["iframe"]["height"].get(str), - src=html.escape(self.to_html()), - ) - display(HTML(iframe)) - else: - raise ValueError( - 'Iframe Attribute can be "src" or "srcdoc" (current: {}).'.format( - attribute - ) - ) + display_notebook_iframe(self) def __repr__(self): """Override so that Jupyter Notebook does not print the object.""" diff --git a/docs/model/describe.html b/docs/model/describe.html index c162a022c..5f0138074 100644 --- a/docs/model/describe.html +++ b/docs/model/describe.html @@ -414,10 +414,14 @@

Module pandas_profiling.model.describe

A dictionary containing the base64 encoded plots for each diagram that is active in the config (matrix, bar, heatmap, dendrogram). """ missing_map = { - "matrix": {"func": plot.missing_matrix, "min_missing": 0}, - "bar": {"func": plot.missing_bar, "min_missing": 0}, - "heatmap": {"func": plot.missing_heatmap, "min_missing": 2}, - "dendrogram": {"func": plot.missing_dendrogram, "min_missing": 1}, + "bar": {"func": plot.missing_bar, "min_missing": 0, "name": "Count"}, + "matrix": {"func": plot.missing_matrix, "min_missing": 0, "name": "Matrix"}, + "heatmap": {"func": plot.missing_heatmap, "min_missing": 2, "name": "Heatmap"}, + "dendrogram": { + "func": plot.missing_dendrogram, + "min_missing": 1, + "name": "Dendrogram", + }, } missing = {} @@ -430,7 +434,10 @@

Module pandas_profiling.model.describe

table_stats["n_vars_with_missing"] - table_stats["n_vars_all_missing"] >= settings["min_missing"] ): - missing[name] = settings["func"](df) + missing[name] = { + "name": settings["name"], + "matrix": settings["func"](df), + } return missing @@ -1241,10 +1248,14 @@

Returns

A dictionary containing the base64 encoded plots for each diagram that is active in the config (matrix, bar, heatmap, dendrogram). """ missing_map = { - "matrix": {"func": plot.missing_matrix, "min_missing": 0}, - "bar": {"func": plot.missing_bar, "min_missing": 0}, - "heatmap": {"func": plot.missing_heatmap, "min_missing": 2}, - "dendrogram": {"func": plot.missing_dendrogram, "min_missing": 1}, + "bar": {"func": plot.missing_bar, "min_missing": 0, "name": "Count"}, + "matrix": {"func": plot.missing_matrix, "min_missing": 0, "name": "Matrix"}, + "heatmap": {"func": plot.missing_heatmap, "min_missing": 2, "name": "Heatmap"}, + "dendrogram": { + "func": plot.missing_dendrogram, + "min_missing": 1, + "name": "Dendrogram", + }, } missing = {} @@ -1257,7 +1268,10 @@

Returns

table_stats["n_vars_with_missing"] - table_stats["n_vars_all_missing"] >= settings["min_missing"] ): - missing[name] = settings["func"](df) + missing[name] = { + "name": settings["name"], + "matrix": settings["func"](df), + } return missing
diff --git a/docs/utils/index.html b/docs/utils/index.html index 8c1c6a19e..dd47a5043 100644 --- a/docs/utils/index.html +++ b/docs/utils/index.html @@ -37,6 +37,10 @@

Sub-modules

Utils for pandas DataFrames.

+
pandas_profiling.utils.notebook
+
+

Utility functions that come in handy when working with Jupyter notebooks

+
pandas_profiling.utils.paths

Paths that are useful throughout the project.

@@ -65,6 +69,7 @@

Index

diff --git a/docs/view/index.html b/docs/view/index.html index 1a28979b5..c3a952883 100644 --- a/docs/view/index.html +++ b/docs/view/index.html @@ -33,6 +33,10 @@

Sub-modules

Formatters are utilities for formatting numbers and certain strings

+
pandas_profiling.view.notebook
+
+

Functionality related to displaying the profile report in Jupyter notebooks.

+
pandas_profiling.view.plot

Plot functions for the profiling report.

@@ -68,6 +72,7 @@

Index

  • Sub-modules

    • pandas_profiling.view.formatters
    • +
    • pandas_profiling.view.notebook
    • pandas_profiling.view.plot
    • pandas_profiling.view.report
    • pandas_profiling.view.templates
    • diff --git a/docs/view/plot.html b/docs/view/plot.html index 536d3a678..83c442676 100644 --- a/docs/view/plot.html +++ b/docs/view/plot.html @@ -228,7 +228,7 @@

      Module pandas_profiling.view.plot

      height = 4 if len(data.columns) > 10: height += int((len(data.columns) - 10) / 5) - height = max(height, 10) + height = min(height, 10) missingno.heatmap( data, @@ -543,7 +543,7 @@

      Returns

      height = 4 if len(data.columns) > 10: height += int((len(data.columns) - 10) / 5) - height = max(height, 10) + height = min(height, 10) missingno.heatmap( data, diff --git a/docs/view/report.html b/docs/view/report.html index e81a35124..1ff5cf763 100644 --- a/docs/view/report.html +++ b/docs/view/report.html @@ -46,7 +46,7 @@

      Module pandas_profiling.view.report

      freqtable: The frequency table. n: The total number of values. table_template: The name of the template. - max_number_to_print: The maximum number of observatios to print. + max_number_to_print: The maximum number of observations to print. nb_col: The number of columns in the grid. (Default value = 6) Returns: @@ -147,7 +147,7 @@

      Module pandas_profiling.view.report

      return templates.template("freq_table.html").render(rows=rows) -def render_overview_html(stats_object: dict) -> str: +def render_overview_section(stats_object: dict) -> str: """Render the overview HTML. Args: @@ -164,45 +164,36 @@

      Module pandas_profiling.view.report

      ) -def render_correlations_html(stats_object: dict) -> str: - """Render the correlations HTML. +def get_correlation_items(stats_object) -> dict: + """Create the list of correlation items Args: - stats_object: The diagrams to display in the correlation component. + stats_object: dict of correlations Returns: - The rendered HTML of the correlations component of the profile. + List of correlation items to show in the interface. """ - values = {} - active = "" + items = {} if "pearson" in stats_object["correlations"]: - if active == "": - active = "pearson" - values["pearson"] = { + items["pearson"] = { "matrix": plot.correlation_matrix(stats_object["correlations"]["pearson"]), "name": "Pearson's r", } if "spearman" in stats_object["correlations"]: - if active == "": - active = "spearman" - values["spearman"] = { + items["spearman"] = { "matrix": plot.correlation_matrix(stats_object["correlations"]["spearman"]), "name": "Spearman's &rho;", } if "kendall" in stats_object["correlations"]: - if active == "": - active = "kendall" - values["kendall"] = { + items["kendall"] = { "matrix": plot.correlation_matrix(stats_object["correlations"]["kendall"]), "name": "Kendall's &tau;", } if "phi_k" in stats_object["correlations"]: - if active == "": - active = "phi_k" - values["phi_k"] = { + items["phi_k"] = { "matrix": plot.correlation_matrix( stats_object["correlations"]["phi_k"], vmin=0 ), @@ -210,9 +201,7 @@

      Module pandas_profiling.view.report

      } if "cramers" in stats_object["correlations"]: - if active == "": - active = "cramers" - values["cramers"] = { + items["cramers"] = { "matrix": plot.correlation_matrix( stats_object["correlations"]["cramers"], vmin=0 ), @@ -220,19 +209,31 @@

      Module pandas_profiling.view.report

      } if "recoded" in stats_object["correlations"]: - if active == "": - active = "recoded" - values["recoded"] = { + items["recoded"] = { "matrix": plot.correlation_matrix( stats_object["correlations"]["recoded"], vmin=0 ), "name": "Recoded", } - return templates.template("correlations.html").render(values=values, active=active) + return items -def render_missing_html(stats_object: dict) -> str: +def render_correlations_section(stats_object: dict) -> str: + """Render the correlations HTML. + + Args: + stats_object: The diagrams to display in the correlation component. + + Returns: + The rendered HTML of the correlations component of the profile. + """ + items = get_correlation_items(stats_object) + + return templates.template("components/tabs.html").render(values=items) + + +def render_missing_section(stats_object: dict) -> str: """Render the missing values HTML. Args: @@ -241,10 +242,12 @@

      Module pandas_profiling.view.report

      Returns: The missing values component HTML. """ - return templates.template("missing.html").render(values=stats_object["missing"]) + return templates.template("components/tabs.html").render( + values=stats_object["missing"] + ) -def render_variables_html(stats_object: dict) -> str: +def render_variables_section(stats_object: dict) -> str: """Render the HTML for each of the variables in the DataFrame. Args: @@ -281,7 +284,6 @@

      Module pandas_profiling.view.report

      formatted_values["row_classes"]["missing"] = "alert" if row["type"] in {Variable.TYPE_NUM, Variable.TYPE_DATE}: - formatted_values["histogram"] = histogram(row["histogramdata"], row) formatted_values["mini_histogram"] = mini_histogram( row["histogramdata"], row @@ -364,7 +366,28 @@

      Module pandas_profiling.view.report

      return rows_html -def render_sample_html(sample: dict) -> str: +def get_sample_items(sample: dict): + """Create the list of sample items + + Args: + sample: dict of samples + + Returns: + List of sample items to show in the interface. + """ + items = {} + names = {"head": "First rows", "tail": "Last rows"} + for key in sample: + items[key] = { + "name": names[key], + "value": '<div id="sample-container" class="col-sm-12">{}</div>'.format( + sample[key].to_html(classes="sample table table-striped") + ), + } + return items + + +def render_sample_section(sample: dict) -> str: """Render the sample HTML Args: @@ -373,13 +396,9 @@

      Module pandas_profiling.view.report

      Returns: The HTML rendering of the samples. """ - formatted_samples = {} - for key in sample: - formatted_samples[key] = sample[key].to_html(classes="sample table-striped") - sample_html = templates.template("sample.html").render(values=formatted_samples) - # Previously, we only displayed the first samples. - # sample_html = templates.template('sample.html').render(sample_table_html=sample.to_html(classes="sample")) - return sample_html + items = get_sample_items(sample) + + return templates.template("components/list.html").render(values=items) def to_html(sample: dict, stats_object: dict) -> str: @@ -407,17 +426,37 @@

      Module pandas_profiling.view.report

      "stats_object badly formatted. Did you generate this using the pandas_profiling.describe() function?" ) - render_htmls = { - "overview_html": render_overview_html(stats_object), - "rows_html": render_variables_html(stats_object), - "correlations_html": render_correlations_html(stats_object), - "missing_html": render_missing_html(stats_object), - "sample_html": render_sample_html(sample), - "full_width": config["style"]["full_width"].get(bool), - } - - # TODO: should be done in the template - return templates.template("base.html").render(render_htmls) + sections = [ + { + "title": "Overview", + "anchor_id": "overview", + "content": render_overview_section(stats_object), + }, + { + "title": "Variables", + "anchor_id": "variables", + "content": render_variables_section(stats_object), + }, + { + "title": "Correlations", + "anchor_id": "correlations", + "content": render_correlations_section(stats_object), + }, + { + "title": "Missing values", + "anchor_id": "missing", + "content": render_missing_section(stats_object), + }, + { + "title": "Sample", + "anchor_id": "sample", + "content": render_sample_section(sample), + }, + ] + + return templates.template("base.html").render( + sections=sections, full_width=config["style"]["full_width"].get(bool) + )
  • @@ -499,7 +538,7 @@

    Args

    table_template
    The name of the template.
    max_number_to_print
    -
    The maximum number of observatios to print.
    +
    The maximum number of observations to print.
    nb_col
    The number of columns in the grid. (Default value = 6)
    @@ -517,7 +556,7 @@

    Returns

    freqtable: The frequency table. n: The total number of values. table_template: The name of the template. - max_number_to_print: The maximum number of observatios to print. + max_number_to_print: The maximum number of observations to print. nb_col: The number of columns in the grid. (Default value = 6) Returns: @@ -582,59 +621,50 @@

    Returns

    ) -
    -def render_correlations_html(stats_object) +
    +def get_correlation_items(stats_object)
    -

    Render the correlations HTML.

    +

    Create the list of correlation items

    Args

    stats_object
    -
    The diagrams to display in the correlation component.
    +
    dict of correlations

    Returns

    -

    The rendered HTML of the correlations component of the profile.

    +

    List of correlation items to show in the interface.

    Source code -
    def render_correlations_html(stats_object: dict) -> str:
    -    """Render the correlations HTML.
    +
    def get_correlation_items(stats_object) -> dict:
    +    """Create the list of correlation items
     
         Args:
    -        stats_object: The diagrams to display in the correlation component.
    +        stats_object: dict of correlations
     
         Returns:
    -        The rendered HTML of the correlations component of the profile.
    +        List of correlation items to show in the interface.
         """
    -    values = {}
    -    active = ""
    +    items = {}
         if "pearson" in stats_object["correlations"]:
    -        if active == "":
    -            active = "pearson"
    -        values["pearson"] = {
    +        items["pearson"] = {
                 "matrix": plot.correlation_matrix(stats_object["correlations"]["pearson"]),
                 "name": "Pearson's r",
             }
     
         if "spearman" in stats_object["correlations"]:
    -        if active == "":
    -            active = "spearman"
    -        values["spearman"] = {
    +        items["spearman"] = {
                 "matrix": plot.correlation_matrix(stats_object["correlations"]["spearman"]),
                 "name": "Spearman's &rho;",
             }
     
         if "kendall" in stats_object["correlations"]:
    -        if active == "":
    -            active = "kendall"
    -        values["kendall"] = {
    +        items["kendall"] = {
                 "matrix": plot.correlation_matrix(stats_object["correlations"]["kendall"]),
                 "name": "Kendall's &tau;",
             }
     
         if "phi_k" in stats_object["correlations"]:
    -        if active == "":
    -            active = "phi_k"
    -        values["phi_k"] = {
    +        items["phi_k"] = {
                 "matrix": plot.correlation_matrix(
                     stats_object["correlations"]["phi_k"], vmin=0
                 ),
    @@ -642,9 +672,7 @@ 

    Returns

    } if "cramers" in stats_object["correlations"]: - if active == "": - active = "cramers" - values["cramers"] = { + items["cramers"] = { "matrix": plot.correlation_matrix( stats_object["correlations"]["cramers"], vmin=0 ), @@ -652,20 +680,81 @@

    Returns

    } if "recoded" in stats_object["correlations"]: - if active == "": - active = "recoded" - values["recoded"] = { + items["recoded"] = { "matrix": plot.correlation_matrix( stats_object["correlations"]["recoded"], vmin=0 ), "name": "Recoded", } - return templates.template("correlations.html").render(values=values, active=active)
    + return items
    +
    +
    +
    +def get_sample_items(sample) +
    +
    +

    Create the list of sample items

    +

    Args

    +
    +
    sample
    +
    dict of samples
    +
    +

    Returns

    +

    List of sample items to show in the interface.

    +
    +Source code +
    def get_sample_items(sample: dict):
    +    """Create the list of sample items
    +
    +    Args:
    +        sample: dict of samples
    +
    +    Returns:
    +        List of sample items to show in the interface.
    +    """
    +    items = {}
    +    names = {"head": "First rows", "tail": "Last rows"}
    +    for key in sample:
    +        items[key] = {
    +            "name": names[key],
    +            "value": '<div id="sample-container" class="col-sm-12">{}</div>'.format(
    +                sample[key].to_html(classes="sample table table-striped")
    +            ),
    +        }
    +    return items
    +
    +
    +
    +def render_correlations_section(stats_object) +
    +
    +

    Render the correlations HTML.

    +

    Args

    +
    +
    stats_object
    +
    The diagrams to display in the correlation component.
    +
    +

    Returns

    +

    The rendered HTML of the correlations component of the profile.

    +
    +Source code +
    def render_correlations_section(stats_object: dict) -> str:
    +    """Render the correlations HTML.
    +
    +    Args:
    +        stats_object: The diagrams to display in the correlation component.
    +
    +    Returns:
    +        The rendered HTML of the correlations component of the profile.
    +    """
    +    items = get_correlation_items(stats_object)
    +
    +    return templates.template("components/tabs.html").render(values=items)
    -
    -def render_missing_html(stats_object) +
    +def render_missing_section(stats_object)

    Render the missing values HTML.

    @@ -678,7 +767,7 @@

    Returns

    The missing values component HTML.

    Source code -
    def render_missing_html(stats_object: dict) -> str:
    +
    def render_missing_section(stats_object: dict) -> str:
         """Render the missing values HTML.
     
         Args:
    @@ -687,11 +776,13 @@ 

    Returns

    Returns: The missing values component HTML. """ - return templates.template("missing.html").render(values=stats_object["missing"])
    + return templates.template("components/tabs.html").render( + values=stats_object["missing"] + )
    -
    -def render_overview_html(stats_object) +
    +def render_overview_section(stats_object)

    Render the overview HTML.

    @@ -704,7 +795,7 @@

    Returns

    The rendered HTML for the overview component of the profile.

    Source code -
    def render_overview_html(stats_object: dict) -> str:
    +
    def render_overview_section(stats_object: dict) -> str:
         """Render the overview HTML.
     
         Args:
    @@ -721,8 +812,8 @@ 

    Returns

    )
    -
    -def render_sample_html(sample) +
    +def render_sample_section(sample)

    Render the sample HTML

    @@ -735,7 +826,7 @@

    Returns

    The HTML rendering of the samples.

    Source code -
    def render_sample_html(sample: dict) -> str:
    +
    def render_sample_section(sample: dict) -> str:
         """Render the sample HTML
     
         Args:
    @@ -744,17 +835,13 @@ 

    Returns

    Returns: The HTML rendering of the samples. """ - formatted_samples = {} - for key in sample: - formatted_samples[key] = sample[key].to_html(classes="sample table-striped") - sample_html = templates.template("sample.html").render(values=formatted_samples) - # Previously, we only displayed the first samples. - # sample_html = templates.template('sample.html').render(sample_table_html=sample.to_html(classes="sample")) - return sample_html
    + items = get_sample_items(sample) + + return templates.template("components/list.html").render(values=items)
    -
    -def render_variables_html(stats_object) +
    +def render_variables_section(stats_object)

    Render the HTML for each of the variables in the DataFrame.

    @@ -767,7 +854,7 @@

    Returns

    The rendered HTML, where each row represents a variable.

    Source code -
    def render_variables_html(stats_object: dict) -> str:
    +
    def render_variables_section(stats_object: dict) -> str:
         """Render the HTML for each of the variables in the DataFrame.
     
         Args:
    @@ -804,7 +891,6 @@ 

    Returns

    formatted_values["row_classes"]["missing"] = "alert" if row["type"] in {Variable.TYPE_NUM, Variable.TYPE_DATE}: - formatted_values["histogram"] = histogram(row["histogramdata"], row) formatted_values["mini_histogram"] = mini_histogram( row["histogramdata"], row @@ -931,17 +1017,37 @@

    Returns

    "stats_object badly formatted. Did you generate this using the pandas_profiling.describe() function?" ) - render_htmls = { - "overview_html": render_overview_html(stats_object), - "rows_html": render_variables_html(stats_object), - "correlations_html": render_correlations_html(stats_object), - "missing_html": render_missing_html(stats_object), - "sample_html": render_sample_html(sample), - "full_width": config["style"]["full_width"].get(bool), - } - - # TODO: should be done in the template - return templates.template("base.html").render(render_htmls)
    + sections = [ + { + "title": "Overview", + "anchor_id": "overview", + "content": render_overview_section(stats_object), + }, + { + "title": "Variables", + "anchor_id": "variables", + "content": render_variables_section(stats_object), + }, + { + "title": "Correlations", + "anchor_id": "correlations", + "content": render_correlations_section(stats_object), + }, + { + "title": "Missing values", + "anchor_id": "missing", + "content": render_missing_section(stats_object), + }, + { + "title": "Sample", + "anchor_id": "sample", + "content": render_sample_section(sample), + }, + ] + + return templates.template("base.html").render( + sections=sections, full_width=config["style"]["full_width"].get(bool) + )
    @@ -964,11 +1070,13 @@

    Index

    diff --git a/examples/meteorites/meteorites_report.html b/examples/meteorites/meteorites_report.html index a3108f61d..d09564b82 100644 --- a/examples/meteorites/meteorites_report.html +++ b/examples/meteorites/meteorites_report.html @@ -260,7 +260,9 @@ width: 100%; overflow-y: hidden; } - +.img-responsive{ + max-width: 99%; +} .footer-text{ padding:20px; } @@ -280,11 +282,16 @@ /*top: -70px;*/ } -#sample-head, #sample-tail{ +#sample-container{ overflow: auto; width: 100%; overflow-y: hidden; -}

    Overview

    Dataset info

    Number of variables14
    Number of observations45726
    Missing cells29703 (< 0.1%)
    Duplicate rows0 (0.0%)
    Total size in memory4.6 MiB
    Average record size in memory105.0 B

    Variables types

    Numeric4
    Categorical5
    Boolean1
    Date1
    URL0
    Text (Unique)1
    Rejected2
    Unsupported0

    Warnings

    GeoLocation has a high cardinality: 17101 distinct values Warning
    GeoLocation has 7315 (16.0%) missing values Missing
    mass_(g) is highly skewed (γ1 = 76.918) Skewed
    recclass has a high cardinality: 466 distinct values Warning
    reclat has 6438 (14.1%) zeros Zeros
    reclat has 7315 (16.0%) missing values Missing
    reclat_city is highly correlated with reclat (ρ = 0.99426) Rejected
    reclong has 6214 (13.6%) zeros Zeros
    reclong has 7315 (16.0%) missing values Missing
    source has constant value "NASA" Rejected

    Variables

    boolean
    Boolean

    Distinct count2
    Unique (%)< 0.1%
    Missing (%)0.0%
    Missing (n)0
    True
    22900
    False
    22826
    ValueCountFrequency (%) 
    True 22900 50.1%
     
    False 22826 49.9%
     

    fall
    Categorical

    Distinct count2
    Unique (%)< 0.1%
    Missing (%)0.0%
    Missing (n)0
    Found
    44609
    Fell
     
    1117
    ValueCountFrequency (%) 
    Found 44609 > 99.9%
     
    Fell 1117 < 0.1%
     
    Max length5
    Mean length4.9756
    Min length4
    Contains charsTrue
    Contains digitsFalse
    Contains spacesFalse
    Contains non-wordsFalse

    GeoLocation
    Categorical

    Distinct count17101
    Unique (%)37.4%
    Missing (%)16.0%
    Missing (n)7315
    (0.0, 0.0)
    6214
    (-71.5, 35.66667)
     
    4761
    (-84.0, 168.0)
     
    3040
    Other values (17097)
    24396
    (Missing)
    7315
    ValueCountFrequency (%) 
    (0.0, 0.0) 6214 13.6%
     
    (-71.5, 35.66667) 4761 10.4%
     
    (-84.0, 168.0) 3040 6.6%
     
    (-72.0, 26.0) 1505 < 0.1%
     
    (-79.68333, 159.75) 657 < 0.1%
     
    (-76.71667, 159.66667) 637 < 0.1%
     
    (-76.18333, 157.16667) 539 < 0.1%
     
    (-79.68333, 155.75) 473 < 0.1%
     
    (-84.21667, 160.5) 263 < 0.1%
     
    (-86.36667, -70.0) 226 < 0.1%
     
    Other values (17090) 20096 43.9%
     
    (Missing) 7315 16.0%
     
    Max length24
    Mean length15.016
    Min length3
    Contains charsTrue
    Contains digitsTrue
    Contains spacesTrue
    Contains non-wordsTrue

    id
    Numeric

    Distinct count45716
    Unique (%)> 99.9%
    Missing (%)0.0%
    Missing (n)0
    Infinite (%)0.0%
    Infinite (n)0
    Mean26884
    Minimum1
    Maximum57458
    Zeros (%)0.0%
    Mini histogram

    Quantile statistics

    Minimum1
    5-th percentile2388.8
    Q112681
    Median24256
    Q340654
    95-th percentile54891
    Maximum57458
    Range57457
    Interquartile range27972

    Descriptive statistics

    Standard deviation16863
    Coef of variation0.62727
    Kurtosis-1.1601
    Mean26884
    MAD14490
    Skewness0.26653
    Sum1.2293e+09
    Variance2.8438e+08
    Memory size357.3 KiB
    Histogram
    ValueCountFrequency (%) 
    417 2 < 0.1%
     
    398 2 < 0.1%
     
    1 2 < 0.1%
     
    6 2 < 0.1%
     
    392 2 < 0.1%
     
    370 2 < 0.1%
     
    379 2 < 0.1%
     
    2 2 < 0.1%
     
    390 2 < 0.1%
     
    10 2 < 0.1%
     
    Other values (45706) 45706 > 99.9%
     

    Minimum 5 values

    ValueCountFrequency (%) 
    1 2 < 0.1%
     
    2 2 < 0.1%
     
    4 1 < 0.1%
     
    5 1 < 0.1%
     
    6 2 < 0.1%
     

    Maximum 5 values

    ValueCountFrequency (%) 
    57458 1 < 0.1%
     
    57457 1 < 0.1%
     
    57456 1 < 0.1%
     
    57455 1 < 0.1%
     
    57454 1 < 0.1%
     

    mass_(g)
    Numeric

    Distinct count12577
    Unique (%)27.5%
    Missing (%)< 0.1%
    Missing (n)131
    Infinite (%)0.0%
    Infinite (n)0
    Mean13278
    Minimum0
    Maximum6e+07
    Zeros (%)< 0.1%
    Mini histogram

    Quantile statistics

    Minimum0
    5-th percentile1.1
    Q17.2
    Median32.61
    Q3202.9
    95-th percentile4000
    Maximum6e+07
    Range6e+07
    Interquartile range195.7

    Descriptive statistics

    Standard deviation5.7493e+05
    Coef of variation43.298
    Kurtosis6798.4
    Mean13278
    MAD25113
    Skewness76.918
    Sum6.0543e+08
    Variance3.3054e+11
    Memory size357.3 KiB
    Histogram
    ValueCountFrequency (%) 
    1.3 171 < 0.1%
     
    1.2 140 < 0.1%
     
    1.4 138 < 0.1%
     
    2.1 130 < 0.1%
     
    2.4 126 < 0.1%
     
    1.6 120 < 0.1%
     
    0.5 119 < 0.1%
     
    1.1 116 < 0.1%
     
    3.8 114 < 0.1%
     
    0.7 111 < 0.1%
     
    Other values (12566) 44310 > 99.9%
     
    (Missing) 131 < 0.1%
     

    Minimum 5 values

    ValueCountFrequency (%) 
    0 19 < 0.1%
     
    0.01 2 < 0.1%
     
    0.013 1 < 0.1%
     
    0.02 1 < 0.1%
     
    0.03 1 < 0.1%
     

    Maximum 5 values

    ValueCountFrequency (%) 
    6e+07 1 < 0.1%
     
    5.82e+07 1 < 0.1%
     
    5e+07 1 < 0.1%
     
    3e+07 1 < 0.1%
     
    2.8e+07 1 < 0.1%
     

    mixed
    Categorical

    Distinct count2
    Unique (%)< 0.1%
    Missing (%)0.0%
    Missing (n)0
    1
    22896
    A
    22830
    ValueCountFrequency (%) 
    1 22896 50.1%
     
    A 22830 49.9%
     
    Max length1
    Mean length1
    Min length1
    Contains charsTrue
    Contains digitsTrue
    Contains spacesFalse
    Contains non-wordsFalse

    name
    Categorical, Unique

    First 5 values
    Aachen
    Aachen copy
    Aarhus
    Aarhus copy
    Abajo
    Last 5 values
    Österplana 062
    Österplana 063
    Österplana 064
    Łowicz
    Święcany

    First 5 values

    ValueCountFrequency (%) 
    Aachen 1 < 0.1%
     
    Aachen copy 1 < 0.1%
     
    Aarhus 1 < 0.1%
     
    Aarhus copy 1 < 0.1%
     
    Abajo 1 < 0.1%
     

    Last 5 values

    ValueCountFrequency (%) 
    Święcany 1 < 0.1%
     
    Łowicz 1 < 0.1%
     
    Österplana 064 1 < 0.1%
     
    Österplana 063 1 < 0.1%
     
    Österplana 062 1 < 0.1%
     

    nametype
    Categorical

    Distinct count2
    Unique (%)< 0.1%
    Missing (%)0.0%
    Missing (n)0
    Valid
    45651
    Relict
     
    75
    ValueCountFrequency (%) 
    Valid 45651 > 99.9%
     
    Relict 75 < 0.1%
     
    Max length6
    Mean length5.0016
    Min length5
    Contains charsTrue
    Contains digitsFalse
    Contains spacesFalse
    Contains non-wordsFalse

    recclass
    Categorical

    Distinct count466
    Unique (%)< 0.1%
    Missing (%)0.0%
    Missing (n)0
    L6
    8287
    H5
    7143
    L5
     
    4797
    Other values (463)
    25499
    ValueCountFrequency (%) 
    L6 8287 18.1%
     
    H5 7143 15.6%
     
    L5 4797 10.5%
     
    H6 4529 9.9%
     
    H4 4211 9.2%
     
    LL5 2766 6.0%
     
    LL6 2043 < 0.1%
     
    L4 1253 < 0.1%
     
    H4/5 428 < 0.1%
     
    CM2 416 < 0.1%
     
    Other values (456) 9853 21.5%
     
    Max length26
    Mean length3.0525
    Min length1
    Contains charsTrue
    Contains digitsTrue
    Contains spacesTrue
    Contains non-wordsTrue

    reclat
    Numeric

    Distinct count12739
    Unique (%)27.9%
    Missing (%)16.0%
    Missing (n)7315
    Infinite (%)0.0%
    Infinite (n)0
    Mean-39.107
    Minimum-87.367
    Maximum81.167
    Zeros (%)14.1%
    Mini histogram

    Quantile statistics

    Minimum-87.367
    5-th percentile-84.355
    Q1-76.714
    Median-71.5
    Q30
    95-th percentile34.494
    Maximum81.167
    Range168.53
    Interquartile range76.714

    Descriptive statistics

    Standard deviation46.386
    Coef of variation-1.1861
    Kurtosis-1.4769
    Mean-39.107
    MAD43.937
    Skewness0.49132
    Sum-1.5021e+06
    Variance2151.7
    Memory size357.3 KiB
    Histogram
    ValueCountFrequency (%) 
    0 6438 14.1%
     
    -71.5 4761 10.4%
     
    -84 3040 6.6%
     
    -72 1506 < 0.1%
     
    -79.683 1130 < 0.1%
     
    -76.717 680 < 0.1%
     
    -76.183 539 < 0.1%
     
    -84.217 263 < 0.1%
     
    -86.367 226 < 0.1%
     
    -86.717 217 < 0.1%
     
    Other values (12728) 19611 42.9%
     
    (Missing) 7315 16.0%
     

    Minimum 5 values

    ValueCountFrequency (%) 
    -87.367 4 < 0.1%
     
    -87.033 3 < 0.1%
     
    -86.933 3 < 0.1%
     
    -86.717 217 < 0.1%
     
    -86.567 17 < 0.1%
     

    Maximum 5 values

    ValueCountFrequency (%) 
    81.167 1 < 0.1%
     
    76.533 1 < 0.1%
     
    76.133 1 < 0.1%
     
    72.883 1 < 0.1%
     
    72.683 1 < 0.1%
     

    reclat_city
    Highly correlated

    This variable is highly correlated with reclat and should be ignored for analysis

    Correlation0.99426

    reclong
    Numeric

    Distinct count14641
    Unique (%)32.0%
    Missing (%)16.0%
    Missing (n)7315
    Infinite (%)0.0%
    Infinite (n)0
    Mean61.053
    Minimum-165.43
    Maximum354.47
    Zeros (%)13.6%
    Mini histogram

    Quantile statistics

    Minimum-165.43
    5-th percentile-90.427
    Q10
    Median35.667
    Q3157.17
    95-th percentile168
    Maximum354.47
    Range519.91
    Interquartile range157.17

    Descriptive statistics

    Standard deviation80.655
    Coef of variation1.3211
    Kurtosis-0.73139
    Mean61.053
    MAD67.606
    Skewness-0.17438
    Sum2.3451e+06
    Variance6505.3
    Memory size357.3 KiB
    Histogram
    ValueCountFrequency (%) 
    0 6214 13.6%
     
    35.667 4985 10.9%
     
    168 3040 6.6%
     
    26 1506 < 0.1%
     
    159.75 657 < 0.1%
     
    159.67 637 < 0.1%
     
    157.17 542 < 0.1%
     
    155.75 473 < 0.1%
     
    160.5 263 < 0.1%
     
    -70 228 < 0.1%
     
    Other values (14630) 19866 43.4%
     
    (Missing) 7315 16.0%
     

    Minimum 5 values

    ValueCountFrequency (%) 
    -165.43 9 < 0.1%
     
    -165.12 17 < 0.1%
     
    -163.17 1 < 0.1%
     
    -162.55 1 < 0.1%
     
    -157.87 1 < 0.1%
     

    Maximum 5 values

    ValueCountFrequency (%) 
    354.47 1 < 0.1%
     
    178.2 1 < 0.1%
     
    178.08 1 < 0.1%
     
    175.73 1 < 0.1%
     
    175.13 1 < 0.1%
     

    source
    Constant

    This variable is constant and should be ignored for analysis

    Constant valueNASA

    year
    Date

    Distinct count246
    Unique (%)< 0.1%
    Missing (%)< 0.1%
    Missing (n)312
    Infinite (%)0.0%
    Infinite (n)0
    Minimum1688-01-01 00:00:00
    Maximum2101-01-01 00:00:00
    Mini histogram
    Histogram

    Correlations

    Missing values

    Matrix

    Matrix

    Count

    Bar

    Heatmap

    Heatmap

    Dendrogram

    Dendrogram

    Sample

    First rows

    booleanfallGeoLocationidmass_(g)mixednamenametyperecclassreclatreclat_cityreclongsourceyear
    0TrueFell(50.775, 6.08333)121.0AAachenValidL550.7750053.3938626.08333NASA1880-01-01
    1TrueFell(56.18333, 10.23333)2720.0AAarhusValidH656.1833360.46106510.23333NASA1951-01-01
    2TrueFell(54.21667, -113.0)6107000.0AAbeeValidEH454.2166761.415499-113.00000NASA1952-01-01
    3TrueFell(16.88333, -99.9)101914.01AcapulcoValidAcapulcoite16.8833320.875391-99.90000NASA1976-01-01
    4FalseFell(-33.16667, -64.95)370780.0AAchirasValidL6-33.16667-35.780079-64.95000NASA1902-01-01
    5FalseFell(32.1, 71.8)3794239.0AAdhi KotValidEH432.1000031.79167871.80000NASA1919-01-01
    6TrueFell(44.83333, 95.16667)390910.01Adzhi-Bogdo (stone)ValidLL3-644.8333345.44039395.16667NASA1949-01-01
    7TrueFell(44.21667, 0.61667)39230000.01AgenValidH544.2166746.6675330.61667NASA1814-01-01
    8TrueFell(-31.6, -65.23333)3981620.0AAguadaValidL6-31.60000-24.103793-65.23333NASA1930-01-01
    9TrueFell(-30.86667, -64.55)4171440.01Aguila BlancaValidL-30.86667-25.287844-64.55000NASA1920-01-01

    Last rows

    booleanfallGeoLocationidmass_(g)mixednamenametyperecclassreclatreclat_cityreclongsourceyear
    45716TrueFell(50.775, 6.08333)121.0AAachen copyValidL550.7750053.3938626.08333NASA1880-01-01
    45717TrueFell(56.18333, 10.23333)2720.0AAarhus copyValidH656.1833360.46106510.23333NASA1951-01-01
    45718TrueFell(54.21667, -113.0)6107000.0AAbee copyValidEH454.2166761.415499-113.00000NASA1952-01-01
    45719TrueFell(16.88333, -99.9)101914.01Acapulco copyValidAcapulcoite16.8833320.875391-99.90000NASA1976-01-01
    45720FalseFell(-33.16667, -64.95)370780.0AAchiras copyValidL6-33.16667-35.780079-64.95000NASA1902-01-01
    45721FalseFell(32.1, 71.8)3794239.0AAdhi Kot copyValidEH432.1000031.79167871.80000NASA1919-01-01
    45722TrueFell(44.83333, 95.16667)390910.01Adzhi-Bogdo (stone) copyValidLL3-644.8333345.44039395.16667NASA1949-01-01
    45723TrueFell(44.21667, 0.61667)39230000.01Agen copyValidH544.2166746.6675330.61667NASA1814-01-01
    45724TrueFell(-31.6, -65.23333)3981620.0AAguada copyValidL6-31.60000-24.103793-65.23333NASA1930-01-01
    45725TrueFell(-30.86667, -64.55)4171440.01Aguila Blanca copyValidL-30.86667-25.287844-64.55000NASA1920-01-01