From b41e0d783e0b545941069bbae5416aad3b6837c9 Mon Sep 17 00:00:00 2001
From: simon_graphkite
Date: Fri, 12 Jul 2019 01:37:03 +0200
Subject: [PATCH] Commit for release 2.1.2.
- Fix [#211] and README.
---
MANIFEST.in | 7 ++-----
README.md | 2 +-
docs/index.html | 2 +-
examples/meteorites/meteorites_report.html | 2 +-
examples/nza/nza_report.html | 2 +-
examples/stata_auto/stata_auto_report.html | 2 +-
examples/titanic/titanic_report.html | 2 +-
.../website_inaccessibility_report.html | 2 +-
setup.py | 2 +-
9 files changed, 10 insertions(+), 13 deletions(-)
diff --git a/MANIFEST.in b/MANIFEST.in
index fac060ccc..ecdbb5edb 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -1,9 +1,6 @@
include LICENSE
include pandas_profiling/view/*.mplstyle
-include pandas_profiling/view/templates/*.html
-include pandas_profiling/view/templates/variables/*.html
-include pandas_profiling/view/templates/assets/*.js
-include pandas_profiling/view/templates/assets/*.css
-include pandas_profiling/view/templates/*.css
+recursive-include pandas_profiling/view/templates *.html
+recursive-include pandas_profiling/view/templates/assets *.js *.css
include pandas_profiling/config_default.yaml
include README.md
diff --git a/README.md b/README.md
index 17db53f2b..d68aae603 100644
--- a/README.md
+++ b/README.md
@@ -142,7 +142,7 @@ Read more on getting involved in the [Contribution Guide](https://github.com/pan
## Dependencies
-You need Python 3 to run this package. Other dependencies can be found in the requirements files:
+You need [Python 3](https://python3statement.org/) to run this package. Other dependencies can be found in the requirements files:
| Filename | Requirements|
|----------|-------------|
diff --git a/docs/index.html b/docs/index.html
index 2e1ac75f5..7cc746038 100644
--- a/docs/index.html
+++ b/docs/index.html
@@ -128,7 +128,7 @@ How to contribute
If you would like to be a industry partner or sponsor, please drop us a line.
Read more on getting involved in the Contribution Guide.
Dependencies
-You need Python 3 to run this package. Other dependencies can be found in the requirements files:
+You need [Python 3](https://python3statement.org/) to run this package. Other dependencies can be found in the requirements files:
diff --git a/examples/meteorites/meteorites_report.html b/examples/meteorites/meteorites_report.html
index d09564b82..b78359c4c 100644
--- a/examples/meteorites/meteorites_report.html
+++ b/examples/meteorites/meteorites_report.html
@@ -291,7 +291,7 @@
#overview-content td, #overview-content th{
border-top: 0;
line-height: 1;
-}Dataset info
Number of variables | 14 |
---|
Number of observations | 45726 |
---|
Missing cells | 29703 (< 0.1%) |
---|
Duplicate rows | 0 (0.0%) |
---|
Total size in memory | 4.6 MiB |
---|
Average record size in memory | 105.0 B |
---|
Variables types
Numeric | 4 |
---|
Categorical | 5 |
---|
Boolean | 1 |
---|
Date | 1 |
---|
URL | 0 |
---|
Text (Unique) | 1 |
---|
Rejected | 2 |
---|
Unsupported | 0 |
---|
Warnings
GeoLocation has a high cardinality: 17101 distinct values | Warning |
GeoLocation has 7315 (16.0%) missing values | Missing |
mass_(g) is highly skewed (γ1 = 76.918) | Skewed |
recclass has a high cardinality: 466 distinct values | Warning |
reclat has 6438 (14.1%) zeros | Zeros |
reclat has 7315 (16.0%) missing values | Missing |
reclat_city is highly correlated with reclat (ρ = 0.99422) | Rejected |
reclong has 6214 (13.6%) zeros | Zeros |
reclong has 7315 (16.0%) missing values | Missing |
source has constant value "NASA" | Rejected |
Distinct count | 2 |
---|
Unique (%) | < 0.1% |
---|
Missing (%) | 0.0% |
---|
Missing (n) | 0 |
---|
Distinct count | 2 |
---|
Unique (%) | < 0.1% |
---|
Missing (%) | 0.0% |
---|
Missing (n) | 0 |
---|
Value | Count | Frequency (%) | |
Found | 44609 | > 99.9% | |
Fell | 1117 | < 0.1% | |
Distinct count | 17101 |
---|
Unique (%) | 37.4% |
---|
Missing (%) | 16.0% |
---|
Missing (n) | 7315 |
---|
(0.0, 0.0) | 6214 |
---|
(-71.5, 35.66667) | 4761 |
---|
(-84.0, 168.0) | 3040 |
---|
Other values (17097) | 24396 |
---|
(Missing) | 7315 |
---|
Value | Count | Frequency (%) | |
(0.0, 0.0) | 6214 | 13.6% | |
(-71.5, 35.66667) | 4761 | 10.4% | |
(-84.0, 168.0) | 3040 | 6.6% | |
(-72.0, 26.0) | 1505 | < 0.1% | |
(-79.68333, 159.75) | 657 | < 0.1% | |
(-76.71667, 159.66667) | 637 | < 0.1% | |
(-76.18333, 157.16667) | 539 | < 0.1% | |
(-79.68333, 155.75) | 473 | < 0.1% | |
(-84.21667, 160.5) | 263 | < 0.1% | |
(-86.36667, -70.0) | 226 | < 0.1% | |
Other values (17090) | 20096 | 43.9% | |
(Missing) | 7315 | 16.0% | |
Distinct count | 45716 |
---|
Unique (%) | > 99.9% |
---|
Missing (%) | 0.0% |
---|
Missing (n) | 0 |
---|
Infinite (%) | 0.0% |
---|
Infinite (n) | 0 |
---|
Mean | 26884 |
---|
Minimum | 1 |
---|
Maximum | 57458 |
---|
Zeros (%) | 0.0% |
---|
Quantile statistics
Minimum | 1 |
---|
5-th percentile | 2388.8 |
---|
Q1 | 12681 |
---|
Median | 24256 |
---|
Q3 | 40654 |
---|
95-th percentile | 54891 |
---|
Maximum | 57458 |
---|
Range | 57457 |
---|
Interquartile range | 27972 |
---|
Descriptive statistics
Standard deviation | 16863 |
---|
Coef of variation | 0.62727 |
---|
Kurtosis | -1.1601 |
---|
Mean | 26884 |
---|
MAD | 14490 |
---|
Skewness | 0.26653 |
---|
Sum | 1.2293e+09 |
---|
Variance | 2.8438e+08 |
---|
Memory size | 357.3 KiB |
---|
Value | Count | Frequency (%) | |
417 | 2 | < 0.1% | |
398 | 2 | < 0.1% | |
1 | 2 | < 0.1% | |
6 | 2 | < 0.1% | |
392 | 2 | < 0.1% | |
370 | 2 | < 0.1% | |
379 | 2 | < 0.1% | |
2 | 2 | < 0.1% | |
390 | 2 | < 0.1% | |
10 | 2 | < 0.1% | |
Other values (45706) | 45706 | > 99.9% | |
Minimum 5 values
Value | Count | Frequency (%) | |
1 | 2 | < 0.1% | |
2 | 2 | < 0.1% | |
4 | 1 | < 0.1% | |
5 | 1 | < 0.1% | |
6 | 2 | < 0.1% | |
Maximum 5 values
Value | Count | Frequency (%) | |
57458 | 1 | < 0.1% | |
57457 | 1 | < 0.1% | |
57456 | 1 | < 0.1% | |
57455 | 1 | < 0.1% | |
57454 | 1 | < 0.1% | |
Distinct count | 12577 |
---|
Unique (%) | 27.5% |
---|
Missing (%) | < 0.1% |
---|
Missing (n) | 131 |
---|
Infinite (%) | 0.0% |
---|
Infinite (n) | 0 |
---|
Mean | 13278 |
---|
Minimum | 0 |
---|
Maximum | 6e+07 |
---|
Zeros (%) | < 0.1% |
---|
Quantile statistics
Minimum | 0 |
---|
5-th percentile | 1.1 |
---|
Q1 | 7.2 |
---|
Median | 32.61 |
---|
Q3 | 202.9 |
---|
95-th percentile | 4000 |
---|
Maximum | 6e+07 |
---|
Range | 6e+07 |
---|
Interquartile range | 195.7 |
---|
Descriptive statistics
Standard deviation | 5.7493e+05 |
---|
Coef of variation | 43.298 |
---|
Kurtosis | 6798.4 |
---|
Mean | 13278 |
---|
MAD | 25113 |
---|
Skewness | 76.918 |
---|
Sum | 6.0543e+08 |
---|
Variance | 3.3054e+11 |
---|
Memory size | 357.3 KiB |
---|
Value | Count | Frequency (%) | |
1.3 | 171 | < 0.1% | |
1.2 | 140 | < 0.1% | |
1.4 | 138 | < 0.1% | |
2.1 | 130 | < 0.1% | |
2.4 | 126 | < 0.1% | |
1.6 | 120 | < 0.1% | |
0.5 | 119 | < 0.1% | |
1.1 | 116 | < 0.1% | |
3.8 | 114 | < 0.1% | |
0.7 | 111 | < 0.1% | |
Other values (12566) | 44310 | > 99.9% | |
(Missing) | 131 | < 0.1% | |
Minimum 5 values
Value | Count | Frequency (%) | |
0 | 19 | < 0.1% | |
0.01 | 2 | < 0.1% | |
0.013 | 1 | < 0.1% | |
0.02 | 1 | < 0.1% | |
0.03 | 1 | < 0.1% | |
Maximum 5 values
Value | Count | Frequency (%) | |
6e+07 | 1 | < 0.1% | |
5.82e+07 | 1 | < 0.1% | |
5e+07 | 1 | < 0.1% | |
3e+07 | 1 | < 0.1% | |
2.8e+07 | 1 | < 0.1% | |
Distinct count | 2 |
---|
Unique (%) | < 0.1% |
---|
Missing (%) | 0.0% |
---|
Missing (n) | 0 |
---|
Value | Count | Frequency (%) | |
1 | 23060 | 50.4% | |
A | 22666 | 49.6% | |
First 5 values |
---|
Aachen |
Aachen copy |
Aarhus |
Aarhus copy |
Abajo |
Last 5 values |
---|
Österplana 062 |
Österplana 063 |
Österplana 064 |
Łowicz |
Święcany |
First 5 values
Value | Count | Frequency (%) | |
Aachen | 1 | < 0.1% | |
Aachen copy | 1 | < 0.1% | |
Aarhus | 1 | < 0.1% | |
Aarhus copy | 1 | < 0.1% | |
Abajo | 1 | < 0.1% | |
Last 5 values
Value | Count | Frequency (%) | |
Święcany | 1 | < 0.1% | |
Łowicz | 1 | < 0.1% | |
Österplana 064 | 1 | < 0.1% | |
Österplana 063 | 1 | < 0.1% | |
Österplana 062 | 1 | < 0.1% | |
Distinct count | 2 |
---|
Unique (%) | < 0.1% |
---|
Missing (%) | 0.0% |
---|
Missing (n) | 0 |
---|
Value | Count | Frequency (%) | |
Valid | 45651 | > 99.9% | |
Relict | 75 | < 0.1% | |
Distinct count | 466 |
---|
Unique (%) | < 0.1% |
---|
Missing (%) | 0.0% |
---|
Missing (n) | 0 |
---|
L6 | 8287 |
---|
H5 | 7143 |
---|
L5 | 4797 |
---|
Other values (463) | 25499 |
---|
Value | Count | Frequency (%) | |
L6 | 8287 | 18.1% | |
H5 | 7143 | 15.6% | |
L5 | 4797 | 10.5% | |
H6 | 4529 | 9.9% | |
H4 | 4211 | 9.2% | |
LL5 | 2766 | 6.0% | |
LL6 | 2043 | < 0.1% | |
L4 | 1253 | < 0.1% | |
H4/5 | 428 | < 0.1% | |
CM2 | 416 | < 0.1% | |
Other values (456) | 9853 | 21.5% | |
Distinct count | 12739 |
---|
Unique (%) | 27.9% |
---|
Missing (%) | 16.0% |
---|
Missing (n) | 7315 |
---|
Infinite (%) | 0.0% |
---|
Infinite (n) | 0 |
---|
Mean | -39.107 |
---|
Minimum | -87.367 |
---|
Maximum | 81.167 |
---|
Zeros (%) | 14.1% |
---|
Quantile statistics
Minimum | -87.367 |
---|
5-th percentile | -84.355 |
---|
Q1 | -76.714 |
---|
Median | -71.5 |
---|
Q3 | 0 |
---|
95-th percentile | 34.494 |
---|
Maximum | 81.167 |
---|
Range | 168.53 |
---|
Interquartile range | 76.714 |
---|
Descriptive statistics
Standard deviation | 46.386 |
---|
Coef of variation | -1.1861 |
---|
Kurtosis | -1.4769 |
---|
Mean | -39.107 |
---|
MAD | 43.937 |
---|
Skewness | 0.49132 |
---|
Sum | -1.5021e+06 |
---|
Variance | 2151.7 |
---|
Memory size | 357.3 KiB |
---|
Value | Count | Frequency (%) | |
0 | 6438 | 14.1% | |
-71.5 | 4761 | 10.4% | |
-84 | 3040 | 6.6% | |
-72 | 1506 | < 0.1% | |
-79.683 | 1130 | < 0.1% | |
-76.717 | 680 | < 0.1% | |
-76.183 | 539 | < 0.1% | |
-84.217 | 263 | < 0.1% | |
-86.367 | 226 | < 0.1% | |
-86.717 | 217 | < 0.1% | |
Other values (12728) | 19611 | 42.9% | |
(Missing) | 7315 | 16.0% | |
Minimum 5 values
Value | Count | Frequency (%) | |
-87.367 | 4 | < 0.1% | |
-87.033 | 3 | < 0.1% | |
-86.933 | 3 | < 0.1% | |
-86.717 | 217 | < 0.1% | |
-86.567 | 17 | < 0.1% | |
Maximum 5 values
Value | Count | Frequency (%) | |
81.167 | 1 | < 0.1% | |
76.533 | 1 | < 0.1% | |
76.133 | 1 | < 0.1% | |
72.883 | 1 | < 0.1% | |
72.683 | 1 | < 0.1% | |
reclat_city
Highly correlated
This variable is highly correlated with reclat
and should be ignored for analysis
Distinct count | 14641 |
---|
Unique (%) | 32.0% |
---|
Missing (%) | 16.0% |
---|
Missing (n) | 7315 |
---|
Infinite (%) | 0.0% |
---|
Infinite (n) | 0 |
---|
Mean | 61.053 |
---|
Minimum | -165.43 |
---|
Maximum | 354.47 |
---|
Zeros (%) | 13.6% |
---|
Quantile statistics
Minimum | -165.43 |
---|
5-th percentile | -90.427 |
---|
Q1 | 0 |
---|
Median | 35.667 |
---|
Q3 | 157.17 |
---|
95-th percentile | 168 |
---|
Maximum | 354.47 |
---|
Range | 519.91 |
---|
Interquartile range | 157.17 |
---|
Descriptive statistics
Standard deviation | 80.655 |
---|
Coef of variation | 1.3211 |
---|
Kurtosis | -0.73139 |
---|
Mean | 61.053 |
---|
MAD | 67.606 |
---|
Skewness | -0.17438 |
---|
Sum | 2.3451e+06 |
---|
Variance | 6505.3 |
---|
Memory size | 357.3 KiB |
---|
Value | Count | Frequency (%) | |
0 | 6214 | 13.6% | |
35.667 | 4985 | 10.9% | |
168 | 3040 | 6.6% | |
26 | 1506 | < 0.1% | |
159.75 | 657 | < 0.1% | |
159.67 | 637 | < 0.1% | |
157.17 | 542 | < 0.1% | |
155.75 | 473 | < 0.1% | |
160.5 | 263 | < 0.1% | |
-70 | 228 | < 0.1% | |
Other values (14630) | 19866 | 43.4% | |
(Missing) | 7315 | 16.0% | |
Minimum 5 values
Value | Count | Frequency (%) | |
-165.43 | 9 | < 0.1% | |
-165.12 | 17 | < 0.1% | |
-163.17 | 1 | < 0.1% | |
-162.55 | 1 | < 0.1% | |
-157.87 | 1 | < 0.1% | |
Maximum 5 values
Value | Count | Frequency (%) | |
354.47 | 1 | < 0.1% | |
178.2 | 1 | < 0.1% | |
178.08 | 1 | < 0.1% | |
175.73 | 1 | < 0.1% | |
175.13 | 1 | < 0.1% | |
This variable is constant and should be ignored for analysis
Distinct count | 246 |
---|
Unique (%) | < 0.1% |
---|
Missing (%) | < 0.1% |
---|
Missing (n) | 312 |
---|
Infinite (%) | 0.0% |
---|
Infinite (n) | 0 |
---|
Minimum | 1688-01-01 00:00:00 |
---|
Maximum | 2101-01-01 00:00:00 |
---|
First rows
| boolean | fall | GeoLocation | id | mass_(g) | mixed | name | nametype | recclass | reclat | reclat_city | reclong | source | year |
---|
0 | True | Fell | (50.775, 6.08333) | 1 | 21.0 | A | Aachen | Valid | L5 | 50.77500 | 45.844917 | 6.08333 | NASA | 1880-01-01 |
---|
1 | False | Fell | (56.18333, 10.23333) | 2 | 720.0 | 1 | Aarhus | Valid | H6 | 56.18333 | 61.401378 | 10.23333 | NASA | 1951-01-01 |
---|
2 | True | Fell | (54.21667, -113.0) | 6 | 107000.0 | 1 | Abee | Valid | EH4 | 54.21667 | 56.665445 | -113.00000 | NASA | 1952-01-01 |
---|
3 | True | Fell | (16.88333, -99.9) | 10 | 1914.0 | A | Acapulco | Valid | Acapulcoite | 16.88333 | 13.980564 | -99.90000 | NASA | 1976-01-01 |
---|
4 | False | Fell | (-33.16667, -64.95) | 370 | 780.0 | 1 | Achiras | Valid | L6 | -33.16667 | -31.246833 | -64.95000 | NASA | 1902-01-01 |
---|
5 | False | Fell | (32.1, 71.8) | 379 | 4239.0 | 1 | Adhi Kot | Valid | EH4 | 32.10000 | 30.168071 | 71.80000 | NASA | 1919-01-01 |
---|
6 | True | Fell | (44.83333, 95.16667) | 390 | 910.0 | 1 | Adzhi-Bogdo (stone) | Valid | LL3-6 | 44.83333 | 41.823701 | 95.16667 | NASA | 1949-01-01 |
---|
7 | False | Fell | (44.21667, 0.61667) | 392 | 30000.0 | A | Agen | Valid | H5 | 44.21667 | 45.691889 | 0.61667 | NASA | 1814-01-01 |
---|
8 | False | Fell | (-31.6, -65.23333) | 398 | 1620.0 | A | Aguada | Valid | L6 | -31.60000 | -27.353326 | -65.23333 | NASA | 1930-01-01 |
---|
9 | True | Fell | (-30.86667, -64.55) | 417 | 1440.0 | 1 | Aguila Blanca | Valid | L | -30.86667 | -27.320248 | -64.55000 | NASA | 1920-01-01 |
---|
Last rows
| boolean | fall | GeoLocation | id | mass_(g) | mixed | name | nametype | recclass | reclat | reclat_city | reclong | source | year |
---|
45716 | True | Fell | (50.775, 6.08333) | 1 | 21.0 | A | Aachen copy | Valid | L5 | 50.77500 | 45.844917 | 6.08333 | NASA | 1880-01-01 |
---|
45717 | False | Fell | (56.18333, 10.23333) | 2 | 720.0 | 1 | Aarhus copy | Valid | H6 | 56.18333 | 61.401378 | 10.23333 | NASA | 1951-01-01 |
---|
45718 | True | Fell | (54.21667, -113.0) | 6 | 107000.0 | 1 | Abee copy | Valid | EH4 | 54.21667 | 56.665445 | -113.00000 | NASA | 1952-01-01 |
---|
45719 | True | Fell | (16.88333, -99.9) | 10 | 1914.0 | A | Acapulco copy | Valid | Acapulcoite | 16.88333 | 13.980564 | -99.90000 | NASA | 1976-01-01 |
---|
45720 | False | Fell | (-33.16667, -64.95) | 370 | 780.0 | 1 | Achiras copy | Valid | L6 | -33.16667 | -31.246833 | -64.95000 | NASA | 1902-01-01 |
---|
45721 | False | Fell | (32.1, 71.8) | 379 | 4239.0 | 1 | Adhi Kot copy | Valid | EH4 | 32.10000 | 30.168071 | 71.80000 | NASA | 1919-01-01 |
---|
45722 | True | Fell | (44.83333, 95.16667) | 390 | 910.0 | 1 | Adzhi-Bogdo (stone) copy | Valid | LL3-6 | 44.83333 | 41.823701 | 95.16667 | NASA | 1949-01-01 |
---|
45723 | False | Fell | (44.21667, 0.61667) | 392 | 30000.0 | A | Agen copy | Valid | H5 | 44.21667 | 45.691889 | 0.61667 | NASA | 1814-01-01 |
---|
45724 | False | Fell | (-31.6, -65.23333) | 398 | 1620.0 | A | Aguada copy | Valid | L6 | -31.60000 | -27.353326 | -65.23333 | NASA | 1930-01-01 |
---|
45725 | True | Fell | (-30.86667, -64.55) | 417 | 1440.0 | 1 | Aguila Blanca copy | Valid | L | -30.86667 | -27.320248 | -64.55000 | NASA | 1920-01-01 |
---|