Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

REF and TEST: rank_size in inequality.py #551

Merged
merged 1 commit into from
Jun 29, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion quantecon/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
from .graph_tools import DiGraph, random_tournament_graph
from .gridtools import cartesian, mlinspace, simplex_grid, simplex_index
from .inequality import lorenz_curve, gini_coefficient, shorrocks_index, \
rank_size_plot
rank_size
from .kalman import Kalman
from .lae import LAE
from .arma import ARMA
Expand Down
47 changes: 22 additions & 25 deletions quantecon/inequality.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,17 @@
@njit
def lorenz_curve(y):
"""
Calculates the Lorenz Curve, a graphical representation of the distribution of income
or wealth.
Calculates the Lorenz Curve, a graphical representation of
the distribution of income or wealth.

It returns the cumulative share of people (x-axis) and the cumulative share of income earned
It returns the cumulative share of people (x-axis) and
the cumulative share of income earned.

Parameters
----------
y : array_like(float or int, ndim=1)
Array of income/wealth for each individual. Unordered or ordered is fine.
Array of income/wealth for each individual.
Unordered or ordered is fine.

Returns
-------
Expand Down Expand Up @@ -60,7 +62,8 @@ def gini_coefficient(y):
Parameters
-----------
y : array_like(float)
Array of income/wealth for each individual. Ordered or unordered is fine
Array of income/wealth for each individual.
Ordered or unordered is fine.

Returns
-------
Expand Down Expand Up @@ -96,15 +99,15 @@ def shorrocks_index(A):
The Shorrocks mobility index calculated as

.. math::

s(A) = \frac{m - \sum_j a_{jj} }{m - 1} \in (0, 1)

An index equal to 0 indicates complete immobility.

References
-----------
.. [1] Wealth distribution and social mobility in the US: A quantitative approach
(Benhabib, Bisin, Luo, 2017).
.. [1] Wealth distribution and social mobility in the US:
A quantitative approach (Benhabib, Bisin, Luo, 2017).
https://www.econ.nyu.edu/user/bisina/RevisionAugust.pdf
"""

Expand All @@ -119,38 +122,32 @@ def shorrocks_index(A):
return (m - diag_sum) / (m - 1)


def rank_size_plot(data, ax, label=None, c=1.0):
def rank_size(data, c=1.0):
    """
    Generate rank-size data corresponding to distribution data.

    The observations are sorted from largest to smallest, the top
    (c x 100)% are kept, and each kept observation is paired with its
    rank (1 for the largest observation).

    Examples
    --------
    >>> import numpy as np
    >>> y = np.exp(np.random.randn(1000))  # simulate data
    >>> rank_data, size_data = rank_size(y, c=0.85)

    Parameters
    ----------
    data : array_like
        the set of observations
    c : int or float
        restrict the output to the top (c x 100)% of the distribution

    Returns
    -------
    rank_data : ndarray(int, ndim=1)
        Rank of each retained observation when the data are sorted
        from largest to smallest, starting at 1
    size_data : ndarray(ndim=1)
        Size data for the top (c x 100)% of the observations, in
        descending order

    Raises
    ------
    ValueError
        If `c` is negative.
    """
    if c < 0:
        # A negative fraction would turn the slice below into
        # ``w[:-k]`` and silently drop the *smallest* observations.
        raise ValueError("c must be non-negative")

    # Coerce first so that lists/tuples are accepted: unary minus is
    # not defined for plain Python sequences.
    w = - np.sort(- np.asarray(data))    # Reverse sort
    w = w[:int(len(w) * c)]              # extract top (c * 100)%
    rank_data = np.arange(len(w)) + 1
    size_data = w
    return rank_data, size_data

50 changes: 47 additions & 3 deletions quantecon/tests/test_inequality.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,10 @@
"""

import numpy as np
from numpy.testing import assert_allclose
from quantecon import lorenz_curve, gini_coefficient, shorrocks_index
from numpy.testing import assert_allclose, assert_raises
from scipy.stats import linregress
from quantecon import lorenz_curve, gini_coefficient, \
shorrocks_index, rank_size


def test_lorenz_curve():
Expand Down Expand Up @@ -37,7 +39,7 @@ def test_lorenz_curve():

def test_gini_coeff():
"""
Tests how the funciton `gini_coefficient` calculates the Gini coefficient
Tests how the function `gini_coefficient` calculates the Gini coefficient
with the Pareto and the Weibull distribution.

Analytically, we know that Pareto with parameter `a` has
Expand Down Expand Up @@ -88,3 +90,45 @@ def test_shorrocks_index():
index = shorrocks_index(P)
assert_allclose(expected, index, rtol=1e-2)


def test_rank_size():
    """
    Tests `rank_size` function, which generates rank-size data for
    a Pareto distribution.

    The rank-size plot for a sample drawn from a Pareto distribution
    should be a straight line on log-log axes, whereas the plot for a
    lognormal sample should be visibly curved.

    The length of the `rank_data` array should be within (c x 100)%
    of the size of the distribution.
    """

    # A local, seeded generator keeps both draws reproducible and
    # avoids mutating NumPy's global random state for other tests.
    rng = np.random.RandomState(13)

    # A larger sample and a looser tolerance keep the Pareto check
    # from hinging on noise in the few largest order statistics.
    sample_size = 10000
    c = 0.74
    rtol = 1e-2

    # Tests Pareto (exp of an exponential draw); r_squared ~ 1
    pareto_draw = np.exp(rng.exponential(scale=1.0, size=sample_size))
    rank_data, size_data = rank_size(pareto_draw, c=c)

    assert len(rank_data) == len(size_data)
    assert_allclose(c*sample_size, len(rank_data), rtol=1e-3)

    _, _, r_value, _, _ = linregress(np.log(rank_data), np.log(size_data))
    r_sqval = r_value**2

    assert_allclose(r_sqval, 1, rtol=rtol)

    # Tests lognormal (exp of a standard normal draw); r_squared < 1
    z = rng.randn(sample_size)

    exp_draw = np.exp(z)
    rank_data_exp, size_data_exp = rank_size(exp_draw, c=c)

    _, _, r_value_exp, _, _ = linregress(np.log(rank_data_exp),
                                         np.log(size_data_exp))
    r_sqval_exp = r_value_exp**2

    # The lognormal rank-size relation is not linear, so the same
    # closeness check that passes for Pareto must fail here.
    assert_raises(AssertionError, assert_allclose, r_sqval_exp, 1, rtol=rtol)