Skip to content

Commit

Permalink
Merge pull request #414 from cdagnino/inequality_measures
Browse files Browse the repository at this point in the history
Added basic inequality measures: Lorenz curve and Gini
  • Loading branch information
mmcky authored Oct 11, 2018
2 parents c319203 + 176801b commit b52559f
Show file tree
Hide file tree
Showing 3 changed files with 153 additions and 0 deletions.
1 change: 1 addition & 0 deletions quantecon/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
# from .game_theory import <objects-here> #Place Holder if we wish to promote any general objects to the qe namespace.
from .graph_tools import DiGraph, random_tournament_graph
from .gridtools import cartesian, mlinspace, simplex_grid, simplex_index
from .inequality import lorenz_curve, gini_coefficient
from .kalman import Kalman
from .lae import LAE
from .arma import ARMA
Expand Down
86 changes: 86 additions & 0 deletions quantecon/inequality.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
"""
Implements inequality and segregation measures such as Gini, Lorenz Curve
"""

import numpy as np
from numba import njit, prange


@njit
def lorenz_curve(y):
    """
    Compute the points of the Lorenz curve, a graphical representation of
    the distribution of income or wealth.

    The curve is returned as two arrays of length ``len(y) + 1``; both
    start at zero so that the curve begins at the origin.

    Parameters
    ----------
    y : array_like(float or int, ndim=1)
        Income/wealth of each individual. The values are sorted internally,
        so they may be passed in any order.

    Returns
    -------
    cum_people : array_like(float, ndim=1)
        Cumulative share of people for each person index (i/n)
    cum_income : array_like(float, ndim=1)
        Cumulative share of income held by the i poorest individuals

    References
    ----------
    https://en.wikipedia.org/wiki/Lorenz_curve

    Examples
    --------
    a_val, n = 3, 10_000
    y = np.random.pareto(a_val, size=n)
    f_vals, l_vals = lorenz_curve(y)
    # Plot (requires matplotlib.pyplot imported as plt)
    fig, ax = plt.subplots(1, 1, figsize=(5, 5))
    ax.plot(f_vals, l_vals, label="Pareto with a={0}".format(a_val))
    fig.suptitle("Pareto distribution with a={0}".format(a_val))
    """

    num_people = len(y)

    # Running total of the sorted incomes, with a leading zero so that
    # index k holds the income of the k poorest individuals.
    running_total = np.zeros(num_people + 1)
    running_total[1:] = np.cumsum(np.sort(y))
    total_income = running_total[num_people]

    cum_people = np.zeros(num_people + 1)
    cum_income = np.zeros(num_people + 1)
    # Index 0 of both outputs stays at zero (the origin of the curve).
    for k in range(1, num_people + 1):
        cum_income[k] = running_total[k] / total_income
        cum_people[k] = k / num_people
    return cum_people, cum_income


@njit
def gini_coefficient(y):
    r"""
    Compute the Gini inequality index of an income/wealth distribution.

    The index is half of the relative mean absolute difference:

    .. math::

        G = \frac{\sum_{i=1}^{n} \sum_{j=1}^{n} |y_i - y_j|}
                 {2 n \sum_{i=1}^{n} y_i}

    For values sorted in ascending order, :math:`y_{(1)} \le \dots \le
    y_{(n)}`, the double sum equals
    :math:`2 \sum_{i=1}^{n} (2i - n - 1) \, y_{(i)}`. That identity is used
    here so the cost is a single sort, O(n log n), rather than the O(n^2)
    pairwise comparison; results agree up to floating-point rounding.

    Parameters
    ----------
    y : array_like(float or int, ndim=1)
        Income/wealth of each individual. The values are sorted internally,
        so they may be passed in any order.

    Returns
    -------
    gini_index : float
        The Gini index describing the inequality of the array of
        income/wealth

    Notes
    -----
    The index is undefined when the values sum to zero (which includes an
    empty array), because the normalising denominator is then zero.

    References
    ----------
    https://en.wikipedia.org/wiki/Gini_coefficient
    """
    n = len(y)
    y_sorted = np.sort(y)
    # Weight of the i-th smallest value (1-based rank i) is 2i - n - 1.
    # The leading 2.0 makes the weights floating point, so integer input
    # is promoted before multiplying.
    weights = 2.0 * np.arange(1, n + 1) - (n + 1)
    return np.sum(weights * y_sorted) / (n * np.sum(y_sorted))


66 changes: 66 additions & 0 deletions quantecon/tests/test_inequality.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@

"""
Tests for inequality.py
"""

import numpy as np
from numpy.testing import assert_allclose
from quantecon import lorenz_curve, gini_coefficient


def test_lorenz_curve():
    """
    Tests the `lorenz_curve` function on two limiting income distributions.

    When everyone has almost the same wealth, the Lorenz curve should be
    close to a straight line (cumulative income tracks cumulative people).

    When one person has almost all the wealth, the curve should stay flat
    near zero and then shoot straight up as the population share
    approaches one.
    """
    sample_size = 3000

    # Almost equal distribution: unit incomes plus tiny Gaussian noise
    noise = np.random.normal(scale=0.0001, size=sample_size)
    incomes = np.repeat(1, sample_size) + noise
    cum_people, cum_income = lorenz_curve(incomes)
    assert_allclose(cum_people, cum_income, rtol=1e-03)

    # Very uneven distribution: a single individual holds nearly everything
    incomes = np.repeat(0.001, sample_size)
    incomes[4] = 100000
    _, income_cum = lorenz_curve(incomes)
    flat_then_jump = np.zeros(sample_size + 1)
    flat_then_jump[-1] = 1.
    assert_allclose(flat_then_jump, income_cum, atol=1e-4)


def test_gini_coeff():
    """
    Tests that the function `gini_coefficient` reproduces the analytical
    Gini coefficient of samples from the Pareto and Weibull distributions.

    For a Pareto distribution with parameter `a`:
        G = 1 / (2*a - 1)

    For a Weibull distribution with parameter `a`:
        G = 1 - 2**(-1/a)

    The shape parameter is drawn at random for each case and the sample
    estimate is compared with the analytical value to within 10%.
    """
    sample_size = 10000

    # Pareto: G = 1 / (2*a - 1)
    shape = np.random.randint(2, 15)
    analytic = 1 / (2 * shape - 1)
    draws = (np.random.pareto(shape, size=sample_size) + 1) * 2
    estimate = gini_coefficient(draws)
    assert_allclose(analytic, estimate, rtol=1e-01)

    # Weibull: G = 1 - 2**(-1/a)
    shape = np.random.randint(2, 15)
    analytic = 1 - 2 ** (-1 / shape)
    draws = np.random.weibull(shape, size=sample_size)
    estimate = gini_coefficient(draws)
    assert_allclose(analytic, estimate, rtol=1e-01)

0 comments on commit b52559f

Please sign in to comment.