-
-
Notifications
You must be signed in to change notification settings - Fork 2.3k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #414 from cdagnino/inequality_measures
Added basic inequality measures: Lorenz curve and Gini
- Loading branch information
Showing
3 changed files
with
153 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,86 @@ | ||
""" | ||
Implements inequality and segregation measures such as Gini, Lorenz Curve | ||
""" | ||
|
||
import numpy as np | ||
from numba import njit, prange | ||
|
||
|
||
@njit
def lorenz_curve(y):
    """
    Compute the points of the Lorenz curve for a sample of incomes.

    The Lorenz curve is a graphical representation of the distribution of
    income or wealth. This function returns the cumulative share of people
    (x-axis) and the cumulative share of income they earn (y-axis).

    Parameters
    ----------
    y : array_like(float or int, ndim=1)
        Array of income/wealth for each individual. Unordered or ordered
        is fine; a sorted copy is used internally.

    Returns
    -------
    cum_people : array_like(float, ndim=1)
        Cumulative share of people for each person index (i/n). The array
        has length n + 1 and starts at 0.
    cum_income : array_like(float, ndim=1)
        Cumulative share of income for each person index. The array has
        length n + 1 and starts at 0.

    References
    ----------
    https://en.wikipedia.org/wiki/Lorenz_curve

    Examples
    --------
    a_val, n = 3, 10_000
    y = np.random.pareto(a_val, size=n)
    f_vals, l_vals = lorenz_curve(y)
    #Plot
    fig, ax = plt.subplots(1, 1, figsize=(5, 5))
    ax.plot(f_vals, l_vals, label="Pareto with a={0}".format(a_val))
    fig.suptitle("Pareto distribution with a={0}".format(a_val))
    """
    count = len(y)

    # Running income totals of the sorted sample, with a leading zero so
    # that index k holds the income of the k poorest individuals.
    running_income = np.zeros(count + 1)
    running_income[1:] = np.cumsum(np.sort(y))
    total_income = running_income[count]

    cum_people = np.zeros(count + 1)
    cum_income = np.zeros(count + 1)
    # Entry 0 stays at zero: no people hold no income.
    for k in range(count):
        cum_people[k + 1] = (k + 1) / count
        cum_income[k + 1] = running_income[k + 1] / total_income
    return cum_people, cum_income
|
||
|
||
@njit(parallel=True)
def gini_coefficient(y):
    r"""
    Compute the Gini inequality index of a sample.

    The index is evaluated directly from all pairwise absolute differences:

    .. math::

        G = \frac{\sum_{i} \sum_{j} |y_i - y_j|}{2 n \sum_{i} y_i}

    Parameters
    ----------
    y : array_like(float)
        Array of income/wealth for each individual. Ordered or unordered
        is fine.

    Returns
    -------
    Gini index: float
        The gini index describing the inequality of the array of
        income/wealth.

    References
    ----------
    https://en.wikipedia.org/wiki/Gini_coefficient
    """
    n = len(y)
    row_totals = np.zeros(n)
    # Each outer iteration writes only its own slot of ``row_totals``, so
    # the iterations are independent of one another.
    for i in prange(n):
        row_acc = 0.0
        for j in range(n):
            row_acc += abs(y[i] - y[j])
        row_totals[i] = row_acc
    return np.sum(row_totals) / (2 * n * np.sum(y))
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
|
||
""" | ||
Tests for inequality.py | ||
""" | ||
|
||
import numpy as np | ||
from numpy.testing import assert_allclose | ||
from quantecon import lorenz_curve, gini_coefficient | ||
|
||
|
||
def test_lorenz_curve():
    """
    Tests the `lorenz_curve` function, which calculates the Lorenz curve.

    An income distribution where everyone has almost the same wealth should
    be close to a straight line (the 45-degree line).

    An income distribution where one person has almost all the wealth
    should be flat and then shoot straight up as it approaches one.
    """
    n = 3000

    # Almost equal distribution: income shares track population shares.
    near_equal = np.repeat(1, n) + np.random.normal(scale=0.0001, size=n)
    people_share, income_share = lorenz_curve(near_equal)
    assert_allclose(people_share, income_share, rtol=1e-03)

    # Very uneven distribution: a single individual holds nearly everything,
    # so the curve is ~0 everywhere except the final point.
    flat_then_jump = np.repeat(0., n + 1)
    flat_then_jump[-1] = 1.

    concentrated = np.repeat(0.001, n)
    concentrated[4] = 100000
    _, concentrated_income_share = lorenz_curve(concentrated)
    assert_allclose(flat_then_jump, concentrated_income_share, atol=1e-4)
|
||
|
||
def test_gini_coeff():
    """
    Tests how the function `gini_coefficient` calculates the Gini
    coefficient with the Pareto and the Weibull distribution.

    Analytically, we know that Pareto with parameter `a` has
    G = 1 / (2*a - 1)

    Likewise, for the Weibull distribution with parameter `a` we know that
    G = 1 - 2**(-1/a)
    """
    n = 10000

    # Each case pairs a sampler with the closed-form Gini of its
    # distribution; both take the randomly drawn shape parameter.
    cases = (
        # Pareto: G = 1 / (2*a - 1)
        (
            lambda shape: (np.random.pareto(shape, size=n) + 1) * 2,
            lambda shape: 1 / (2 * shape - 1),
        ),
        # Weibull: G = 1 - 2**(-1/a)
        (
            lambda shape: np.random.weibull(shape, size=n),
            lambda shape: 1 - 2 ** (-1 / shape),
        ),
    )

    for draw_sample, analytic_gini in cases:
        a = np.random.randint(2, 15)
        expected = analytic_gini(a)
        sample = draw_sample(a)
        assert_allclose(expected, gini_coefficient(sample), rtol=1e-01)