Merge pull request #414 from cdagnino/inequality_measures

Added basic inequality measures: Lorenz curve and Gini
QuantEcon · Oct 11, 2018 · b52559f · b52559f
2 parents c319203 + 176801b
commit b52559f
Show file tree

Hide file tree

Showing 3 changed files with 153 additions and 0 deletions.
diff --git a/quantecon/__init__.py b/quantecon/__init__.py
@@ -23,6 +23,7 @@
 # from .game_theory import <objects-here> 							#Place Holder if we wish to promote any general objects to the qe namespace.
 from .graph_tools import DiGraph, random_tournament_graph
 from .gridtools import cartesian, mlinspace, simplex_grid, simplex_index
+from .inequality import lorenz_curve, gini_coefficient
 from .kalman import Kalman
 from .lae import LAE
 from .arma import ARMA

diff --git a/quantecon/inequality.py b/quantecon/inequality.py
@@ -0,0 +1,86 @@
+"""
+Implements inequality and segregation measures such as Gini, Lorenz Curve
+
+"""
+
+import numpy as np
+from numba import njit, prange
+
+
+@njit
+def lorenz_curve(y):
+    """
+    Compute the Lorenz curve of an income or wealth distribution.
+
+    The curve pairs the cumulative share of people (x-axis) with the
+    cumulative share of income they hold (y-axis). Both returned arrays
+    start at 0 and have one more entry than `y`.
+
+    Parameters
+    ----------
+    y : array_like(float or int, ndim=1)
+        Income/wealth of each individual; it does not need to be sorted.
+
+    Returns
+    -------
+    cum_people : array_like(float, ndim=1)
+        Cumulative share of people at each person index (i/n)
+    cum_income : array_like(float, ndim=1)
+        Cumulative share of income at each person index
+
+    References
+    ----------
+    https://en.wikipedia.org/wiki/Lorenz_curve
+
+    Examples
+    --------
+    a_val, n = 3, 10_000
+    y = np.random.pareto(a_val, size=n)
+    f_vals, l_vals = lorenz_curve(y)
+    #Plot
+    fig, ax = plt.subplots(1, 1, figsize=(5, 5))
+    ax.plot(f_vals, l_vals, label="Pareto with a={0}".format(a_val))
+    fig.suptitle("Pareto distribution with a={0}".format(a_val))
+
+    """
+
+    ordered = np.sort(y)
+    count = len(ordered)
+
+    # Running income totals with a leading zero so the curve starts at 0
+    running = np.zeros(count + 1)
+    running[1:] = np.cumsum(ordered)
+    total = running[count]
+
+    cum_people = np.zeros(count + 1)
+    cum_income = np.zeros(count + 1)
+    for k in range(1, count + 1):
+        cum_people[k] = k / count
+        cum_income[k] = running[k] / total
+    return cum_people, cum_income
+
+
+@njit(parallel=True)
+def gini_coefficient(y):
+    r"""
+    Compute the Gini inequality index of an income or wealth array.
+
+    The index is the sum of absolute differences over all pairs divided
+    by 2 * n * sum(y). Instead of visiting every pair, the data is sorted
+    and the identity
+
+        sum_i sum_j |y_i - y_j| = 2 * sum_k (2k - n - 1) * y_(k),  k = 1..n
+
+    is used (y_(k) being the k-th smallest value), which costs
+    O(n log n) rather than the O(n^2) of a pairwise double loop.
+
+    Parameters
+    -----------
+    y : array_like(float)
+        Income/wealth of each individual; it does not need to be sorted.
+
+    Returns
+    -------
+    Gini index: float
+        The Gini index describing the inequality of the array of
+        income/wealth
+
+    References
+    ----------
+
+    https://en.wikipedia.org/wiki/Gini_coefficient
+    """
+    n = len(y)
+    y_sorted = np.sort(y)
+    weighted_sum = 0.0
+    for i in prange(n):
+        # Rank weight 2k - n - 1 with k = i + 1; float literals keep the
+        # arithmetic in floating point whatever integer type the loop
+        # index is given.
+        weighted_sum += (2.0 * i - (n - 1.0)) * y_sorted[i]
+    return weighted_sum / (n * np.sum(y_sorted))
+
+
diff --git a/quantecon/tests/test_inequality.py b/quantecon/tests/test_inequality.py
@@ -0,0 +1,66 @@
+
+"""
+Tests for inequality.py
+
+"""
+
+import numpy as np
+from numpy.testing import assert_allclose
+from quantecon import lorenz_curve, gini_coefficient
+
+
+def test_lorenz_curve():
+    """
+    Check `lorenz_curve` against two limiting income distributions.
+
+    When everyone holds almost the same wealth, the curve should be close
+    to a straight line (cumulative income share equals cumulative
+    population share).
+
+    When one person holds almost all the wealth, the curve should stay
+    flat and only shoot up to one at the last point.
+    """
+    size = 3000
+
+    # Nearly equal incomes: both cumulative shares should agree
+    noise = np.random.normal(scale=0.0001, size=size)
+    nearly_equal = np.ones(size) + noise
+    people_share, income_share = lorenz_curve(nearly_equal)
+    assert_allclose(people_share, income_share, rtol=1e-03)
+
+    # One dominant income: every cumulative share is ~0 except the last
+    skewed = np.full(size, 0.001)
+    skewed[4] = 100000
+    _, income_share = lorenz_curve(skewed)
+    target = np.zeros(size + 1)
+    target[-1] = 1.
+    assert_allclose(target, income_share, atol=1e-4)
+
+
+def test_gini_coeff():
+    """
+    Check `gini_coefficient` on Pareto and Weibull samples.
+
+    For a Pareto distribution with parameter `a` the Gini coefficient is
+    G = 1 / (2*a - 1)
+
+    For a Weibull distribution with parameter `a` it is
+    G = 1 - 2**(-1/a)
+
+    The parameter is drawn at random for each case, and the sample
+    estimate must match the closed form within a 10% relative tolerance.
+    """
+    sample_size = 10000
+
+    # Pareto: G = 1 / (2*a - 1)
+    shape = np.random.randint(2, 15)
+    pareto_draws = np.random.pareto(shape, size=sample_size)
+    incomes = 2 * (pareto_draws + 1)
+    expected = 1 / (2 * shape - 1)
+    assert_allclose(expected, gini_coefficient(incomes), rtol=1e-01)
+
+    # Weibull: G = 1 - 2**(-1/a)
+    shape = np.random.randint(2, 15)
+    incomes = np.random.weibull(shape, size=sample_size)
+    expected = 1 - 2 ** (-1 / shape)
+    assert_allclose(expected, gini_coefficient(incomes), rtol=1e-01)