-
-
Notifications
You must be signed in to change notification settings - Fork 2.3k
/
Copy pathinequality.py
156 lines (118 loc) · 3.68 KB
/
inequality.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
"""
Implements inequality and segregation measures such as Gini, Lorenz Curve
"""
import numpy as np
from numba import njit, prange
@njit
def lorenz_curve(y):
    """
    Compute the points of the Lorenz curve for a sample of income or wealth.

    The Lorenz curve is a graphical representation of the distribution of
    income or wealth: it relates the cumulative share of people (x-axis) to
    the cumulative share of income earned (y-axis), with individuals ordered
    from poorest to richest.

    Parameters
    ----------
    y : array_like(float or int, ndim=1)
        Array of income/wealth for each individual. Unordered or ordered is
        fine; the values are sorted internally.

    Returns
    -------
    cum_people : array_like(float, ndim=1)
        Cumulative share of people, ``i / n`` for ``i = 0, ..., n``.
    cum_income : array_like(float, ndim=1)
        Cumulative share of income held by the ``i`` poorest individuals,
        for ``i = 0, ..., n``.

    References
    ----------
    .. [1] https://en.wikipedia.org/wiki/Lorenz_curve

    Examples
    --------
    >>> a_val, n = 3, 10_000
    >>> y = np.random.pareto(a_val, size=n)
    >>> f_vals, l_vals = lorenz_curve(y)
    """
    n = len(y)
    ordered = np.sort(y)

    # running[k] is the total income of the k poorest individuals;
    # running[0] stays at zero so the curve starts at the origin.
    running = np.zeros(n + 1)
    running[1:] = np.cumsum(ordered)
    total = running[n]

    cum_people = np.zeros(n + 1)
    cum_income = np.zeros(n + 1)
    for k in range(1, n + 1):
        cum_people[k] = k / n
        cum_income[k] = running[k] / total

    return cum_people, cum_income
@njit(parallel=True)
def gini_coefficient(y):
    r"""
    Implements the Gini inequality index.

    The index is defined through the mean absolute difference

    .. math::
        G = \frac{\sum_{i=1}^n \sum_{j=1}^n |y_i - y_j|}{2 n \sum_{i=1}^n y_i}

    For the sample sorted in ascending order, :math:`y_{(1)} \le \dots \le
    y_{(n)}`, the double sum equals :math:`2 \sum_i (2 i - n - 1) y_{(i)}`,
    so the index is evaluated in O(n log n) time (dominated by the sort)
    rather than by visiting all n^2 pairs.

    Parameters
    ----------
    y : array_like(float)
        Array of income/wealth for each individual. Ordered or unordered is
        fine; the values are sorted internally.

    Returns
    -------
    gini_index : float
        The Gini index describing the inequality of the array of
        income/wealth.

    References
    ----------
    .. [1] https://en.wikipedia.org/wiki/Gini_coefficient
    """
    n = len(y)
    y_sorted = np.sort(y)

    # Half of the pairwise absolute-difference sum.  With a 0-based index the
    # weight (2 * rank - n - 1) of the rank-th smallest value is 2*i - n + 1.
    # Float literals keep the weight arithmetic in float64 regardless of how
    # the parallel loop index is typed.
    half_pairwise_sum = 0.0
    for i in prange(n):
        half_pairwise_sum += (2.0 * i - n + 1.0) * y_sorted[i]

    # The factor 2 of the original denominator cancels against the halved
    # numerator.
    return half_pairwise_sum / (n * np.sum(y))
def shorrocks_index(A):
    r"""
    Implements Shorrocks mobility index

    Parameters
    ----------
    A : array_like(float)
        Square matrix with transition probabilities (mobility matrix) of
        dimension m, with m >= 2.

    Returns
    -------
    Shorrocks index: float
        The Shorrocks mobility index calculated as

        .. math::

            s(A) = \frac{m - \sum_j a_{jj} }{m - 1}

        An index equal to 0 indicates complete immobility (every diagonal
        entry equals 1). When every diagonal entry lies in [0, 1] the index
        lies in [0, m / (m - 1)].

    Raises
    ------
    ValueError
        If `A` is not a two-dimensional square matrix, or if it has fewer
        than two rows (the index divides by ``m - 1``).

    References
    ----------
    .. [1] Wealth distribution and social mobility in the US: A quantitative
       approach (Benhabib, Bisin, Luo, 2017).
       https://www.econ.nyu.edu/user/bisina/RevisionAugust.pdf
    """
    A = np.asarray(A)  # Convert to array if not already

    # Check dimensionality explicitly so that 1-D or 3-D input produces a
    # meaningful message instead of a tuple-unpacking error.
    if A.ndim != 2 or A.shape[0] != A.shape[1]:
        raise ValueError('A must be a square matrix')

    m = A.shape[0]
    if m < 2:
        # m - 1 is the denominator; a single state has no mobility to measure.
        raise ValueError('A must have at least two rows and columns')

    return (m - np.trace(A)) / (m - 1)
def rank_size_plot(data, ax, label=None, c=1.0):
    """
    Draw a rank-size plot of the observations in `data` on the axis `ax`.

    The observations are sorted from largest to smallest, the top
    (c x 100)% are kept, and each retained value is plotted against its rank
    (1 for the largest) on log-log axes.

    Parameters
    ----------
    data : array_like
        the set of observations
    ax : axis object
        for plotting on, has method ax.loglog
    label : str, optional
        legend label for the plotted points; a legend is drawn only when a
        non-empty label is given
    c : int or float
        restrict plot to top (c x 100)% of the distribution

    Examples
    --------
    >>> import numpy as np
    >>> import matplotlib.pyplot as plt
    >>> y = np.exp(np.random.randn(1000))  # simulate data
    >>> fig, ax = plt.subplots()
    >>> rank_size_plot(y, ax)
    >>> plt.show()
    """
    # Lists and tuples do not support unary minus, so convert first.
    data = np.asarray(data)

    w = -np.sort(-data)                  # Reverse sort (largest first)
    w = w[:int(len(w) * c)]              # extract top (c x 100)%
    rank_data = np.arange(len(w)) + 1    # ranks start at 1
    size_data = w

    ax.loglog(rank_data, size_data, 'o', markersize=3.0, alpha=0.5, label=label)
    if label:
        ax.legend()
    ax.set_xlabel("log rank")
    ax.set_ylabel("log size")