forked from IATI/IATI-Dashboard
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathcomprehensiveness.py
181 lines (149 loc) · 8.49 KB
/
comprehensiveness.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
# This file converts raw comprehensiveness data to percentages, and calculates averages.
from data import publishers_ordered_by_title, get_publisher_stats, publisher_name
# Column definitions for every tab of the comprehensiveness table.
#
# Each tab maps to a list of tuples of the form:
#   (slug, header, weighting used when calculating the tab's average)
# Financial columns carry an optional fourth item naming the hierarchy whose
# counts are used as the numerator for that column.
# A weighting of 0 keeps a tab's own average out of the calculation of that average.
columns = {
    'summary': [
        ('core_average', 'Core Average', 2),
        ('financials_average', 'Financials Average', 1),
        ('valueadded_average', 'Value Added Average', 1),
        ('summary_average', 'Weighted Average', 0),  # weighting 0: excluded from its own average
    ],
    'core': [
        ('version', 'Version', 1),
        ('reporting-org', 'Reporting Organisation', 1),
        ('iati-identifier', 'IATI Identifier', 1),
        ('participating-org', 'Participating Organisation', 1),
        ('title', 'Title', 1),
        ('description', 'Description', 1),
        ('activity-status', 'Status', 1),
        ('activity-date', 'Activity Date', 1),
        ('sector', 'Sector', 1),
        ('country_or_region', 'Country or Region', 1),
        ('core_average', 'Average', 0),  # weighting 0: excluded from its own average
    ],
    'financials': [
        ('transaction_commitment', 'Transaction - Commitment', 1, 'first_hierarchy_with_commitments'),
        ('transaction_spend', 'Transaction - Disbursement or Expenditure', 1, 'bottom_hierarchy'),
        ('transaction_traceability', 'Transaction - Traceability', 1, 'bottom_hierarchy'),
        ('budget', 'Budget', 1, 'hierarchy_with_most_budgets'),
        ('financials_average', 'Average', 0),  # weighting 0: excluded from its own average
    ],
    'valueadded': [
        ('contact-info', 'Contacts', 1),
        ('location', 'Location Details', 1),
        ('location_point_pos', 'Geographic Coordinates', 1),
        ('sector_dac', 'DAC Sectors', 1),
        ('capital-spend', 'Capital Spend', 1),
        ('document-link', 'Activity Documents', 1),
        ('aid_type', 'Aid Type', 1),
        ('recipient_language', 'Recipient Language', 1),
        ('result_indicator', 'Result/ Indicator', 1),
        ('valueadded_average', 'Average', 0),  # weighting 0: excluded from its own average
    ],
}

# Per-tab lists of the headers and of the slugs, in the order defined above.
column_headers = {tab: [column[1] for column in tab_columns] for tab, tab_columns in columns.items()}
column_slugs = {tab: [column[0] for column in tab_columns] for tab, tab_columns in columns.items()}

# Dictionary mapping each slug to the hierarchy that supplies its numerator.
# Columns without an explicit fourth item fall back to 'all', e.g.
# {'activity-date': 'all', 'activity-status': 'all', [...] 'budget': 'hierarchy_with_most_budgets', [etc]}
column_base_lookup = {
    column[0]: (column[3] if len(column) > 3 else 'all')
    for tab_columns in columns.values()
    for column in tab_columns
}
def denominator(key, stats):
    """Look up the denominator that the count for ``key`` should be divided by.

    A denominator recorded specifically for ``key`` takes precedence; when
    there is none, the default denominator is used instead.

    Returns the integer 0 when ``stats`` is falsy (e.g. None or an empty
    mapping), otherwise the selected denominator converted to a float.
    """
    # Nothing to look up in: report a zero denominator
    if not stats:
        return 0

    specific_denominators = stats['comprehensiveness_denominators']
    if key in specific_denominators:
        return float(specific_denominators[key])

    # No key-specific value, so fall back to the default denominator
    return float(stats['comprehensiveness_denominator_default'])
def get_hierarchy_with_most_budgets(stats):
    """Pick the hierarchy whose comprehensiveness data holds the most budgets.

    A hierarchy's budget count is its 'budget' count plus its
    'budget_not_provided' count. Hierarchies whose default denominator is
    not greater than zero are ranked below every other hierarchy.

    Input:
      stats -- a JSONDir object of publisher stats
    Returns:
      Key of the highest ranked hierarchy, or None when 'by_hierarchy' is
      missing or empty, or when a lookup needed for the ranking raises KeyError
    """
    def budget_score(hierarchy):
        hierarchy_stats = stats['by_hierarchy'][hierarchy]
        if hierarchy_stats['comprehensiveness_denominator_default'] > 0:
            counts = hierarchy_stats['comprehensiveness']
            return counts.get('budget', 0) + counts.get('budget_not_provided', 0)
        # Rank hierarchies without a positive default denominator last
        return -1

    try:
        return max(stats['by_hierarchy'], key=budget_score)
    except (KeyError, ValueError):
        # KeyError: comprehensiveness data is missing for this publisher
        # ValueError: 'by_hierarchy' is empty, so max() has nothing to choose from
        return None
def get_first_hierarchy_with_commitments(stats):
    """Return the number of the first hierarchy that contains at least 1 commitment
    (according to the comprehensiveness counts)

    Hierarchy keys that look like integers are ordered by their numeric value,
    so that '2' comes before '10' even when the keys are strings. Keys that are
    not numeric are ordered after all numeric keys, by their string value.

    Input:
      stats -- publisher stats; a missing 'by_hierarchy' entry is treated as empty
    Returns:
      Key of the first hierarchy with commitments (as stored in 'by_hierarchy'),
      or None if no commitments in any hierarchy
    Raises:
      KeyError if a hierarchy has no 'comprehensiveness' entry
    """
    def hierarchy_order(hierarchy):
        # A plain min() on string keys is lexicographic and would pick '10' over '2'
        try:
            return (0, int(hierarchy))
        except (TypeError, ValueError):
            return (1, str(hierarchy))

    hierarchies_with_commitments = [
        hierarchy
        for hierarchy, hierarchy_stats in stats.get('by_hierarchy', {}).items()
        if hierarchy_stats['comprehensiveness'].get('transaction_commitment', 0) > 0
    ]
    if not hierarchies_with_commitments:
        return None
    return min(hierarchies_with_commitments, key=hierarchy_order)
def generate_row(publisher):
    """Build the comprehensiveness table row for a single publisher.

    The returned dictionary holds the publisher and its title, a percentage for
    each core, financials and value added slug (stored under the slug, and under
    slug + '_valid' for the validated counts) wherever the denominator is not
    zero, and finally the weighted '<page>_average' and '<page>_average_valid'
    values for every page including the summary.
    """
    stats = get_publisher_stats(publisher)

    # Start the row off with the identifying details; percentages are added below
    row = {
        'publisher': publisher,
        'publisher_title': publisher_name[publisher],
    }

    # Percentages for every individual column
    data_slugs = column_slugs['core'] + column_slugs['financials'] + column_slugs['valueadded']
    for slug in data_slugs:
        # Choose the stats that supply the numerator, based on the hierarchy named in the lookup
        base_kind = column_base_lookup[slug]
        if base_kind == 'bottom_hierarchy':
            base = stats.get('bottom_hierarchy', {})
        elif base_kind == 'hierarchy_with_most_budgets':
            base = stats['by_hierarchy'].get(get_hierarchy_with_most_budgets(stats), {})
        elif base_kind == 'first_hierarchy_with_commitments':
            first_hierarchy = get_first_hierarchy_with_commitments(stats)
            if first_hierarchy:
                base = stats['by_hierarchy'].get(first_hierarchy, {})
            else:
                # No hierarchy has commitments, so use the bottom hierarchy instead
                base = stats.get('bottom_hierarchy', {})
        else:
            # The usual case, where the lookup gives 'all'
            base = stats

        counts_all = base.get('comprehensiveness', {})
        counts_valid = base.get('comprehensiveness_with_validation', {})
        numerator_all = counts_all.get(slug, 0)
        numerator_valid = counts_valid.get(slug, 0)
        if slug == 'budget':
            # Activities counted as 'budget_not_provided' are added to the budget count
            numerator_all = numerator_all + counts_all.get('budget_not_provided', 0)
            numerator_valid = numerator_valid + counts_valid.get('budget_not_provided', 0)

        slug_denominator = denominator(slug, base)
        if slug_denominator != 0:
            # Store the percentages; slugs with a zero denominator are left out of the row
            row[slug] = float(numerator_all) / slug_denominator * 100
            row[slug + '_valid'] = float(numerator_valid) / slug_denominator * 100

    # Weighted averages for each grouping.
    # 'summary' has to come last because it is built from the averages of the other groupings.
    for page in ['core', 'financials', 'valueadded', 'summary']:
        page_columns = columns[page]
        weighted_all = sum((row.get(column[0]) or 0) * column[2] for column in page_columns)
        total_weight = float(sum(column[2] for column in page_columns))
        row[page + '_average'] = weighted_all / total_weight
        weighted_valid = sum((row.get(column[0] + '_valid') or 0) * column[2] for column in page_columns)
        row[page + '_average_valid'] = weighted_valid / total_weight

    return row
def table():
    """Yield one comprehensiveness table row per publisher.

    Publishers are visited in the order given by publishers_ordered_by_title,
    and each row is only generated when the generator is advanced to it.
    """
    for _title, publisher in publishers_ordered_by_title:
        # The title is only used for ordering; the row is generated from the publisher
        yield generate_row(publisher)