-
Notifications
You must be signed in to change notification settings - Fork 0
/
plotoageo.py
127 lines (108 loc) · 5.6 KB
/
plotoageo.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
import argparse
from cProfile import label
import pandas as pd
# Raise pandas' display.max_rows option to 200 so the DataFrames printed by
# this script are not truncated until they exceed that many rows.
pd.set_option('display.max_rows',200)
import numpy as np
import matplotlib.pyplot as plt
import geopandas
# Maps the numeric --tdwg_wgsrpd_level option to the name of the input column
# that holds the area name at that level.
dist_level_mapper = {
    1: 'continent',
    2: 'region',
    3: 'area',
}
def _add_oa_metrics(df):
    """Add the OA_ratio and OA_unfind (percentage) columns to the pivoted table."""
    # Sum only the three count columns: a blanket df.sum(axis=1) would also try
    # to add the string area-name column, which raises on pandas >= 2.0.
    total = df[['OA_false', 'OA_true', 'OA_n/a']].sum(axis=1)
    df['OA_ratio'] = df['OA_true'] / df['OA_false']
    df['OA_unfind'] = df['OA_n/a'] / total
    df['OA_unfind'] = df['OA_unfind'] * 100
    return df


def _plot_choropleth(world, column, ax, title):
    """Draw one quantile-classified map of `column` onto `ax` with `title`."""
    world.plot(
        column=column,
        ax=ax,
        legend=True,
        legend_kwds=dict(loc='lower left', fontsize='x-small'),
        cmap='OrRd',
        scheme='quantiles',
    )
    ax.set_title(title)
    ax.xaxis.set_visible(False)
    ax.yaxis.set_visible(False)


def _save_figure(fig, outputfile):
    """Tighten the layout, write `fig` to `outputfile`, then release the figure."""
    fig.tight_layout(pad=0)
    fig.savefig(outputfile, bbox_inches='tight', pad_inches=0.1, dpi=400)
    plt.close(fig)


def main():
    """Command-line entry point: map open-access statistics per TDWG area.

    Reads a delimited table holding an ``is_oa`` column, a ``contribution``
    column and an area-name column (``continent``, ``region`` or ``area``,
    selected by ``--tdwg_wgsrpd_level``), pivots it to one row per area, and
    joins it to the geometries read from the GeoJSON file.

    With ``--plot-maps`` two maps are written: the open:closed ratio to
    ``outputfile_oa`` and the percentage of records with no OA status to
    ``outputfile_unknown``. With ``--plot-composite`` both maps are stacked in
    a single figure written to ``--outputfile_composite``. With neither flag
    the pivoted table is only printed.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('inputfile')
    parser.add_argument('inputfile_geojson')
    parser.add_argument('-l', '--limit', type=int, default=None)
    parser.add_argument('--quiet', action='store_true')
    # Restricting the choices turns an unsupported level into a usage error
    # instead of a KeyError from the mapper lookup further down.
    parser.add_argument("--tdwg_wgsrpd_level", default=2, type=int,
                        choices=sorted(dist_level_mapper))
    parser.add_argument("--tax_novs_only", action='store_true')
    parser.add_argument('--plot-maps', action='store_true')
    parser.add_argument('--plot-composite', action='store_true')
    parser.add_argument('--outputfile_composite', default=None)
    parser.add_argument('-d', '--delimiter', type=str, default='\t')
    parser.add_argument('--year_min', type=int, default=2012)
    parser.add_argument('--year_max', type=int, default=2021)
    parser.add_argument('outputfile_oa')
    parser.add_argument('outputfile_unknown')
    args = parser.parse_args()

    # Fail before any work is done rather than at savefig(None).
    if args.plot_composite and args.outputfile_composite is None:
        parser.error('--plot-composite requires --outputfile_composite')

    def log(*values):
        """Print progress output unless --quiet was given."""
        if not args.quiet:
            print(*values)

    ###########################################################################
    # 1. Read data files
    ###########################################################################
    df = pd.read_csv(args.inputfile, sep=args.delimiter, nrows=args.limit)
    df = df.replace({np.nan: None})
    log('Read {} of {} grouped WCVP dist rows'.format(args.inputfile, len(df)))

    ###########################################################################
    # 2. Preparation
    ###########################################################################
    # 2.1 Add placeholder for NULL values in the is_oa field. Assign the result
    # back: a chained in-place fillna on df.is_oa is not guaranteed to update
    # the parent frame.
    df['is_oa'] = df['is_oa'].fillna('n/a')

    # 2.2 Rename columns
    df.rename(columns={'is_oa': 'Open access'}, inplace=True)

    # 2.3 Use TDWG WGSRPD level to determine area name column
    # (continent, region, area etc)
    area_name_column = dist_level_mapper[args.tdwg_wgsrpd_level]
    log(args.tdwg_wgsrpd_level, area_name_column)

    # 2.4 Pivot table to get one column per Open access value. No aggfunc is
    # passed, so pandas' default aggregation applies to duplicate
    # (area, Open access) pairs.
    df = df.pivot_table(index=area_name_column, columns='Open access',
                        values='contribution').reset_index()
    # NOTE(review): the names are assigned positionally, which assumes the
    # pivot yields exactly three value columns ordered false, true, n/a;
    # pandas raises a length-mismatch ValueError if a category is absent.
    df.columns = [area_name_column.capitalize(), 'OA_false', 'OA_true', 'OA_n/a']
    log(df)

    if not (args.plot_maps or args.plot_composite):
        return

    # 2.5 Calculate fractions of OA and unfindables
    df = _add_oa_metrics(df)
    log(df)

    # 2.6 Import the map of the world and attach the statistics to it
    level_name_column = 'LEVEL{}_NAM'.format(args.tdwg_wgsrpd_level)
    world = geopandas.read_file(args.inputfile_geojson)
    df.rename(columns={area_name_column.capitalize(): level_name_column},
              inplace=True)
    # Keep the GeoDataFrame on the left so the merge result can still be plotted.
    world = pd.merge(world, df, on=level_name_column)

    ###########################################################################
    # 3. Plot and save figure to outputfile
    ###########################################################################
    # Resolve the coverage label once, before any title is built, so that no
    # plotting section depends on another one having run first.
    coverage = 'tax. nov.' if args.tax_novs_only else 'all'
    ratio_title = "Ratio of open:closed access of {} IPNI nomenclatural acts ({}-{})".format(
        coverage, args.year_min, args.year_max)
    unfind_title = "Proportion of {} IPNI nomenclatural acts ({}-{}) which are non-discoverable".format(
        coverage, args.year_min, args.year_max)

    if args.plot_maps:
        # 3.1 plotting the ratio between open and closed access
        fig, ax = plt.subplots(1, 1)
        _plot_choropleth(world, 'OA_ratio', ax, ratio_title)
        _save_figure(fig, args.outputfile_oa)

        # 3.2 plotting the percentage of unfindable publications
        fig, ax = plt.subplots(1, 1)
        _plot_choropleth(world, 'OA_unfind', ax, unfind_title)
        _save_figure(fig, args.outputfile_unknown)

    # 3.3 plot composite if required
    if args.plot_composite:
        fig, axes = plt.subplots(2, 1)
        _plot_choropleth(world, 'OA_unfind', axes[0], unfind_title)
        _plot_choropleth(world, 'OA_ratio', axes[1], ratio_title)
        _save_figure(fig, args.outputfile_composite)
# Run the command-line interface only when this file is executed as a script.
if __name__ == '__main__':
    main()