Skip to content

Commit

Permalink
Switch to output YAML
Browse files Browse the repository at this point in the history
  • Loading branch information
nickynicolson committed Dec 5, 2022
1 parent ac9c179 commit d049e56
Show file tree
Hide file tree
Showing 3 changed files with 40 additions and 27 deletions.
30 changes: 15 additions & 15 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -102,49 +102,49 @@ data/gbif-typesloc.zip: types2publisherlocations.py data/gbif-types.zip download
# All types

# Analyse how many taxa have type material in GBIF
data/taxa2gbiftypeavailability.csv data/taxa2gbiftypeavailability.md: taxa2gbiftypeavailability.py data/gbif2wcvp.csv data/gbif-types.zip
$(python_launch_cmd) $^ $(limit_args) data/taxa2gbiftypeavailability.csv data/taxa2gbiftypeavailability.md
data/taxa2gbiftypeavailability.csv data/taxa2gbiftypeavailability.yaml: taxa2gbiftypeavailability.py data/gbif2wcvp.csv data/gbif-types.zip
$(python_launch_cmd) $^ $(limit_args) data/taxa2gbiftypeavailability.csv data/taxa2gbiftypeavailability.yaml

# Analyse how many taxa have type material published from within native range
data/taxa2nativerangetypeavailability.csv data/taxa2nativerangetypeavailability.md: taxa2nativerangetypeavailability.py data/gbif2wcvp.csv downloads/wcvp_dist.txt data/gbif-types.zip data/gbif-typesloc.zip downloads/tdwg_wgsrpd_l3.json
$(python_launch_cmd) $^ $(limit_args) data/taxa2nativerangetypeavailability.csv data/taxa2nativerangetypeavailability.md
data/taxa2nativerangetypeavailability.csv data/taxa2nativerangetypeavailability.yaml: taxa2nativerangetypeavailability.py data/gbif2wcvp.csv downloads/wcvp_dist.txt data/gbif-types.zip data/gbif-typesloc.zip downloads/tdwg_wgsrpd_l3.json
$(python_launch_cmd) $^ $(limit_args) data/taxa2nativerangetypeavailability.csv data/taxa2nativerangetypeavailability.yaml

###############################################################################
# Post-CBD (Convention on Biological Diversity)

cbd_impl_year:=1992

# Analyse how many taxa have type material in GBIF (run with --year_min=$(cbd_impl_year))
data/taxa2gbiftypeavailability-cbd.csv data/taxa2gbiftypeavailability-cbd.md: taxa2gbiftypeavailability.py data/gbif2wcvp.csv data/gbif-types.zip
$(python_launch_cmd) $^ $(limit_args) --year_min=$(cbd_impl_year) data/taxa2gbiftypeavailability-cbd.csv data/taxa2gbiftypeavailability-cbd.md
data/taxa2gbiftypeavailability-cbd.csv data/taxa2gbiftypeavailability-cbd.yaml: taxa2gbiftypeavailability.py data/gbif2wcvp.csv data/gbif-types.zip
$(python_launch_cmd) $^ $(limit_args) --year_min=$(cbd_impl_year) data/taxa2gbiftypeavailability-cbd.csv data/taxa2gbiftypeavailability-cbd.yaml

# Analyse how many taxa have type material published from within native range (run with --year_min=$(cbd_impl_year))
data/taxa2nativerangetypeavailability-cbd.csv data/taxa2nativerangetypeavailability-cbd.md: taxa2nativerangetypeavailability.py data/gbif2wcvp.csv downloads/wcvp_dist.txt data/gbif-types.zip data/gbif-typesloc.zip downloads/tdwg_wgsrpd_l3.json
$(python_launch_cmd) $^ $(limit_args) --year_min=$(cbd_impl_year) data/taxa2nativerangetypeavailability-cbd.csv data/taxa2nativerangetypeavailability-cbd.md
data/taxa2nativerangetypeavailability-cbd.csv data/taxa2nativerangetypeavailability-cbd.yaml: taxa2nativerangetypeavailability.py data/gbif2wcvp.csv downloads/wcvp_dist.txt data/gbif-types.zip data/gbif-typesloc.zip downloads/tdwg_wgsrpd_l3.json
$(python_launch_cmd) $^ $(limit_args) --year_min=$(cbd_impl_year) data/taxa2nativerangetypeavailability-cbd.csv data/taxa2nativerangetypeavailability-cbd.yaml

###############################################################################
# Post-Nagoya (Nagoya Protocol on Access and Benefit-sharing)

nagoya_impl_year:=2014

# Analyse how many taxa have type material in GBIF (run with --year_min=$(nagoya_impl_year))
data/taxa2gbiftypeavailability-nagoya.csv data/taxa2gbiftypeavailability-nagoya.md: taxa2gbiftypeavailability.py data/gbif2wcvp.csv data/gbif-types.zip
$(python_launch_cmd) $^ $(limit_args) --year_min=$(nagoya_impl_year) data/taxa2gbiftypeavailability-nagoya.csv data/taxa2gbiftypeavailability-nagoya.md
data/taxa2gbiftypeavailability-nagoya.csv data/taxa2gbiftypeavailability-nagoya.yaml: taxa2gbiftypeavailability.py data/gbif2wcvp.csv data/gbif-types.zip
$(python_launch_cmd) $^ $(limit_args) --year_min=$(nagoya_impl_year) data/taxa2gbiftypeavailability-nagoya.csv data/taxa2gbiftypeavailability-nagoya.yaml

# Analyse how many taxa have type material published from within native range (run with --year_min=$(nagoya_impl_year))
data/taxa2nativerangetypeavailability-nagoya.csv data/taxa2nativerangetypeavailability-nagoya.md: taxa2nativerangetypeavailability.py data/gbif2wcvp.csv downloads/wcvp_dist.txt data/gbif-types.zip data/gbif-typesloc.zip downloads/tdwg_wgsrpd_l3.json
$(python_launch_cmd) $^ $(limit_args) --year_min=$(nagoya_impl_year) data/taxa2nativerangetypeavailability-nagoya.csv data/taxa2nativerangetypeavailability-nagoya.md
data/taxa2nativerangetypeavailability-nagoya.csv data/taxa2nativerangetypeavailability-nagoya.yaml: taxa2nativerangetypeavailability.py data/gbif2wcvp.csv downloads/wcvp_dist.txt data/gbif-types.zip data/gbif-typesloc.zip downloads/tdwg_wgsrpd_l3.json
$(python_launch_cmd) $^ $(limit_args) --year_min=$(nagoya_impl_year) data/taxa2nativerangetypeavailability-nagoya.csv data/taxa2nativerangetypeavailability-nagoya.yaml


all: data/taxa2gbiftypeavailability.md data/taxa2nativerangetypeavailability.md data/taxa2gbiftypeavailability-cbd.md data/taxa2nativerangetypeavailability-cbd.md data/taxa2gbiftypeavailability-nagoya.md data/taxa2nativerangetypeavailability-nagoya.md
all: data/taxa2gbiftypeavailability.yaml data/taxa2nativerangetypeavailability.yaml data/taxa2gbiftypeavailability-cbd.yaml data/taxa2nativerangetypeavailability-cbd.yaml data/taxa2gbiftypeavailability-nagoya.yaml data/taxa2nativerangetypeavailability-nagoya.yaml

data_archive_zip:=$(shell basename $(CURDIR))-data.zip
downloads_archive_zip:=$(shell basename $(CURDIR))-downloads.zip

archive: data/taxa2gbiftypeavailability.md data/taxa2nativerangetypeavailability.md data/taxa2gbiftypeavailability-cbd.md data/taxa2nativerangetypeavailability-cbd.md data/taxa2gbiftypeavailability-nagoya.md data/taxa2nativerangetypeavailability-nagoya.md
archive: data/taxa2gbiftypeavailability.yaml data/taxa2nativerangetypeavailability.yaml data/taxa2gbiftypeavailability-cbd.yaml data/taxa2nativerangetypeavailability-cbd.yaml data/taxa2gbiftypeavailability-nagoya.yaml data/taxa2nativerangetypeavailability-nagoya.yaml
mkdir -p archive
echo "Archived on $(date_formatted)" >> data/archive-info.txt
zip archive/$(data_archive_zip) data/*.md -r
zip archive/$(data_archive_zip) data/*.yaml -r
echo "Archived on $(date_formatted)" >> downloads/archive-info.txt
zip archive/$(downloads_archive_zip) downloads/* -r

Expand Down
17 changes: 11 additions & 6 deletions taxa2gbiftypeavailability.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from unidecode import unidecode
import re
from pygbif import registry
import yaml

def main():
parser = argparse.ArgumentParser()
Expand All @@ -14,7 +15,7 @@ def main():
parser.add_argument('--delimiter_occ', type=str, default='\t')
parser.add_argument('--year_min', type=int, default=None)
parser.add_argument("outputfile_data", type=str)
parser.add_argument("outputfile_md", type=str)
parser.add_argument("outputfile_yaml", type=str)
args = parser.parse_args()

###########################################################################
Expand Down Expand Up @@ -62,11 +63,15 @@ def main():
mask = (df.typeStatus.notnull())
type_status_available_count = df[mask].accepted_id.nunique()
total_taxa_count = df.accepted_id.nunique()
with open(args.outputfile_md,mode='w') as f:
summary_message = '{:.2%} taxa have type material available ({} of {})'.format(type_status_available_count/total_taxa_count, type_status_available_count, total_taxa_count)
print('Writing {} to {}'.format(summary_message, args.outputfile_md))
f.write(summary_message)

analysis_variables = dict()
analysis_variables['taxon_count'] = total_taxa_count
analysis_variables['taxa_with_types_available_count'] = type_status_available_count
analysis_variables['taxa_with_types_available_pc'] = round((type_status_available_count/total_taxa_count)*100)
output_variables = dict()
output_variables['taxa2gbiftypeavailability']=analysis_variables
with open(args.outputfile_yaml, 'w') as f:
yaml.dump(output_variables, f)

###########################################################################
# 4. Output
###########################################################################
Expand Down
20 changes: 14 additions & 6 deletions taxa2nativerangetypeavailability.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import re
from pygbif import registry
import numpy as np
import yaml

def main():
parser = argparse.ArgumentParser()
Expand All @@ -20,7 +21,7 @@ def main():
parser.add_argument('--delimiter_publ', type=str, default='\t')
parser.add_argument("inputfile_tdwg_wgsrpd_l3_json", type=str)
parser.add_argument("outputfile_data", type=str)
parser.add_argument("outputfile_md", type=str)
parser.add_argument("outputfile_yaml", type=str)
args = parser.parse_args()

###########################################################################
Expand Down Expand Up @@ -121,22 +122,29 @@ def main():
wgsrpd_columns = {'continent_code_l1':'publishingOrg_continent_code_l1',
'region_code_l2':'publishingOrg_region_code_l2',
'area_code_l3':'publishingOrg_area_code_l3'}
analysis_variables = dict()
analysis_variables['taxon_count'] = accepted_id_count
summary_message=""
for (distribution_loc, publishing_org_loc) in wgsrpd_columns.items():
mask=(df[distribution_loc] == df[publishing_org_loc])
accepted_id_served_from_within_native_range_count = df[mask].accepted_id.nunique()
accepted_id_count = df.accepted_id.nunique()
summary_message += ('- {:.2%} taxa ({} of {}) are represented by type material served from within their native range in {}\n'.format(accepted_id_served_from_within_native_range_count/accepted_id_count, accepted_id_served_from_within_native_range_count, accepted_id_count, distribution_loc))
print(summary_message)
current_level_variables = dict()
current_level_variables['taxon_represented_total']=accepted_id_served_from_within_native_range_count
current_level_variables['taxon_represented_pc']=round((accepted_id_served_from_within_native_range_count/accepted_id_count)*100)
analysis_variables[distribution_loc] = current_level_variables

output_variables = dict()
output_variables['taxa2nativerangetypeavailability'] = analysis_variables

###########################################################################
# 4. Output
###########################################################################
#
# 4.1 markdown format statement
with open(args.outputfile_md, 'w') as f:
print(summary_message)
f.write(summary_message)
# 4.1 YAML format data variables
with open(args.outputfile_yaml, 'w') as f:
yaml.dump(output_variables, f)

# 4.2 Data
# TBC
Expand Down

0 comments on commit d049e56

Please sign in to comment.