Skip to content

Commit

Permalink
update world_bank scraper
Browse files Browse the repository at this point in the history
  • Loading branch information
cjyetman committed Sep 9, 2023
1 parent 848e9ce commit dba113f
Show file tree
Hide file tree
Showing 2 changed files with 114 additions and 112 deletions.
212 changes: 106 additions & 106 deletions dictionary/data_world_bank.csv
Original file line number Diff line number Diff line change
@@ -1,219 +1,219 @@
country,wb
Aruba,ABW
Afghanistan,AFG
Angola,AGO
Albania,ALB
Algeria,DZA
American Samoa,ASM
Andorra,AND
Angola,AGO
Antigua and Barbuda,ATG
United Arab Emirates,ARE
Argentina,ARG
Armenia,ARM
Aruba,ABW
American Samoa,ASM
Antigua and Barbuda,ATG
Australia,AUS
Austria,AUT
Azerbaijan,AZE
"Bahamas, The",BHS
Bahrain,BHR
Burundi,BDI
Belgium,BEL
Benin,BEN
Burkina Faso,BFA
Bangladesh,BGD
Barbados,BRB
Bulgaria,BGR
Bahrain,BHR
"Bahamas, The",BHS
Bosnia and Herzegovina,BIH
Belarus,BLR
Belgium,BEL
Belize,BLZ
Benin,BEN
Bermuda,BMU
Bhutan,BTN
Bolivia,BOL
Bosnia and Herzegovina,BIH
Botswana,BWA
Brazil,BRA
British Virgin Islands,VGB
Barbados,BRB
Brunei Darussalam,BRN
Bulgaria,BGR
Burkina Faso,BFA
Burundi,BDI
Cabo Verde,CPV
Cambodia,KHM
Cameroon,CMR
Canada,CAN
Cayman Islands,CYM
Bhutan,BTN
Botswana,BWA
Central African Republic,CAF
Chad,TCD
Canada,CAN
Switzerland,CHE
Channel Islands,CHI
Chile,CHL
China,CHN
Colombia,COL
Comoros,COM
Côte d’Ivoire,CIV
Cameroon,CMR
"Congo, Dem. Rep.",COD
"Congo, Rep.",COG
Colombia,COL
Comoros,COM
Cabo Verde,CPV
Costa Rica,CRI
Côte d'Ivoire,CIV
Croatia,HRV
Cuba,CUB
Curaçao,CUW
Cayman Islands,CYM
Cyprus,CYP
Czech Republic,CZE
Denmark,DNK
Germany,DEU
Djibouti,DJI
Dominica,DMA
Denmark,DNK
Dominican Republic,DOM
Algeria,DZA
Ecuador,ECU
"Egypt, Arab Rep.",EGY
El Salvador,SLV
Equatorial Guinea,GNQ
Eritrea,ERI
Spain,ESP
Estonia,EST
Eswatini,SWZ
Ethiopia,ETH
Faroe Islands,FRO
Fiji,FJI
Finland,FIN
Fiji,FJI
France,FRA
French Polynesia,PYF
Faroe Islands,FRO
"Micronesia, Fed. Sts.",FSM
Gabon,GAB
"Gambia, The",GMB
United Kingdom,GBR
Georgia,GEO
Germany,DEU
Ghana,GHA
Gibraltar,GIB
Guinea,GIN
"Gambia, The",GMB
Guinea-Bissau,GNB
Equatorial Guinea,GNQ
Greece,GRC
Greenland,GRL
Grenada,GRD
Guam,GUM
Greenland,GRL
Guatemala,GTM
Guinea,GIN
Guinea-Bissau,GNB
Guam,GUM
Guyana,GUY
Haiti,HTI
Honduras,HND
"Hong Kong SAR, China",HKG
Honduras,HND
Croatia,HRV
Haiti,HTI
Hungary,HUN
Iceland,ISL
India,IND
Indonesia,IDN
Isle of Man,IMN
India,IND
Ireland,IRL
"Iran, Islamic Rep.",IRN
Iraq,IRQ
Ireland,IRL
Isle of Man,IMN
Iceland,ISL
Israel,ISR
Italy,ITA
Jamaica,JAM
Japan,JPN
Jordan,JOR
Japan,JPN
Kazakhstan,KAZ
Kenya,KEN
Kyrgyz Republic,KGZ
Cambodia,KHM
Kiribati,KIR
"Korea, Dem. People's Rep.",PRK
St. Kitts and Nevis,KNA
"Korea, Rep.",KOR
Kosovo,XKX
Kuwait,KWT
Kyrgyz Republic,KGZ
Lao PDR,LAO
Latvia,LVA
Lebanon,LBN
Lesotho,LSO
Liberia,LBR
Libya,LBY
St. Lucia,LCA
Liechtenstein,LIE
Sri Lanka,LKA
Lesotho,LSO
Lithuania,LTU
Luxembourg,LUX
Latvia,LVA
"Macao SAR, China",MAC
St. Martin (French part),MAF
Morocco,MAR
Monaco,MCO
Moldova,MDA
Madagascar,MDG
Malawi,MWI
Malaysia,MYS
Maldives,MDV
Mexico,MEX
Marshall Islands,MHL
North Macedonia,MKD
Mali,MLI
Malta,MLT
Marshall Islands,MHL
Mauritania,MRT
Mauritius,MUS
Mexico,MEX
"Micronesia, Fed. Sts.",FSM
Moldova,MDA
Monaco,MCO
Mongolia,MNG
Myanmar,MMR
Montenegro,MNE
Morocco,MAR
Mongolia,MNG
Northern Mariana Islands,MNP
Mozambique,MOZ
Myanmar,MMR
Mauritania,MRT
Mauritius,MUS
Malawi,MWI
Malaysia,MYS
Namibia,NAM
Nauru,NRU
Nepal,NPL
Netherlands,NLD
New Caledonia,NCL
New Zealand,NZL
Nicaragua,NIC
Niger,NER
Nigeria,NGA
North Macedonia,MKD
Northern Mariana Islands,MNP
Nicaragua,NIC
Netherlands,NLD
Norway,NOR
Nepal,NPL
Nauru,NRU
New Zealand,NZL
Oman,OMN
Pakistan,PAK
Palau,PLW
Panama,PAN
Papua New Guinea,PNG
Paraguay,PRY
Peru,PER
Philippines,PHL
Palau,PLW
Papua New Guinea,PNG
Poland,POL
Portugal,PRT
Puerto Rico,PRI
"Korea, Dem. People's Rep.",PRK
Portugal,PRT
Paraguay,PRY
West Bank and Gaza,PSE
French Polynesia,PYF
Qatar,QAT
Romania,ROU
Russian Federation,RUS
Rwanda,RWA
Samoa,WSM
San Marino,SMR
São Tomé and Principe,STP
Saudi Arabia,SAU
Sudan,SDN
Senegal,SEN
Serbia,SRB
Seychelles,SYC
Sierra Leone,SLE
Singapore,SGP
Sint Maarten (Dutch part),SXM
Slovak Republic,SVK
Slovenia,SVN
Solomon Islands,SLB
Sierra Leone,SLE
El Salvador,SLV
San Marino,SMR
Somalia,SOM
South Africa,ZAF
Serbia,SRB
South Sudan,SSD
Spain,ESP
Sri Lanka,LKA
St. Kitts and Nevis,KNA
St. Lucia,LCA
St. Martin (French part),MAF
St. Vincent and the Grenadines,VCT
Sudan,SDN
São Tomé and Príncipe,STP
Suriname,SUR
Slovak Republic,SVK
Slovenia,SVN
Sweden,SWE
Switzerland,CHE
Eswatini,SWZ
Sint Maarten (Dutch part),SXM
Seychelles,SYC
Syrian Arab Republic,SYR
"Taiwan, China",TWN
Tajikistan,TJK
Tanzania,TZA
Turks and Caicos Islands,TCA
Chad,TCD
Togo,TGO
Thailand,THA
Tajikistan,TJK
Turkmenistan,TKM
Timor-Leste,TLS
Togo,TGO
Tonga,TON
Trinidad and Tobago,TTO
Tunisia,TUN
Turkey,TUR
Turkmenistan,TKM
Turks and Caicos Islands,TCA
Türkiye,TUR
Tuvalu,TUV
"Taiwan, China",TWN
Tanzania,TZA
Uganda,UGA
Ukraine,UKR
United Arab Emirates,ARE
United Kingdom,GBR
United States,USA
Uruguay,URY
United States,USA
Uzbekistan,UZB
Vanuatu,VUT
St. Vincent and the Grenadines,VCT
"Venezuela, RB",VEN
Vietnam,VNM
British Virgin Islands,VGB
Virgin Islands (U.S.),VIR
West Bank and Gaza,PSE
Vietnam,VNM
Vanuatu,VUT
Samoa,WSM
Kosovo,XKX
"Yemen, Rep.",YEM
South Africa,ZAF
Zambia,ZMB
Zimbabwe,ZWE
14 changes: 8 additions & 6 deletions dictionary/get_world_bank.R
Original file line number Diff line number Diff line change
@@ -1,17 +1,19 @@
# Scraper script: downloads the World Bank CLASS spreadsheet, keeps the
# economy name and code columns, drops the names listed in `not_countries`,
# and writes the result to dictionary/data_world_bank.csv.
#
# NOTE: this listing is a rendered diff without +/- markers. Where two
# near-identical statements appear back to back, the first is the pre-commit
# line and the second is its replacement; the listing is not meant to be run
# top to bottom as-is.

# Runs the shared utilities script. Presumably this is where `%>%`,
# `read_excel`, `select`, `filter` and `write_csv` get attached, since nothing
# else here loads a package -- TODO confirm.
source(here::here('dictionary/utilities.R'))

# Old (.xls) and new (.xlsx) download locations of the CLASS workbook.
url <- 'http://databank.worldbank.org/data/download/site-content/CLASS.xls'
url <- 'https://databankfiles.worldbank.org/public/ddpext_download/site-content/CLASS.xlsx'

# Temporary download target; the extension follows the URL's file type.
filename <- tempfile(fileext = '.xls')
filename <- tempfile(fileext = '.xlsx')
# NOTE(review): no `mode` argument is passed. The workbook is a binary file,
# so on Windows this may need mode = "wb" to avoid a corrupted download --
# confirm against the download.file documentation.
download.file(url, filename, quiet = TRUE)

# Names of regional, income-group and other aggregate rows. Any row whose
# country name matches one of these is removed by the filter below.
not_countries <- c("Arab World", "Caribbean small states", "Central Europe and the Baltics", "Early-demographic dividend", "East Asia & Pacific", "East Asia & Pacific (excluding high income)", "East Asia & Pacific (IDA & IBRD)", "Euro area", "Europe & Central Asia", "Europe & Central Asia (excluding high income)", "Europe & Central Asia (IDA & IBRD)", "European Union", "Fragile and conflict affected situations", "Heavily indebted poor countries (HIPC)", "High income", "IBRD only", "IDA & IBRD total", "IDA blend", "IDA only", "IDA total", "Late-demographic dividend", "Latin America & Caribbean", "Latin America & Caribbean (excluding high income)", "Latin America & Caribbean (IDA & IBRD)", "Least developed countries: UN classification", "Low & middle income", "Low income", "Lower middle income", "Middle East & North Africa", "Middle East & North Africa (excluding high income)", "Middle East & North Africa (IDA & IBRD)", "Middle income", "North America", "OECD members", "Other small states", "Pacific island small states", "Post-demographic dividend", "Pre-demographic dividend", "Small states", "South Asia", "South Asia (IDA & IBRD)", "Sub-Saharan Africa", "Sub-Saharan Africa (excluding high income)", "Sub-Saharan Africa (IDA & IBRD)", "Upper middle income", "World")

# Old form: skip 6 rows, assign placeholder column names a..i, keep columns
# 3 and 4 by position. New form: read the "List of economies" sheet and pick
# the `Economy` and `Code` columns by name. Both are then renamed to
# `country` / `wb` and filtered to rows that are not in `not_countries` and
# have a non-missing code.
# NOTE(review): the comment below appears to describe the old positional
# call; it may be stale for the new by-name call -- confirm.
# weird read_excel call to silence warnings
wb <- read_excel(filename, skip = 6, col_names = letters[1:9]) %>%
select(3:4) %>%
wb <- read_excel(filename, sheet = "List of economies") %>%
select(Economy, Code) %>%
setNames(c('country', 'wb')) %>%
filter(!country %in% not_countries,
!is.na(wb))
filter(
!country %in% not_countries,
!is.na(wb)
)

# Writes the country/code table to the dictionary CSV, passing "" as the
# representation for missing values.
# NOTE(review): the output path is relative to the working directory, whereas
# the input script above is located with here::here() -- confirm the script
# is always run from the project root.
wb %>% write_csv('dictionary/data_world_bank.csv', na = "")

0 comments on commit dba113f

Please sign in to comment.