-
Notifications
You must be signed in to change notification settings - Fork 20
/
womensmarch.participation.plots.R
131 lines (114 loc) · 4.89 KB
/
womensmarch.participation.plots.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
# URL of the Google Sheets HTML view that holds the event-level crowd
# estimates; it is fetched with read_html() when `crowds` is built.
data_source <- "https://docs.google.com/spreadsheets/d/1xa0iLqYKz8x9Yc_rfhtmSOJQ2EGgeUVjvV4A8LsIaxY/htmlview?sle=true#gid=0"
library(dplyr)
library(tidyr)
library(stringr)
library(rvest)
library(Hmisc)
library(WDI)
library(scales)
# Scrape the spreadsheet and reduce it to one cleaned row per march event.
crowds <- local({
  # take the first table found on the page, keeping its header row as data
  sheet <- html_table(read_html(data_source), header = FALSE)[[1]]

  # strip thousands separators so the crowd estimates parse as numbers
  to_count <- function(txt) as.numeric(gsub(",", "", txt))

  sheet %>%
    # rows 1-11 are junk sitting above the actual listing
    slice(12:n()) %>%
    select(location = X2,
           state = X4,
           country = X5,
           est.low = X6,
           est.high = X7,
           source = X10) %>%
    mutate(est.low = to_count(est.low),
           est.high = to_count(est.high)) %>%
    # discard leftover rows that carry no country or are aggregate lines
    filter(!(is.na(country) |
               grepl("Disablity", location) |
               grepl("Int'l", location)))
})
### US STATES ###
# Sum the high and low crowd estimates over all US events in each state.
state_marchers <- local({
  us_events <- filter(crowds, country == "US")
  by_state <- group_by(us_events, state)
  summarise(by_state,
            total.high = sum(est.high, na.rm = TRUE),
            total.low = sum(est.low, na.rm = TRUE))
})
# 2016 population estimates from the Census Bureau file, one row per
# state-level entry, tagged with a postal abbreviation for joining.
state_pops <- local({
  census <- read.csv("http://www2.census.gov/programs-surveys/popest/datasets/2010-2016/national/totals/nst-est2016-alldata.csv",
                     stringsAsFactors = FALSE)

  census %>%
    # keep only rows with a positive STATE code
    filter(STATE > 0) %>%
    transmute(statename = NAME,
              pop = POPESTIMATE2016) %>%
    mutate(state = state.abb[match(statename, state.name)],
           # state.name/state.abb have no DC entry, so patch it in by hand
           state = replace(state, statename == "District of Columbia", "DC"))
})
# Join march totals to state populations and express each total as a
# percentage of the host state's population, sorted from the highest
# high-estimate share to the lowest.
# The join key is named explicitly so the join never depends on whichever
# column names the two tables happen to share.
state <- left_join(state_marchers, state_pops, by = "state") %>%
  # drop march rows whose state code found no population match
  filter(!is.na(pop)) %>%
  mutate(marchers.percent.high = (total.high / pop) * 100,
         marchers.percent.low = (total.low / pop) * 100) %>%
  arrange(desc(marchers.percent.high))
# create a version that drops DC, which is an outlier in a couple of ways
state_nodc <- filter(state, state != "DC")
# Dot chart of state-level participation (high estimates, DC excluded),
# written to a 5 x 7 inch PNG at 150 dpi.
# `units` is spelled out in full rather than relying on partial argument
# matching of `unit`.
png("womens.marchers.state.percent.png",
    res = 150, width = 5, height = 7, units = "in")
# margins in inches: bottom, left, top, right
par(mai = c(1, 0.25, 0.25, 0.25))
dotchart2(state_nodc$marchers.percent.high,
          labels = state_nodc$statename, cex.labels = 3/4,
          lines = TRUE, lwd = 1/20, lty = 3,
          dotsize = 1, col = "deeppink2", pch = 20,
          xlab = "2017 Women's March event participants \nas a share of pop. of events' host state")
dev.off()
# Get 2016 election results by state and compute Trump's share of the total
# vote as a percentage, keyed by postal abbreviation.
state_vote <- read.table("https://raw.githubusercontent.com/kshaffer/election2016/master/2016ElectionResultsByState.csv",
                         sep = ",", header = TRUE, stringsAsFactors = FALSE) %>%
  mutate(trumpshare = trumpVotes / totalVotes * 100) %>%
  select(state = postal, trumpshare)
# Merge vote data with the no-DC version of the state data. The key is named
# explicitly so the join does not depend on implicit shared-column detection.
state_nodc <- left_join(state_nodc, state_vote, by = "state")
# Scatter plot of participation (high estimate, % of state population)
# against Trump's vote share, with state abbreviations as the plotting
# symbols; written to a 5 x 5 inch PNG at 150 dpi.
png("womens.marchers.relative.to.trump.vote.png",
    res = 150, width = 5, height = 5, units = "in")
# margins in inches: bottom, left, top, right
par(mai = c(1, 1, 0.1, 0.1))
# set up an empty frame; the axis limits are fixed here
plot(x = state_nodc$trumpshare, xlim = c(25, 75),
     y = state_nodc$marchers.percent.high, ylim = c(0, 4),
     type = "n", axes = FALSE,
     xlab = "Trump's host-state vote share",
     ylab = "total marchers as % of host-state pop.")
# xlim is not a graphical parameter of text() and only raised a warning
# there, so it is no longer passed; plot() above already fixes the limits
text(x = state_nodc$trumpshare,
     y = state_nodc$marchers.percent.high,
     labels = state_nodc$state,
     cex = 1, col = scales::alpha("black", 2/3))
axis(1, at = seq(25, 75, 25))
axis(2, at = seq(0, 4, 1), las = 2)
dev.off()
### GLOBAL ###
# Country-level sums of the low and high crowd estimates, largest high total
# first, each country tagged with a three-letter code for joining.
crowds_intl <- crowds %>%
  # rows with an empty country string are not attributable to any country
  filter(country != "") %>%
  group_by(country) %>%
  summarise(total.low = sum(est.low, na.rm = TRUE),
            total.high = sum(est.high, na.rm = TRUE)) %>%
  arrange(desc(total.high)) %>%
  # look codes up from the names exactly as they appear in the sheet
  mutate(iso3c = countrycode::countrycode(country, "country.name", "iso3c")) %>%
  # relabel "Czech" and pin its code by hand
  mutate(country = replace(country, country == "Czech", "Czechia"),
         iso3c = replace(iso3c, country == "Czechia", "CZE")) %>%
  # Kosovo gets a hand-assigned code
  mutate(iso3c = replace(iso3c, country == "Kosovo", "KOS"))
# Fetch 2015 total population (indicator SP.POP.TOTL) through the WDI
# package and shape it into an iso3c/pop lookup table for merging.
pop_intl <- local({
  wb_raw <- WDI(country = "all",
                indicator = "SP.POP.TOTL",
                extra = TRUE,
                start = 2015, end = 2015)
  wb_pop <- select(filter(wb_raw, !is.na(iso3c)),
                   iso3c, pop = SP.POP.TOTL)
  # hand-entered row for Kosovo under the "KOS" code assigned in crowds_intl;
  # the original author guessed the World Bank does not treat it as a state
  kosovo <- data.frame(iso3c = "KOS", pop = 1824000)
  mutate(rbind(wb_pop, kosovo), iso3c = as.character(iso3c))
})
# Merge country crowd totals with population and compute the participation
# rate (high estimate as a percentage of national population), sorted from
# highest to lowest. The join key is named explicitly so the join does not
# rely on implicit shared-column detection.
intl <- left_join(crowds_intl, pop_intl, by = "iso3c") %>%
  mutate(participation.rate = (total.high / pop) * 100) %>%
  arrange(desc(participation.rate))
# Dot chart of country participation rates, written to a 5 x 7.5 inch PNG
# at 150 dpi. `units` is spelled out in full rather than relying on partial
# argument matching of `unit`.
png("womens.march.global.participation.rate.png",
    res = 150, width = 5, height = 7.5, units = "in")
# margins in inches: bottom, left, top, right
par(mai = c(1, 0.2, 0.2, 0.2))
dotchart2(intl$participation.rate,
          labels = intl$country, cex.labels = 7/10,
          lines = TRUE, lwd = 1/20, lty = 3,
          dotsize = 1, col = "deeppink2", pch = 20,
          xlab = "total marchers (high estimates) \nas a percentage of national population")
dev.off()