-
Notifications
You must be signed in to change notification settings - Fork 20
/
china.monthly.strike.counts.R
169 lines (148 loc) · 9.6 KB
/
china.monthly.strike.counts.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
# This script uses data downloaded from the China Labour Bulletin's strike map (http://maps.clb.org.hk/strikes/en)
# on May 28, 2015. The date range was set to January 2011-May 2015; all other fields were set to "Any".
# It will need to be edited if the number of months covered changes. The summarizing parts will work fine;
# it's just that some aspects of the plotting code (e.g., position of axes) are based on time series with 53 observations.
# You can download the data file I used from my Google Drive here:
# https://drive.google.com/file/d/0B5wyt4eDq98GUWNwTUg0ZGpWVHM/view?usp=sharing
library(XLConnect)
library(plyr)
library(scales)
# Read the incident records from the first worksheet of the downloaded workbook.
CLB <- readWorksheetFromFile(
  "china labor bulletin incident data.xlsx",
  sheet = 1
)
# Take the year from characters 1-4 and the month from characters 6-7 of the
# text form of each incident date.
CLB$year <- as.numeric(substring(as.character(CLB$Date), 1, 4))
CLB$month <- as.numeric(substring(as.character(CLB$Date), 6, 7))
# Constant column of ones, so that summing it counts incidents.
CLB$event <- 1
# PLOTTING BY PROVINCE
# Count incidents for every province-month that has at least one incident.
CLB.mo.pro <- ddply(CLB, .(year, month, Location), summarise, total = sum(event))
# The resulting df doesn't have rows for province-months with no events, but we
# want those shown as zeroes. So we build a grid of all province-month
# combinations, merge the counts onto it, and replace the missing totals with 0.
#
# The last month to keep is taken from the data itself (the latest month
# observed in the latest year) rather than being hard-coded, so this step keeps
# working when a file covering a different date range is used.
last.year <- max(CLB$year, na.rm = TRUE)
last.month <- max(CLB$month[which(CLB$year == last.year)], na.rm = TRUE)
CLB.mo.pro.grid <- expand.grid(
  Location = unique(CLB$Location),
  year = seq(min(CLB$year, na.rm = TRUE), last.year),
  month = seq_len(12)
)
# Cut rows for months after the last observed one and for unnamed locations.
CLB.mo.pro.grid <- subset(
  CLB.mo.pro.grid,
  !is.na(Location) &
    (year < last.year | (year == last.year & month <= last.month))
)
# Merge; the result is sorted by Location, then year, then month, which is the
# order the sparkline code relies on.
CLB.mo.pro.2 <- merge(
  CLB.mo.pro.grid, CLB.mo.pro,
  by = c("Location", "year", "month"),
  all.x = TRUE
)
# Grid rows with no matching count get a total of zero.
CLB.mo.pro.2$total[is.na(CLB.mo.pro.2$total)] <- 0
# Draw one sparkline panel of monthly incident counts for a single province.
#   name: the Location value whose rows of CLB.mo.pro.2 should be plotted; it
#         is also written in the left margin as the panel label.
# Every panel uses the same fixed 0-40 vertical range with guide lines at 0, 20
# and 40. The right-hand axis labels are placed at x = 50, a position chosen
# for series of 53 monthly observations.
plotit <- function(name) {
  monthly.counts <- CLB.mo.pro.2$total[which(CLB.mo.pro.2$Location == name)]
  guide.levels <- c(0, 20, 40)
  # Empty frame first, so the guide lines end up underneath the series.
  plot(
    monthly.counts,
    type = "n", xlab = "", ylab = "",
    ylim = c(0, 40), axes = FALSE
  )
  mtext(name, side = 2, line = 1, las = 2, cex = 0.8)
  abline(h = guide.levels, col = alpha("gray50", 0.5), lwd = 0.5)
  lines(monthly.counts, col = "gray25", lwd = 2)
  axis(4, at = guide.levels, tick = FALSE, pos = 50, las = 2)
}
# Write the province sparklines to a PNG, one panel per province, laid out in
# two columns. The number of rows is computed from the province table itself
# (CLB.mo.pro.2); the industry table does not exist yet at this point in the
# script and has no Location column.
png("sparklines.province.png", width = 6, height = 10, units = "in", bg = "white", res = 300)
provinces <- as.character(unique(CLB.mo.pro.2$Location))
par(
  mai = c(0.1, 1.25, 0.1, 0.2),
  cex.axis = 0.5,
  mfrow = c(ceiling(length(provinces) / 2), 2)
)
for (province in provinces) plotit(province)
dev.off()
# BY INDUSTRY
# Count incidents for every industry-month that has at least one incident.
CLB.mo.ind <- ddply(CLB, .(year, month, Industry), summarise, total = sum(event))
# The resulting df doesn't have rows for industry-months with no events, but we
# want those shown as zeroes. So we build a grid of all industry-month
# combinations, merge the counts onto it, and replace the missing totals with 0.
#
# The last month to keep is taken from the data itself (the latest month
# observed in the latest year) rather than being hard-coded, so this step keeps
# working when a file covering a different date range is used.
last.year <- max(CLB$year, na.rm = TRUE)
last.month <- max(CLB$month[which(CLB$year == last.year)], na.rm = TRUE)
CLB.mo.ind.grid <- expand.grid(
  Industry = unique(CLB$Industry),
  year = seq(min(CLB$year, na.rm = TRUE), last.year),
  month = seq_len(12)
)
# Cut rows for months after the last observed one and for unnamed industries.
CLB.mo.ind.grid <- subset(
  CLB.mo.ind.grid,
  !is.na(Industry) &
    (year < last.year | (year == last.year & month <= last.month))
)
# Merge; the result is sorted by Industry, then year, then month, which is the
# order the sparkline code relies on.
CLB.mo.ind.2 <- merge(
  CLB.mo.ind.grid, CLB.mo.ind,
  by = c("Industry", "year", "month"),
  all.x = TRUE
)
# Grid rows with no matching count get a total of zero.
CLB.mo.ind.2$total[is.na(CLB.mo.ind.2$total)] <- 0
# Draw one sparkline panel of monthly incident counts for a single industry.
# This definition replaces the province version of plotit defined earlier.
#   name: the Industry value whose rows of CLB.mo.ind.2 should be plotted; it
#         is also written in the left margin as the panel label.
# The vertical range is the range of totals across all industries, rounded to
# the nearest ten, so every panel shares one scale and industries can be
# compared. Guide lines and right-hand labels sit at 0, 60 and 120; the labels
# are placed at x = 50, a position chosen for series of 53 monthly observations.
plotit <- function(name) {
  monthly.counts <- CLB.mo.ind.2$total[which(CLB.mo.ind.2$Industry == name)]
  shared.ylim <- round(range(CLB.mo.ind.2$total), -1)
  guide.levels <- c(0, 60, 120)
  # Empty frame first, so the guide lines end up underneath the series.
  plot(
    monthly.counts,
    type = "n", xlab = "", ylab = "",
    ylim = shared.ylim, axes = FALSE
  )
  mtext(name, side = 2, line = 1, las = 2, cex = 0.8)
  abline(h = guide.levels, col = alpha("gray50", 0.5), lwd = 0.5)
  lines(monthly.counts, col = "gray25", lwd = 2)
  axis(4, at = guide.levels, tick = FALSE, pos = 50, las = 2)
}
# Write the industry sparklines to a PNG, one panel per industry, laid out in
# two columns. The height is set in proportion to the province figure
# (5/16 of its 10 inches) so the sparklines are the same size across the two
# figures.
png("sparklines.industry.png", width = 6, height = 5 / 16 * 10, units = "in", bg = "white", res = 300)
industries <- as.character(unique(CLB.mo.ind.2$Industry))
par(
  mai = c(0.1, 1.25, 0.1, 0.2),
  cex.axis = 0.5,
  mfrow = c(ceiling(length(industries) / 2), 2)
)
# Iterating over the values themselves avoids the 1:length() trap when the
# vector is empty.
for (industry in industries) plotit(industry)
dev.off()
# BY DEMAND
# Build one 0/1 indicator column per demand category. A record gets a 1 in a
# category when its Demands text matches that category's pattern, otherwise 0.
# Separate indicator columns are used, rather than a single claim-type
# variable, because a record can fit several categories at once.
#
# Matching is done with vectorised grepl() on the whole lowercased field. None
# of the patterns contains a comma, so this flags a record exactly when at
# least one of its comma-separated demands matches. Unlike a per-record
# isTRUE(grep(...) > 0) test, it still returns 1 when a keyword appears in more
# than one of the demands (isTRUE() is only TRUE for a length-one TRUE).
# Records with a missing Demands value get 0 in every category.

# Start by making a lowercase version of the Demands var to avoid errors of
# omission from capitalization.
CLB$demands <- tolower(CLB$Demands)

# Column name -> regular expression. Alternatives that belong to one category
# are joined with "|".
demand.patterns <- c(
  wage.arrears = "wage arrear",
  social.security = "social security",
  work.conditions = "work conditions",
  pay = "pay",
  violence = "violence|attack|thug|beat|kill",
  compensation = "compensation",
  pension = "pension",
  # "car" is anchored at word boundaries so it does not fire on words such as
  # "care" or "card".
  taxi = "taxi|cabs|uber|\\bcars?\\b|rickshaw",
  relocation = "relocation",
  corruption = "corruption",
  prices = "prices",
  # A bare "ot" would match inside "other", "not", "protest", and so on; match
  # it only as a whole word, and accept the spelled-out form as well.
  overtime = "\\b(ot|overtime)\\b",
  layoffs = "layoff",
  bonus = "bonus",
  merger = "merger",
  housing = "housing",
  regulation = "regulation",
  leave = "leave",
  contract = "contract",
  management = "management"
)

for (demand in names(demand.patterns)) {
  CLB[[demand]] <- as.numeric(grepl(demand.patterns[[demand]], CLB$demands))
}
# Collapse the records to one row per year-month. `total` is the number of
# incidents; each of the other columns adds up one or more of the 0/1 demand
# indicators, so an incident flagged in several indicators of the same group
# contributes more than once to that group's sum.
CLB.monthly <- ddply(CLB, .(year, month), function(incidents) {
  with(incidents, data.frame(
    total = sum(event),
    pay = sum(wage.arrears, pay, compensation, bonus, overtime),
    conditions = sum(work.conditions, housing, leave),
    layoffs = sum(layoffs),
    taxi = sum(taxi),
    welfare = sum(social.security, pension)
  ))
})
# Plot the monthly incident totals as a single line chart and write it to a PNG.
png("china.monthly.strike.count.png",
    width = 6, height = 2.5, units = "in", bg = "white", res = 300)
par(cex.axis = 0.7, mai = c(0.4, 0.5, 0.1, 0.1))
# Empty frame first, so the guide lines end up underneath the series.
with(CLB.monthly, plot(total, type = "n", xlab = "", ylab = "", axes = FALSE))
axis(2, tick = FALSE, las = 2)
# Label every 12th row, starting from the first, with the year recorded in that
# row. Taking the labels from the data keeps `at` and `labels` the same length
# when the file covers a different number of years.
year.starts <- seq(1, nrow(CLB.monthly), by = 12)
axis(1, at = year.starts, labels = CLB.monthly$year[year.starts], tick = FALSE, pos = 15)
abline(h = seq(0, 300, 50), lwd = 0.5, col = alpha("gray50", alpha = 1/2))
with(CLB.monthly, lines(total, col = "gray25", lwd = 2))
dev.off()
# Sparklines for four of the monthly demand series, in a 2 x 2 grid. The panel
# labels are set by hand here rather than taken from the column names.
png("sparklines.demands.png", width = 6, height = 2 / 16 * 10, units = "in", bg = "white", res = 300)
par(mai = c(0.1, 1.25, 0.1, 0.2), cex.axis = 0.5, mfrow = c(2, 2))
# CLB.monthly column -> label written in the left margin. Panels are filled
# row by row in this order.
panel.labels <- c(
  pay = "Pay",
  welfare = "Social security",
  conditions = "Work conditions",
  layoffs = "Layoffs"
)
guide.levels <- seq(0, 200, 100)
for (series in names(panel.labels)) {
  monthly.counts <- CLB.monthly[[series]]
  # All four panels share the same fixed 0-260 vertical range.
  plot(
    monthly.counts,
    type = "n", xlab = "", ylab = "",
    ylim = c(0, 260), axes = FALSE
  )
  mtext(panel.labels[[series]], side = 2, line = 1, las = 2, cex = 0.8)
  abline(h = guide.levels, col = alpha("gray50", 0.5), lwd = 0.5)
  lines(monthly.counts, col = "gray25", lwd = 2)
  axis(4, at = guide.levels, tick = FALSE, pos = 50, las = 2)
}
dev.off()