-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathREADME.Rmd
146 lines (119 loc) · 6.66 KB
/
README.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
---
title: "TLA+ Surveys"
date: "(`r Sys.Date()`)"
output:
  github_document:
    toc: true
    toc_depth: 2
    html_preview: false
---
[2022 survey](https://docs.google.com/forms/d/1GQnqZgfJKTZgZd-TydtdqT8axvRrTdWH7M87VUt0_Wk/edit?usp=sharing)
```{r, echo=FALSE, warning=FALSE, message=FALSE}
## Load the survey responses from the CSV export of the Google Form.
data <- read.csv("2022.csv", header = TRUE, sep = ",")
## Give the columns used below short names: 2 to 4, 8, 10 to 11 and 19 to 26.
## Every other column keeps the name from the export
## (https://www.tutorialkart.com/r-tutorial/r-dataframe-change-column-name/).
IA <- c(
  "Spec2Code", "Doc", "Library", "Performance",
  "Lang", "CI/CD", "Extensibility", "IDE"
)
names(data)[c(2:4, 8, 10:11, 19:26)] <- c(
  "Frequency", "Expertise", "Learned",
  "Welcome",
  "Country", "Underrepresented",
  IA
)
## Optional demographic filters, disabled by default. They use dplyr, which is
## only attached further down, so load it first when enabling one of them.
#data <- data %>% filter(Welcome == "No")
#data <- data %>% filter(Underrepresented == "Yes")
########### Geography
## Replace empty answers in the country column (column 10) with "Unknown";
## per the original note this keeps countrycode from mapping them to Namibia.
## Only empty strings match the pattern, NA values stay NA.
data[[10]] <- gsub("^$", "Unknown", data[[10]], ignore.case = TRUE)
library(countrycode)
library(dplyr)
library(ggplot2)
## Map the free-text country answers to ISO two-letter codes and to
## normalised country names.
data$iso2c <- countrycode(
  data$Country,
  origin = "country.name", destination = "iso2c"
)
data$countryname <- countrycode(
  data$Country,
  origin = "country.name", destination = "country.name"
)
## Bar chart of the number of respondents per country, most frequent first.
data %>%
  count(countryname) %>%
  ggplot(aes(x = reorder(countryname, -n), y = n, fill = countryname)) +
  geom_col() +
  labs(x = "Country") +
  theme(
    axis.text.x = element_text(angle = 45, vjust = 0.5, hjust = 1),
    legend.position = "none"
  )
## World map coloured by the number of respondents per country. Uses the
## two-letter codes in data$iso2c that the Geography section above derives
## with countrycode (https://stackoverflow.com/a/64769847/6291195).
library(maps)
world_map <- map_data("world")
world_map <- subset(world_map, region != "Antarctica")
## Convert the map's region names to two-letter codes so that they can be
## matched against data$iso2c through map_id.
world_map$region <- iso.alpha(world_map$region)
## Respondents per country code; rows without a recognised country are dropped.
respondents_by_country <- data %>%
  filter(!is.na(iso2c)) %>%
  count(iso2c)
## NOTE(review): ggplot2 >= 3.4.0 prefers `linewidth` over `size` for outline
## widths; switch once the minimum supported ggplot2 version is confirmed.
ggplot(respondents_by_country) +
  ## Base layer: every country outlined on a white fill.
  geom_map(
    data = world_map, map = world_map, aes(map_id = region),
    fill = "white", color = "#7f7f7f", size = 0.25
  ) +
  ## Overlay: countries with respondents, shaded by respondent count.
  geom_map(map = world_map, aes(map_id = iso2c, fill = n), size = 0.25) +
  scale_fill_gradient(low = "#fff7bc", high = "#cc4c02", name = "Respondents") +
  ## geom_map does not set axis limits itself, so span the whole map.
  expand_limits(x = world_map$long, y = world_map$lat)
########### Improvement Areas
## Columns 19 to 26 hold the answers for the eight improvement areas.
nm1 <- 19:26
## Convert one answer column into points
## (https://stackoverflow.com/a/56963488/6291195,
## https://stackoverflow.com/a/2290107/6291195):
##   1. strip the textual decorations so that only the number remains,
##   2. parse that number; anything that is not a number becomes NA,
##   3. treat NA as 6 so that it ends up with 0 points,
##   4. award 6 minus the number as points.
rank_to_points <- function(answers) {
  for (decoration in c(" - Most Important", " - Least Important",
                       "N/A / I do not use this")) {
    answers <- gsub(decoration, "", answers)
  }
  ranks <- as.numeric(answers)
  ranks[is.na(ranks)] <- 6
  6 - ranks
}
data[nm1] <- lapply(data[nm1], rank_to_points)
library(tidyr)
library(ggplot2)
## Bar chart of the total points per improvement area, highest total first.
##   responses: data frame of survey responses whose columns 19 to 26 hold the
##              numeric points of the eight improvement areas.
##   title:     plot title naming the demographic that is shown.
## Returns the ggplot object; called at the top level of the chunk it is
## printed like any other plot expression.
plot_improvement_areas <- function(responses, title) {
  area_cols <- names(responses)[19:26]
  responses %>%
    summarise(across(all_of(area_cols), function(x) sum(x, na.rm = TRUE))) %>%
    pivot_longer(
      cols = everything(),
      names_to = "Area", values_to = "Points"
    ) %>%
    ggplot(aes(x = reorder(Area, -Points), y = Points, fill = Area)) +
    geom_bar(stat = "identity") +
    ggtitle(title) +
    xlab("Improvement Area") +
    theme(
      axis.text.x = element_text(angle = 45, vjust = 0.5, hjust = 1),
      legend.position = "none"
    )
}
## One chart for all respondents, then one per usage frequency.
plot_improvement_areas(data, "All Demographics")
data %>%
  filter(Frequency == "Rarely") %>%
  plot_improvement_areas("Rarely Demographics")
data %>%
  filter(Frequency == "Monthly") %>%
  plot_improvement_areas("Monthly Demographics")
data %>%
  filter(Frequency == "Weekly") %>%
  plot_improvement_areas("Weekly Demographics")
data %>%
  filter(Frequency == "Daily") %>%
  plot_improvement_areas("Daily Demographics")
########### Experience
## Encode the ordered categorical answers as numbers (the position of the
## answer in `levels`) for correlation analysis. Answers that are not listed
## in `levels` become NA.
data$FrequencyNum <- as.numeric(factor(
  data$Frequency,
  levels = c("Rarely", "Monthly", "Weekly", "Daily")
))
data$ExpertiseNum <- as.numeric(factor(
  data$Expertise,
  levels = c(
    "I can neither read nor write TLA+",
    "I can write useful specifications for work, but it's a struggle",
    "I can write simple specifications",
    "I am productive writing TLA+",
    "I am an expert"
  )
))
data$LearnedNum <- as.numeric(factor(
  data$Learned,
  levels = c(
    "Recently",
    "Within last 12 months",
    "Within last 5 years",
    "Longer than 5 years ago"
  )
))
nm2 <- c("FrequencyNum", "ExpertiseNum", "LearnedNum")
## Replace NA with 0 in the three encoded columns. The right-hand side must
## read from nm2 itself: feeding it the improvement-area columns would
## overwrite the encodings with unrelated scores (R only warns about the
## surplus columns, and warnings are switched off for this chunk).
data[nm2] <- lapply(data[nm2], function(x) replace(x, is.na(x), 0))
library(ggcorrplot)
my_data <- data[, nm2]
## p-values of the pairwise correlations, passed to the plot via p.mat.
p.mat <- cor_pmat(my_data)
corr <- round(cor(my_data), 3)
ggcorrplot(
  corr,
  p.mat = p.mat,
  hc.order = TRUE, type = "lower",
  colors = c("#FC4E07", "white", "#00AFBB"),
  outline.color = "white", lab = TRUE
)
```
#### README.md is generated from README.Rmd on a host with all libraries installed via:
```shell
# Knit README.Rmd; its github_document output format produces README.md.
Rscript -e "rmarkdown::render('README.Rmd')"
```
### Install required libraries and R packages (on macOS) with:
```shell
# Install pandoc and R, then every R package that README.Rmd loads
# (countrycode, dplyr, ggplot2, maps, tidyr, ggcorrplot) plus rmarkdown.
brew install pandoc r
Rscript -e "install.packages(c('rmarkdown', 'ggplot2', 'dplyr', 'tidyr', 'countrycode', 'maps', 'ggcorrplot', 'here'), repos='http://cran.us.r-project.org')"
```