-
Notifications
You must be signed in to change notification settings - Fork 31
geom_bump with flags
David Sjoberg edited this page Mar 13, 2020
·
1 revision
Make the dataset:
if(!require(ggbump)) devtools::install_github("davidsjoberg/ggbump")
if(!require(ggflags)) devtools::install_github("rensa/ggflags")
if(!require(pacman)) install.packages("pacman")
pacman::p_load(tidyverse, countrycode, janitor, padr, hablar, ggflags, ggbump, lubridate)
# Source: https://www.kaggle.com/heesoo37/120-years-of-olympic-history-athletes-and-results/version/2
df_raw <- read.csv("athlete_events.csv") %>%
clean_names() %>%
retype()
# Only include summer olympics and years after 1990
df <- df_raw %>%
filter(season == "Summer",
year >= 1990)
# Summarise data set be unique on year, country and number of medals
df <- df %>%
group_by(year, team) %>%
summarise(n_medals = sum_(!is.na(medal))) %>%
ungroup()
# replace Unified Team with Russia (because I want to use a flag)
df <- df %>%
mutate(team = case_when(team == "Unified Team" ~ "Russia",
T ~ team))
# Create rank per olympic games
df <- df %>%
group_by(year) %>%
mutate(rank = rank(-n_medals, ties.method = "random")) %>%
ungroup()
# Create average rank per country
df <- df %>%
group_by(team) %>%
mutate(mean_rank = mean_(rank)) %>%
ungroup()
# Only keep top 10 mean medal countries
df <- df %>%
filter(team %in% (df %>%
distinct(team, mean_rank) %>%
top_n(10, -mean_rank) %>%
pull(team)))
# Rerank the countries
# Create rank per olympic games
df <- df %>%
group_by(year) %>%
mutate(rank = rank(-n_medals, ties.method = "random")) %>%
ungroup()
# Remove unnecessary columns
df <- df %>%
select(-mean_rank)
geom_flag
needs the two letter short for each country:
country_2_letters <- countrycode(df$team %>% unique() %>% sort(),
origin = "country.name",
destination = "genc2c") %>%
tolower() %>%
set_names(df$team %>% unique() %>% sort())
df <- df %>%
mutate(team_2_letters = country_2_letters[team])
Add information on host countries
host_countries <- tibble(
team = c("United States",
"Australia",
"Greece",
"China",
"Great Britain",
"Brazil"),
year = c(1996,
2000,
2004,
2008,
2012,
2016
)
) %>%
mutate(host = 1)
df <- df %>%
left_join(host_countries, by = c("team", "year"))
# Has the country been host in this time period
df <- df %>%
group_by(team) %>%
mutate(has_been_host = dummy_(any(host == 1, na.rm = T))) %>%
ungroup()
df %>%
ggplot(aes(year, rank, group = team, color = team, fill = team)) +
geom_bump(aes(smooth = 10), size = 1.5, lineend = "round") +
geom_label(data = df %>% filter(host == 1),
aes(label = "Host")) +
geom_text(data = df %>% filter(host == 1),
aes(label = "Host"),
color = "black") +
geom_flag(data = df %>% filter(year == min(year)),
aes(country = team_2_letters),
size = 8,
color = "black") +
geom_flag(data = df %>% filter(year == max(year)),
aes(country = team_2_letters),
size = 8,
color = "black") +
scale_color_manual(values = c(wesanderson::wes_palette("GrandBudapest2"), wesanderson::wes_palette("GrandBudapest1"), wesanderson::wes_palette("BottleRocket2"))) +
scale_fill_manual(values = c(wesanderson::wes_palette("GrandBudapest2"), wesanderson::wes_palette("GrandBudapest1"), wesanderson::wes_palette("BottleRocket2"))) +
scale_y_reverse(breaks = 1:100) +
scale_x_continuous(breaks = df$year %>% unique()) +
theme_minimal() +
theme(legend.position = "none",
panel.grid = element_blank(),
panel.background = element_rect(fill = "gray60", color = "transparent"),
plot.background = element_rect(fill = "gray60"),
text = element_text(color = "white")) +
labs(x = NULL,
y = NULL,
title = "Hosting is boosting the number of medals",
subtitle = "Number of medals on Summer Olympics 1992 - 2016")