directory_of_visualizations.Rmd

```{r echo = FALSE, message = FALSE}
# run setup script
source("_common.R")

library(dplyr)
library(tidyr)
library(ggforce)
library(ggridges)
library(treemapify)
library(forcats)
library(statebins)
library(sf)
library(ungeviz)
```

# Directory of visualizations {#directory-of-visualizations}

This chapter provides a quick visual overview of the various plots and charts that are commonly used to visualize data. It is meant both to serve as a table of contents, in case you are looking for a particular visualization whose name you may not know, and as a source of inspiration, if you need to find alternatives to the figures you routinely make.

```{r}
## general setup code

# line_size = 0.6

# theme
# Base theme shared by all plot icons in this chapter.
#
#   bg_color    fill of the plot background
#   line_color  color of axis lines, axis ticks, and the plot title
#   line_size   size of axis lines and axis ticks
#   font_size   title font size; plot and title margins are scaled from it
#
# Returns theme_dviz_open() with the icon-specific elements replaced.
theme_plot_icon <- function(bg_color = "#F5F8EA", line_color = "#243400",
                            line_size = .5, font_size = 14) {
  # axis lines and ticks all use the same stroke
  axis_stroke <- element_line(size = line_size, color = line_color)

  # centered title in the condensed font
  icon_title <- element_text(
    hjust = 0.5,
    family = dviz_font_family_condensed,
    color = line_color,
    size = font_size,
    margin = margin(0, 0, font_size * 6 / 14, 0)
  )

  # margins are written as fractions of the font size (x/14)
  icon_margin <- margin(
    font_size * 8 / 14, font_size, font_size * 10 / 14, font_size
  )

  icon_elements <- theme(
    # icons carry no axis labels, axis titles, or legend
    axis.text.x       = element_blank(),
    axis.text.y       = element_blank(),
    axis.title.x      = element_blank(),
    axis.title.y      = element_blank(),
    axis.line.x       = axis_stroke,
    axis.line.y       = axis_stroke,
    axis.ticks        = axis_stroke,
    axis.ticks.length = grid::unit(4, "pt"),
    legend.position   = "none",
    plot.margin       = icon_margin,
    plot.title        = icon_title,
    plot.background   = element_rect(fill = bg_color)
  )

  theme_dviz_open() %+replace% icon_elements
}

# Icon theme without an x axis: the x axis line and x ticks are removed,
# everything else comes from theme_plot_icon(). No grid lines are drawn.
# Arguments are passed through to theme_plot_icon() unchanged.
theme_plot_icon_hgrid <- function(bg_color = "#F5F8EA", line_color = "#243400",
                                  line_size = .5, font_size = 14) {
  icon_theme <- theme_plot_icon(bg_color, line_color, line_size, font_size)

  icon_theme %+replace% theme(
    axis.ticks.x = element_blank(),
    axis.line.x  = element_blank()
  )
}

# Icon theme without a y axis: the y axis line and y ticks are removed,
# everything else comes from theme_plot_icon(). No grid lines are drawn.
# Arguments are passed through to theme_plot_icon() unchanged.
theme_plot_icon_vgrid <- function(bg_color = "#F5F8EA", line_color = "#243400",
                                  line_size = .5, font_size = 14) {
  icon_theme <- theme_plot_icon(bg_color, line_color, line_size, font_size)

  icon_theme %+replace% theme(
    axis.ticks.y = element_blank(),
    axis.line.y  = element_blank()
  )
}

# Icon theme with no axes at all: both axis lines and all ticks are removed
# and the tick length is set to zero. Arguments are passed through to
# theme_plot_icon() unchanged.
theme_plot_icon_blank <- function(bg_color = "#F5F8EA", line_color = "#243400",
                                  line_size = .5, font_size = 14) {
  icon_theme <- theme_plot_icon(bg_color, line_color, line_size, font_size)

  icon_theme %+replace% theme(
    axis.ticks        = element_blank(),
    axis.line.x       = element_blank(),
    axis.line.y       = element_blank(),
    axis.ticks.length = grid::unit(0, "pt")
  )
}

# data sets
# NOTE: the order of the random draws below determines the generated data;
# do not reorder statements that call rnorm().
set.seed(5142)

# 15 linearly related points (scatterplot)
n <- 15
x <- rnorm(n)
y <- .4 * x + .6 * rnorm(n)
df_scatter_xy <- data.frame(x = x, y = y)

# one variable drawn from a mixture of two normal samples
df_one_dist <- data.frame(
  x = c(
    rnorm(1000, mean = 1., sd = 1.6),
    rnorm(300, mean = 4, sd = .4)
  )
)

# 20 standard-normal values
df_one_normal <- data.frame(x = rnorm(20))

# three types (A, B, C) observed at four conditions x = 1..4
df_fractions <- data.frame(
  y = c(
    .3, .39, .48, .6,
    .25, .13, .22, .24,
    .45, .48, .3, .16
  ),
  x = factor(rep(1:4, 3)),
  type = rep(c("A", "B", "C"), each = 4)
)


set.seed(2474)

# 8 points with a third variable z, shifted and squared so it is positive
n <- 8
x <- rnorm(n)
y <- .4 * x + .6 * rnorm(n)
z <- .5 * x + .3 * rnorm(n)
z <- (z - min(z) + 0.1)^2
df_scatter_xyz <- data.frame(x = x, y = y, z = z)


set.seed(5012)

# df_fractions with y rescaled by a per-condition factor (indexed by x)
df_multi_amounts <- df_fractions %>%
  mutate(y = c(1.0, 1.1, 1.4, 1.2)[x] * y)

# three normal samples of 70 values each, labelled by type and number
n <- 70
df_multi_dist <- data.frame(
  y = c(
    rnorm(n, mean = 1, sd = .8),
    rnorm(n, mean = 2, sd = .7),
    rnorm(n, mean = 0, sd = .5)
  ),
  type = rep(c("A", "B", "C"), each = n),
  number = rep(c(2, 1, 3), each = n)
)


# a single set of proportions (values sum to 100)
df_props <- data.frame(
  value = c(55, 30, 15),
  group = c("A", "B", "C")
)

# counts for every combination of two grouping variables; group_count is the
# total count within each level of var2 (the result stays grouped by var2)
df_multi_props <- data.frame(
  var1 = rep(c("C", "B", "A"), 3),
  var2 = rep(c("A", "B", "C"), each = 3),
  count = c(4, 1, 2, 12, 9, 5, 4, 5, 4)
) %>%
  group_by(var2) %>%
  mutate(group_count = sum(count))

# counts for combinations of three grouping variables
df_multi_props2 <- data.frame(
  var1 = rep(c("B", "A"), 9),
  var2 = rep(c("E", "E", "D", "D", "C", "C"), 3),
  var3 = rep(c("H", "G", "F"), each = 6),
  count = c(5, 8, 0, 0, 0, 0, 0, 3, 2, 7, 0, 0, 4, 0, 4, 2, 7, 4)
)

# reshape columns 1 to 3 into the form expected by the parallel-sets geoms
df_sets <- gather_set_data(df_multi_props2, 1:3)

# five points for the line graph
df_one_line <- data.frame(
  x = 1:5,
  y = c(3.1, 3.3, 4.0, 3.8, 4.4)
)

# NOTE: the order of the random draws below determines the generated data;
# do not reorder statements that call rnorm() or sample_n().
set.seed(9681)

# two point clouds of different size
n1 <- 1500 / 5
n2 <- 800 / 5
x1 <- rnorm(n1, mean = 0, sd = .7)
y1 <- 2 * x1 + rnorm(n1, mean = 0, sd = .8)

x2 <- rnorm(n2, mean = 0, sd = 0.4)
y2 <- 1.5 * x2 + rnorm(n2, mean = .5, sd = .8)

# combine both clouds (second one shifted); values outside the given ranges
# are censored to NA and the affected rows dropped
df_dense_scatter <- data.frame(
  x = scales::censor(c(x1, x2 + 2.2), range = c(-2, 4)),
  y = scales::censor(c(y1, y2 + 1.5), range = c(-3.5, 4.5))
) %>%
  na.omit()

# same x values with noisier y values, reduced to a random sample of 50 rows
y1 <- 2 * x1 + rnorm(n1, mean = 0, sd = 1.6)
y2 <- 1.5 * x2 + rnorm(n2, mean = .5, sd = 1.6)
df_dense_scatter_sample <- data.frame(
  x = scales::censor(c(x1, x2 + 2.2), range = c(-2, 4)),
  y = scales::censor(c(y1, y2 + 1.5), range = c(-3.5, 4.5))
) %>%
  na.omit() %>%
  sample_n(50)

# eight (x, y) positions in sequence t = 1..8
df_connected_scatter <- data.frame(
  x = c(1.9, 1.5, 2.2, 3, 3.3, 2.7, 1.7, 1),
  y = c(0.3, -1, -2.0, -0.9, .6, 1.8, 2, 0.7),
  t = 1:8
)

# five pairs of values: each group has one value under "A" and one under "B"
df_paired <- data.frame(
  y = c(
    6, 5.3, 3.8, 2.8, 2,
    4.3, 6.1, 5.1, 3.3, 2.4
  ),
  x = rep(c("A", "B"), each = 5),
  group = rep(1:5, 2)
)

# three estimates with associated uncertainties dx and dy
df_uncertain <- data.frame(
  type = c("A", "B", "C"),
  x = c(1.5, 2.2, 3.4),
  y = c(3.2, 5.1, 3.9),
  dx = c(.25, .3, .35),
  dy = c(.5, .4, .6)
)


# palettes

# number of colors per palette; all palettes share the same chroma and
# luminance settings and differ only in the hue h1
npal <- 5

# earth-brown (Amounts)
pal_earth_brown <- sequential_hcl(
  n = npal, h1 = 71,
  c1 = 80, c2 = 10, l1 = 18, l2 = 97, p1 = 1.5
)

# brown-green (Proportions)
pal_brown_green <- sequential_hcl(
  n = npal, h1 = 86,
  c1 = 80, c2 = 10, l1 = 18, l2 = 97, p1 = 1.5
)

# green-brown (Geospatial data)
pal_green_brown <- sequential_hcl(
  n = npal, h1 = -265,
  c1 = 80, c2 = 10, l1 = 18, l2 = 97, p1 = 1.5
)

# burgundy-red
pal_red_brown <- sequential_hcl(
  n = npal, h1 = 28,
  c1 = 80, c2 = 10, l1 = 18, l2 = 97, p1 = 1.5
)

# brown-red (Uncertainty)
pal_brown_red <- sequential_hcl(
  n = npal, h1 = 41,
  c1 = 80, c2 = 10, l1 = 18, l2 = 97, p1 = 1.5
)

# ocean-blue (Distributions)
pal_ocean_blue <- sequential_hcl(
  n = npal, h1 = 241,
  c1 = 80, c2 = 10, l1 = 18, l2 = 97, p1 = 1.5
)

# steel-blue (x-y relationships), plus the palette with the hue rotated by 180
pal_steel_blue <- sequential_hcl(
  n = npal, h1 = 257,
  c1 = 80, c2 = 10, l1 = 18, l2 = 97, p1 = 1.5
)
pal_steel_blue_inv <- sequential_hcl(
  n = npal, h1 = 257 - 180,
  c1 = 80, c2 = 10, l1 = 18, l2 = 97, p1 = 1.5
)
```

## Amounts

```{r amounts, fig.width = 5*6/4.2, fig.asp = 1/4}
# Icons for the "Amounts" section. p1, p2 and p7 are shown in this chunk;
# p3, p4, p5, p6 and p8 are shown by the following chunk.
palette <- pal_earth_brown

# vertical bars; the fill is a constant, so no fill scale is needed
p1 <- ggplot(df_props, aes(x = group, y = value)) +
  geom_col(
    position = "identity", color = palette[npal],
    fill = palette[3], width = 0.8
  ) +
  scale_y_continuous(limits = c(0, 66), expand = c(0, 0)) +
  labs(title = "Bars") +
  theme_plot_icon_hgrid(palette[npal], palette[1])

# horizontal bars; group order is reversed so that "A" ends up on top
p2 <- ggplot(df_props, aes(x = fct_rev(group), y = value)) +
  geom_col(
    position = "identity", color = palette[npal],
    fill = palette[3], width = .8
  ) +
  scale_y_continuous(limits = c(0, 66), expand = c(0, 0)) +
  coord_flip() +
  labs(title = "Bars") +
  theme_plot_icon_vgrid(palette[npal], palette[1])

# grouped bars, vertical (only conditions 1 to 3 are shown)
p3 <- ggplot(
  filter(df_multi_amounts, x != 4),
  aes(x, y, fill = factor(type, levels = c("A", "C", "B")))
) +
  geom_col(position = "dodge", color = palette[npal], width = .7) +
  scale_y_continuous(expand = c(0, 0), limits = c(0, .7)) +
  scale_fill_manual(values = palette[2:4]) +
  labs(title = "Grouped Bars") +
  theme_plot_icon_hgrid(palette[npal], palette[1])

# grouped bars, horizontal; level order and colors are both reversed
p4 <- ggplot(
  filter(df_multi_amounts, x != 4),
  aes(x, y, fill = factor(type, levels = c("B", "C", "A")))
) +
  geom_col(position = "dodge", color = palette[npal], width = .7) +
  scale_y_continuous(expand = c(0, 0), limits = c(0, .7)) +
  scale_fill_manual(values = rev(palette[2:4])) +
  coord_flip() +
  labs(title = "Grouped Bars") +
  theme_plot_icon_vgrid(palette[npal], palette[1])

# stacked bars, vertical
p5 <- ggplot(
  df_multi_amounts,
  aes(x, y, fill = factor(type, levels = c("B", "C", "A")))
) +
  geom_col(position = "stack", color = palette[npal]) +
  scale_y_continuous(limits = c(0, 1.55), expand = c(0, 0)) +
  scale_fill_manual(values = rev(palette[2:4])) +
  labs(title = "Stacked Bars") +
  theme_plot_icon_hgrid(palette[npal], palette[1])

# stacked bars, horizontal: same plot flipped, with the matching theme
p6 <- p5 + coord_flip() + theme_plot_icon_vgrid(palette[npal], palette[1])

# dots placed where the horizontal bars would end
p7 <- ggplot(df_props, aes(x = fct_rev(group), y = value)) +
  geom_point(color = palette[2], size = 2) +
  scale_y_continuous(limits = c(0, 66), expand = c(0, 0)) +
  coord_flip() +
  labs(title = "Dots") +
  theme_plot_icon_vgrid(palette[npal], palette[1])

# heatmap of conditions 2 to 4; the fill is mapped to the data column y
p8 <- ggplot(
  filter(df_multi_amounts, x != 1),
  aes(x, y = factor(type, levels = c("A", "C", "B")), fill = y)
) +
  geom_tile(color = palette[5], size = 1.5) +
  scale_fill_continuous_sequential(
    h1 = 71, c1 = 80, c2 = 10, l1 = 18, l2 = 97, p1 = 1.5,
    begin = 0.2, end = 0.75,
    rev = FALSE
  ) +
  labs(title = "Heatmap") +
  theme_plot_icon_blank(palette[npal], palette[1])

# three icons in a four-column grid
plot_grid(p1, p2, p7, ncol = 4, scale = .9)
```

The most common approach to visualizing amounts (i.e., numerical values shown for some set of categories) is using bars, either vertically or horizontally arranged (Chapter \@ref(visualizing-amounts)). However, instead of using bars, we can also place dots at the location where the corresponding bar would end (Chapter \@ref(visualizing-amounts)).

```{r amounts_multi, fig.width = 5*6/4.2, fig.asp = 1/2}
# first row: grouped and stacked bars; second row: heatmap
plot_grid(
  p3, p4, p5, p6,
  p8,
  ncol = 4, scale = .9
)
```

If there are two or more sets of categories for which we want to show amounts, we can group or stack the bars (Chapter \@ref(visualizing-amounts)). We can also map the categories onto the *x* and *y* axis and show amounts by color, via a heatmap (Chapter \@ref(visualizing-amounts)). 


## Distributions

```{r single-distributions, fig.width = 5*6/4.2, fig.asp = 1/4}

# Icons for visualizations of a single distribution.
palette <- pal_ocean_blue

# histogram with unit-wide bins centered on integers
p1 <- ggplot(df_one_dist, aes(x)) +
  geom_histogram(
    fill = palette[3], color = palette[npal],
    binwidth = 1, center = 0
  ) +
  scale_x_continuous(limits = c(-4.8, 6.8), expand = c(0, 0)) +
  scale_y_continuous(limits = c(0, 350), expand = c(0, 0)) +
  labs(title = "Histogram") +
  theme_plot_icon(palette[npal], palette[1])

# kernel density estimate of the same data, fixed bandwidth
p2 <- ggplot(df_one_dist, aes(x)) +
  geom_density(fill = palette[3], color = palette[npal], bw = .35) +
  scale_x_continuous(limits = c(-4.8, 6.8), expand = c(0, 0)) +
  scale_y_continuous(limits = c(0, .27), expand = c(0, 0)) +
  labs(title = "Density Plot") +
  theme_plot_icon(palette[npal], palette[1])

# empirical cumulative distribution of the small normal sample
p3 <- ggplot(df_one_normal, aes(x)) +
  stat_ecdf(color = palette[2], size = .7) +
  scale_x_continuous(expand = c(0.05, 0)) +
  scale_y_continuous(limits = c(0, 1.08), expand = c(0, 0)) +
  labs(title = "Cumulative Density") +
  theme_plot_icon(palette[npal], palette[1])

# q-q plot of the same sample, with the x = y reference line underneath
p4 <- ggplot(df_one_normal, aes(sample = x)) +
  geom_abline(intercept = 0, slope = 1, color = palette[3]) +
  geom_qq(color = palette[1], size = 0.8) +
  labs(title = "Quantile-Quantile Plot") +
  theme_plot_icon(palette[npal], palette[1])

plot_grid(
  p1, p2, p3, p4,
  ncol = 4, scale = .9
)
```

Histograms and density plots (Chapter \@ref(histograms-density-plots)) provide the most intuitive visualizations of a distribution, but both require arbitrary parameter choices and can be misleading. Cumulative densities and quantile-quantile (q-q) plots (Chapter \@ref(ecdf-qq)) always represent the data faithfully but can be more difficult to interpret.


```{r multiple-distributions, fig.width = 5*6/4.2, fig.asp = 1/2}
# Icons for visualizations of several distributions at once.
palette <- pal_ocean_blue

p1 <- ggplot(df_multi_dist, aes(x = type, y = y)) +
  geom_boxplot(color = palette[1], fill = palette[4]) +
  labs(title = "Boxplots") +
  theme_plot_icon_hgrid(palette[npal], palette[1])

p2 <- ggplot(df_multi_dist, aes(x = type, y = y)) +
  geom_violin(color = palette[npal], fill = palette[2], size = 0) +
  labs(title = "Violins") +
  theme_plot_icon_hgrid(palette[npal], palette[1])

# the point-based icons use a random subset of 50 values per type
df_multi_dist_small <- df_multi_dist %>%
  group_by(type) %>%
  sample_n(50)

# points jittered horizontally only
p3 <- ggplot(df_multi_dist_small, aes(x = type, y = y)) +
  geom_jitter(color = palette[1], width = 0.15, height = 0, size = .3) +
  labs(title = "Strip Charts") +
  theme_plot_icon_hgrid(palette[npal], palette[1])

p4 <- ggplot(df_multi_dist_small, aes(x = type, y = y)) +
  dviz.supp::stat_sina(color = palette[1], size = 0.3) +
  labs(title = "Sina Plots") +
  theme_plot_icon_hgrid(palette[npal], palette[1])

# histograms of the three types (fill order C, A, B)
p5 <- ggplot(
  df_multi_dist,
  aes(x = y, fill = factor(type, levels = c("C", "A", "B")))
) +
  geom_histogram(color = palette[npal], binwidth = 0.5, center = 0) +
  scale_fill_manual(values = palette[2:4]) +
  labs(title = "Stacked Histograms") +
  scale_x_continuous() +
  scale_y_continuous(limits = c(0, 49), expand = c(0, 0)) +
  theme_plot_icon(palette[npal], palette[1])

# semi-transparent densities drawn on top of each other
p6 <- ggplot(
  df_multi_dist,
  aes(x = y, fill = factor(type, levels = c("C", "A", "B")))
) +
  geom_density(alpha = 0.7, color = palette[npal]) +
  scale_fill_manual(values = palette[1:3]) +
  labs(title = "Overlapping Densities") +
  scale_x_continuous() +
  scale_y_continuous(limits = c(0, 1.1), expand = c(0, 0)) +
  theme_plot_icon(palette[npal], palette[1])

# one ridgeline per value of `number`
p7 <- ggplot(df_multi_dist, aes(x = y, y = number, group = number)) +
  geom_density_ridges(
    alpha = 0.7, color = palette[npal],
    fill = palette[2], scale = 2.5
  ) +
  labs(title = "Ridgeline Plot") +
  scale_x_continuous(expand = c(0, 0)) +
  scale_y_continuous(limits = c(1, 6.5), expand = c(0, 0)) +
  theme_plot_icon(palette[npal], palette[1])


plot_grid(
  p1, p2, p3, p4,
  p5, p6, p7,
  ncol = 4, scale = .9
)
```

Boxplots, violins, strip charts, and sina plots are useful when we want to visualize many distributions at once and/or if we are primarily interested in overall shifts among the distributions (Chapter \@ref(boxplots-violins-vertical)). Stacked histograms and overlapping densities allow a more in-depth comparison of a smaller number of distributions, though stacked histograms can be difficult to interpret and are best avoided (Chapter \@ref(multiple-histograms-densities)). Ridgeline plots can be a useful alternative to violin plots and are often useful when visualizing very large numbers of distributions or changes in distributions over time (Chapter \@ref(boxplots-violins-horizontal)).

## Proportions

```{r proportions, fig.width = 5*6/4.2, fig.asp = 1/4}
# Icons for a single set of proportions.
palette <- pal_brown_green

# pie chart: a single stacked bar drawn in polar coordinates
p1_main <- ggplot(df_props, aes(x = 1, y = value, fill = group)) +
  geom_col(position = "stack", color = palette[npal]) +
  coord_polar(theta = "y") +
  scale_y_continuous(breaks = NULL, name = "") +
  scale_x_continuous(breaks = NULL, name = "") +
  scale_fill_manual(values = palette[2:4]) +
  theme_plot_icon_blank(palette[npal], palette[1]) +
  theme(plot.margin = margin(0, 0, 0, 0))

# wrap the pie so that title and background match the other icons
p1 <- ggdraw(p1_main) +
  labs(title = "Pie Chart") +
  theme_plot_icon_blank(palette[npal], palette[1])

# single stacked bar, vertical
p2 <- ggplot(df_props, aes(x = factor(1), y = value, fill = group)) +
  geom_col(
    position = position_stack(reverse = TRUE),
    width = .45, color = palette[npal]
  ) +
  scale_y_continuous(limits = c(0, 108), expand = c(0, 0)) +
  scale_fill_manual(values = palette[2:4]) +
  labs(title = "Stacked Bars") +
  theme_plot_icon_hgrid(palette[npal], palette[1])

# single stacked bar, horizontal (not part of the grid assembled in this chunk)
p3 <- ggplot(df_props, aes(x = factor(1), y = value, fill = group)) +
  geom_col(
    position = position_stack(reverse = TRUE),
    width = .45, color = palette[npal]
  ) +
  scale_y_continuous(limits = c(0, 110), expand = c(0, 0)) +
  coord_flip() +
  scale_fill_manual(values = palette[2:4]) +
  labs(title = "Stacked Bars") +
  theme_plot_icon_vgrid(palette[npal], palette[1])

# side-by-side bars, vertical
p4 <- ggplot(df_props, aes(x = group, y = value, fill = group)) +
  geom_col(position = "identity", color = palette[npal], width = .8) +
  scale_y_continuous(limits = c(0, 66), expand = c(0, 0)) +
  scale_fill_manual(values = palette[2:4]) +
  labs(title = "Bars") +
  theme_plot_icon_hgrid(palette[npal], palette[1])

# side-by-side bars, horizontal
p5 <- ggplot(df_props, aes(x = fct_rev(group), y = value, fill = group)) +
  geom_col(position = "identity", color = palette[npal], width = .8) +
  scale_y_continuous(limits = c(0, 66), expand = c(0, 0)) +
  scale_fill_manual(values = palette[2:4]) +
  coord_flip() +
  labs(title = "Bars") +
  theme_plot_icon_vgrid(palette[npal], palette[1])


plot_grid(
  p1, p4, p5, p2,
  ncol = 4, scale = .9
)
```

Proportions can be visualized as pie charts, side-by-side bars, or stacked bars (Chapter \@ref(visualizing-proportions)), and as in the case of amounts, bars can be arranged either vertically or horizontally. Pie charts emphasize that the individual parts add up to a whole and highlight simple fractions. However, the individual pieces are more easily compared in side-by-side bars. Stacked bars look awkward for a single set of proportions, but can be useful when comparing multiple sets of proportions (see below).


```{r proportions-comp, fig.width = 5*6/4.2, fig.asp = 1/4}
# Icons for comparing several sets of proportions. This chunk uses the
# `palette` that is current when it runs; it does not set its own.

# grouped bars (only conditions 1 to 3 are shown)
p5 <- ggplot(
  filter(df_fractions, x != 4),
  aes(x, y, fill = factor(type, levels = c("A", "C", "B")))
) +
  geom_col(position = "dodge", color = palette[npal], width = .7) +
  scale_y_continuous(expand = c(0, 0), limits = c(0, .58)) +
  scale_fill_manual(values = palette[2:4]) +
  labs(title = "Grouped Bars") +
  theme_plot_icon_hgrid(palette[npal], palette[1])

# stacked bars, one per condition
p6 <- ggplot(df_fractions, aes(x, y, fill = type)) +
  geom_col(position = "stack", color = palette[npal]) +
  scale_y_continuous(limits = c(0, 1.08), expand = c(0, 0)) +
  scale_fill_manual(values = palette[2:4]) +
  labs(title = "Stacked Bars") +
  theme_plot_icon_hgrid(palette[npal], palette[1])

# densities of the three types stacked to fill the full height
p7 <- ggplot(
  df_multi_dist,
  aes(x = y, fill = factor(type, levels = c("C", "A", "B")))
) +
  geom_density(color = palette[npal], position = "fill") +
  scale_fill_manual(values = palette[2:4]) +
  scale_x_continuous(expand = c(0.04, 0)) +
  scale_y_continuous(limits = c(0, 1.08), expand = c(0, 0)) +
  labs(title = "Stacked Densities") +
  theme_plot_icon(palette[npal], palette[1])

# Build one small pie chart from the rows of df_fractions whose condition x
# equals `level`. The pie has no background and no margin so that several
# pies can be placed side by side inside a single icon.
#
#   level  value of df_fractions$x to select (1, 2, or 3 below)
#
# Returns a ggplot object.
make_fraction_pie <- function(level) {
  ggplot(filter(df_fractions, x == level), aes(x = 1, y = y, fill = type)) +
    geom_col(position = "stack", color = palette[npal]) +
    coord_polar(theta = "y") +
    scale_y_continuous(breaks = NULL, name = "") +
    scale_x_continuous(breaks = NULL, name = "") +
    scale_fill_manual(values = palette[c(2, 1, 3)]) +
    theme_plot_icon_blank(palette[npal], palette[1], font_size = 5) +
    theme(
      plot.background = element_blank(),
      plot.margin = margin(0, 0, 0, 0)
    )
}

p8_a <- make_fraction_pie(1)
p8_b <- make_fraction_pie(2)
p8_c <- make_fraction_pie(3)


# combine the three pies into one icon with the usual title and background
p8 <- plot_grid(p8_a, p8_b, p8_c, ncol = 3, scale = 1.1) +
  labs(title = "Multiple Pie Charts") +
  theme_plot_icon_blank(palette[npal], palette[1])

plot_grid(p8, p5, p6, p7, ncol = 4, scale = .9)
```

When visualizing multiple sets of proportions or changes in proportions across conditions, pie charts tend to be space-inefficient and often obscure relationships. Grouped bars work well as long as the number of conditions compared is moderate, and stacked bars can work for large numbers of conditions. Stacked densities (Chapter \@ref(visualizing-proportions)) are appropriate when the proportions change along a continuous variable.

```{r proportions-multi, fig.width = 5*6/4.2, fig.asp = 1/4}
# Icons for proportions defined by several grouping variables. This chunk
# uses the `palette` that is current when it runs; it does not set its own.

# mosaic plot: one facet per var2 level, bar width taken from group_count,
# facets drawn without spacing or strip labels
p1 <- ggplot(
  df_multi_props,
  aes(x = var2, y = count, fill = var1, width = group_count)
) +
  geom_bar(
    stat = "identity", position = "fill",
    colour = palette[npal], size = 0.5
  ) +
  facet_grid(~var2, scales = "free_x", space = "free_x") +
  scale_x_discrete(name = NULL, breaks = NULL) +
  scale_y_continuous(name = NULL, breaks = NULL, expand = c(0, 0)) +
  scale_fill_manual(values = palette[4:2], guide = "none") +
  coord_cartesian(clip = "off") +
  labs(title = "Mosaic Plot") +
  theme_plot_icon_blank(palette[npal], palette[1]) +
  theme(
    strip.text = element_blank(),
    panel.spacing.x = unit(0, "pt")
  )

# treemap: tiles sized by count, grouped and colored by var2,
# with a heavier border around each subgroup
p2 <- ggplot(
  df_multi_props,
  aes(area = count, subgroup = var2, fill = var2)
) +
  geom_treemap(color = palette[npal], size = 0.5 * .pt, alpha = NA) +
  geom_treemap_subgroup_border(color = palette[npal], size = 1.5 * .pt) +
  scale_fill_manual(values = palette[4:2], guide = "none") +
  coord_cartesian(clip = "off") +
  labs(title = "Treemap") +
  theme_plot_icon_blank(palette[npal], palette[1])

# parallel sets: bands colored by var1, drawn underneath the axis boxes
p3 <- ggplot(df_sets, aes(x, id = id, split = y, value = count)) +
  geom_parallel_sets(aes(fill = var1), alpha = 0.7, axis.width = 0.15) +
  geom_parallel_sets_axes(
    axis.width = 0.06,
    fill = palette[2], color = palette[2]
  ) +
  scale_x_discrete(name = NULL, breaks = NULL, expand = c(0, 0.15 / 2)) +
  scale_y_continuous(breaks = NULL, expand = c(0, 0)) +
  scale_fill_manual(values = c(palette[3], palette[2]), guide = "none") +
  labs(title = "Parallel Sets") +
  theme_plot_icon_blank(palette[npal], palette[1])

# three icons in a four-column grid
plot_grid(p1, p2, p3, ncol = 4, scale = .9)
```

When proportions are specified according to multiple grouping variables, then mosaic plots, treemaps, or parallel sets are useful visualization approaches
(Chapter \@ref(nested-proportions)). Mosaic plots assume that every level of one grouping variable can be combined with every level of another grouping variable, whereas treemaps do not make such an assumption. Treemaps work well even if the subdivisions of one group are entirely distinct from the subdivisions of another. Parallel sets work better than either mosaic plots or treemaps when there are more than two grouping variables.


## *x*--*y* relationships

```{r basic-scatter, fig.width = 5*6/4.2, fig.asp = 1/4}
# Icons for basic x-y relationships.
palette <- pal_steel_blue

p1 <- ggplot(df_scatter_xy, aes(x, y)) +
  geom_point(
    fill = palette[2], color = palette[npal],
    pch = 21, size = 2.4
  ) +
  scale_x_continuous(expand = c(.2, 0)) +
  scale_y_continuous(expand = c(.2, 0)) +
  labs(title = "Scatterplot") +
  theme_plot_icon(palette[npal], palette[1])

# third variable z mapped onto the point radius
p2 <- ggplot(df_scatter_xyz, aes(x, y, size = z)) +
  geom_point(
    fill = palette[2], color = palette[npal],
    pch = 21, alpha = 0.7
  ) +
  scale_x_continuous(expand = c(.2, 0)) +
  scale_y_continuous(expand = c(.2, 0)) +
  scale_radius(range = c(2, 8)) +
  labs(title = "Bubble Chart") +
  theme_plot_icon(palette[npal], palette[1])

# paired data in wide form (columns A and B), with the x = y line underneath
p3 <- ggplot(spread(df_paired, x, y), aes(A, B)) +
  geom_abline(slope = 1, intercept = 0, color = palette[3], size = 0.3) +
  geom_point(
    shape = 21, size = 2.4, stroke = 1,
    fill = palette[2], color = palette[npal]
  ) +
  scale_x_continuous(limits = c(1.5, 6.5)) +
  scale_y_continuous(limits = c(1.5, 6.5)) +
  labs(title = "Paired Scatterplot") +
  theme_plot_icon(palette[npal], palette[1])

# the same pairs in long form, each pair joined by a line; no x axis drawn
p4 <- ggplot(df_paired, aes(x, y, group = group)) +
  geom_line(color = palette[1]) +
  geom_point(
    shape = 21, size = 2.4, stroke = 1,
    fill = palette[2], color = palette[npal]
  ) +
  scale_x_discrete(expand = c(0, 0.4)) +
  scale_y_continuous(limits = c(1.5, 6.5)) +
  labs(title = "Slopegraph") +
  theme_plot_icon(palette[npal], palette[1]) +
  theme(
    axis.line.x = element_blank(),
    axis.ticks.x = element_blank()
  )

plot_grid(
  p1, p2, p3, p4,
  ncol = 4, scale = .9
)
```


Scatterplots represent the archetypical visualization when we want to show one quantitative variable relative to another (Chapter \@ref(associations-scatterplots)). If we have three quantitative variables, we can map one onto the dot size, creating a variant of the scatterplot called a bubble chart. For paired data, where the variables along the *x* and the *y* axes are measured in the same units, it is generally helpful to add a line indicating *x* = *y* (Chapter \@ref(associations-paired-data)). Paired data can also be shown as a slopegraph of paired points connected by straight lines (Chapter \@ref(associations-paired-data)).

```{r xy-binning, fig.width = 5*6/4.2, fig.asp = 1/4}
# Icons for dense x-y data and for correlations. This chunk uses the
# `palette` that is current when it runs; it does not set its own.

p5 <- ggplot(df_dense_scatter, aes(x, y)) +
  geom_density2d(binwidth = 0.02, color = palette[1]) +
  scale_x_continuous(limits = c(-2, 3.6), expand = c(0, 0)) +
  scale_y_continuous(limits = c(-4, 5), expand = c(0, 0)) +
  labs(title = "Density Contours") +
  theme_plot_icon(palette[npal], palette[1])

# rectangular bins, fill taken from the first npal - 1 palette colors
p6 <- ggplot(df_dense_scatter, aes(x, y)) +
  geom_bin2d(bins = 12, color = palette[npal], size = 0.5) +
  scale_x_continuous(limits = c(-2, 3.6), expand = c(0, 0)) +
  scale_y_continuous(limits = c(-4, 5), expand = c(0, 0)) +
  scale_fill_gradientn(colors = palette[1:(npal - 1)]) +
  labs(title = "2D Bins") +
  theme_plot_icon(palette[npal], palette[1])

# hexagonal bins with the same color scale
p7 <- ggplot(df_dense_scatter, aes(x, y)) +
  geom_hex(bins = 12, color = palette[npal], size = 0.5) +
  scale_x_continuous(limits = c(-2, 3.6), expand = c(0, 0)) +
  scale_y_continuous(limits = c(-4, 5), expand = c(0, 0)) +
  scale_fill_gradientn(colors = palette[1:(npal - 1)]) +
  labs(title = "Hex Bins") +
  theme_plot_icon(palette[npal], palette[1])

# correlation matrix of five mtcars variables, reshaped to long form:
# stack() goes column by column, so the row names repeat once per column
cm <- cor(select(mtcars, mpg, hp, drat, wt, qsec))
df_wide <- as.data.frame(cm)
df_long <- stack(df_wide)
names(df_long) <- c("cor", "var1")
df_long <- cbind(
  df_long,
  var2 = rep(rownames(cm), times = length(rownames(cm)))
)

# order the variables by average-linkage clustering on 1 - correlation
clust <- hclust(as.dist(1 - cm), method = "average")
levels <- clust$labels[clust$order]
df_long$var1 <- factor(df_long$var1, levels = levels)
df_long$var2 <- factor(df_long$var2, levels = levels)

# correlogram: one triangle of the matrix only; color shows the correlation,
# point area its absolute value
p8 <- ggplot(
  filter(df_long, as.integer(var1) < as.integer(var2)),
  aes(var1, var2, fill = cor, size = abs(cor))
) +
  geom_point(shape = 21, stroke = 0) +
  scale_x_discrete(position = "top", name = NULL, expand = c(0, 0.5)) +
  scale_y_discrete(name = NULL, expand = c(0, 0.5)) +
  scale_size_area(max_size = 8, limits = c(0, 0.9), guide = "none") +
  scale_fill_gradient2(
    high = palette[2],
    mid = palette[npal],
    low = pal_steel_blue_inv[2],
    guide = "none"
  ) +
  labs(title = "Correlogram") +
  theme_plot_icon(palette[npal], palette[1])


plot_grid(
  p5, p6, p7, p8,
  ncol = 4, scale = .9
)
```

For large numbers of points, regular scatterplots can become uninformative due to overplotting. In this case, contour lines, 2D bins, or hex bins may provide an alternative (Chapter \@ref(overlapping-points)). When we want to visualize more than two quantities, on the other hand, we may choose to plot correlation coefficients in the form of a correlogram instead of the underlying raw data (Chapter \@ref(associations-correlograms)).

```{r xy-lines, fig.width = 5*6/4.2, fig.asp = 1/4}
# Icons for line-based x-y visualizations. This chunk uses the `palette`
# that is current when it runs; it does not set its own.

# line graph with a marker on every data point
p1 <- ggplot(df_one_line, aes(x, y)) +
  geom_line(color = palette[1]) +
  geom_point(
    shape = 21, size = 2.4, stroke = 1,
    fill = palette[2], color = palette[npal]
  ) +
  scale_x_continuous(limits = c(0.5, 5.5), breaks = c(1, 3, 5)) +
  scale_y_continuous(limits = c(2.8, 4.8)) +
  labs(title = "Line Graph") +
  theme_plot_icon(palette[npal], palette[1])

# connected scatterplot: points joined in data order, with the path color
# and point fill both following t through one shared gradient
p2 <- ggplot(df_connected_scatter, aes(x, y, color = t, fill = t)) +
  geom_path() +
  geom_point(
    shape = 21, size = 2.4, stroke = 1,
    color = palette[npal]
  ) +
  scale_color_gradientn(
    aesthetics = c("colour", "fill"),
    colors = palette[(npal - 2):1]
  ) +
  scale_x_continuous(limits = c(0.3, 3.7)) +
  scale_y_continuous(limits = c(-2.5, 2.5)) +
  labs(title = "Connected Scatterplot") +
  theme_plot_icon(palette[npal], palette[1])

# smoothed trend line over the sampled points, drawn without a
# confidence band
p3 <- ggplot(df_dense_scatter_sample, aes(x, y)) +
  geom_point(color = palette[2], size = 0.3, alpha = 1 / 2) +
  geom_smooth(
    color = palette[1],
    fill = palette[npal - 2],
    size = 0.5,
    se = FALSE
  ) +
  scale_y_continuous(limits = c(-5, 5)) +
  labs(title = "Smooth Line Graph") +
  theme_plot_icon(palette[npal], palette[1])

# three icons in a four-column grid
plot_grid(p1, p2, p3, ncol = 4, scale = .9)
```

When the *x* axis represents time or a strictly increasing quantity such as a treatment dose, we commonly draw line graphs (Chapter \@ref(time-series)). If we have a temporal sequence of two response variables, we can draw a connected scatterplot where we first plot the two response variables in a scatterplot and then connect dots corresponding to adjacent time points (Chapter \@ref(time-series-connected-scatter)). We can use smooth lines to represent trends in a larger dataset (Chapter \@ref(visualizing-trends)). 


## Geospatial data {#directory-geospatial-data}

```{r geospatial, fig.width = 5*6/4.2, fig.asp = 1/4}
# Icons for geospatial data.
palette <- pal_green_brown

# state-level income data with income binned into five classes; Alaska,
# Hawaii and the District of Columbia are excluded
lower48 <- US_income %>%
  mutate(
    income_bins = cut(
      # a missing median income is replaced by 25000 so it falls into a bin
      ifelse(is.na(median_income), 25000, median_income),
      breaks = c(0, 40000, 50000, 60000, 70000, 80000)
    )
  ) %>%
  filter(!name %in% c("Alaska", "Hawaii", "District of Columbia"))


# plain map: all states in one fill color
p1_main <- ggplot(lower48) +
  geom_sf(color = palette[1], fill = palette[4], size = 0.3) +
  coord_sf(datum = NA, expand = FALSE) +
  scale_x_continuous(limits = c(-2500000, 100000)) +
  scale_y_continuous(limits = c(-900000, 1558935)) +
  theme_plot_icon_blank(palette[npal], palette[1]) +
  theme(plot.margin = margin(2, 5, 3, 5))

# wrap the map so that title and background match the other icons
p1 <- ggdraw(p1_main) +
  labs(title = "Map") +
  theme_plot_icon_blank(palette[npal], palette[1])


# choropleth: states filled by income class
p2_main <- ggplot(lower48, aes(fill = income_bins)) +
  geom_sf(color = palette[1], size = 0.2) +
  coord_sf(datum = NA, expand = FALSE) +
  scale_x_continuous(limits = c(-2500000, 100000)) +
  scale_y_continuous(limits = c(-900000, 1558935)) +
  scale_fill_manual(values = palette) +
  theme_plot_icon_blank(palette[npal], palette[1]) +
  theme(plot.margin = margin(2, 5, 3, 5))

p2 <- ggdraw(p2_main) +
  labs(title = "Choropleth") +
  theme_plot_icon_blank(palette[npal], palette[1])

# same binning and state selection applied to the cartogram geometry
lower48_carto <- US_income_cartogram %>%
  mutate(
    income_bins = cut(
      # a missing median income is replaced by 25000 so it falls into a bin
      ifelse(is.na(median_income), 25000, median_income),
      breaks = c(0, 40000, 50000, 60000, 70000, 80000)
    )
  ) %>%
  filter(!name %in% c("Alaska", "Hawaii", "District of Columbia"))

# cartogram: same fill mapping, slightly different y limits
p3_main <- ggplot(lower48_carto, aes(fill = income_bins)) +
  geom_sf(color = palette[1], size = 0.2) +
  coord_sf(datum = NA, expand = FALSE) +
  scale_x_continuous(limits = c(-2500000, 100000)) +
  scale_y_continuous(limits = c(-1000000, 1458935)) +
  scale_fill_manual(values = palette) +
  theme_plot_icon_blank(palette[npal], palette[1]) +
  theme(plot.margin = margin(2, 5, 3, 5))

p3 <- ggdraw(p3_main) +
  labs(title = "Cartogram") +
  theme_plot_icon_blank(palette[npal], palette[1])

# subset of states, selected by GEOID, for the statebins icon
lower48_small <- filter(
  lower48,
  GEOID %in% c(
    "04", "06", "08", "16", "20", "30", "31", "32",
    "35", "38", "41", "46", "49", "53", "56"
  )
)

# cartogram heatmap: one labelled square per state, cropped to a fixed window
p4_main <- ggplot(lower48_small, aes(state = name, fill = income_bins)) +
  geom_statebins(
    family = dviz.supp::dviz_font_family,
    lbl_size = 8 / .pt,
    border_size = 1,
    border_col = palette[npal]
  ) +
  coord_equal(
    xlim = c(1.5, 5.5), ylim = c(-2.5, -6.5),
    expand = FALSE, clip = "off"
  ) +
  scale_fill_manual(values = palette[2:5]) +
  theme_plot_icon_blank(palette[npal], palette[1]) +
  theme(plot.margin = margin(2, 0, 0, 7))

p4 <- ggdraw(p4_main) +
  labs(title = "Cartogram Heatmap") +
  theme_plot_icon_blank(palette[npal], palette[1])

plot_grid(
  p1, p2, p3, p4,
  scale = 0.9, nrow = 1
)
```

The primary mode of showing geospatial data is in the form of a map (Chapter \@ref(geospatial-data)). A map takes coordinates on the globe and projects them onto a flat surface, such that shapes and distances on the globe are approximately represented by shapes and distances in the 2D representation. In addition, we can show data values in different regions by coloring those regions in the map according to the data. Such a map is called a choropleth (Chapter \@ref(choropleth-mapping)). In some cases, it may be helpful to distort the different regions according to some other quantity (e.g., population number) or simplify each region into a square. Such visualizations are called cartograms.

## Uncertainty {#directory-uncertainty}

```{r errorbars, fig.width = 5*6/4.2, fig.asp = 1/4}
# color palette for the uncertainty icons; the chunks that follow keep using
# `palette` without reassigning it
palette <- pal_brown_red

# Icon 1 ("Error Bars"): a point estimate per row with a horizontal error bar
# spanning y - dy to y + dy.
p1 <- ggplot(data = df_uncertain, mapping = aes(x = y, y = type)) +
  geom_errorbarh(
    aes(xmin = y - dy, xmax = y + dy),
    color = palette[1],
    height = 0.2,
    size = 0.5
  ) +
  geom_point(color = palette[1], size = 2) +
  labs(title = "Error Bars") +
  theme_plot_icon(bg_color = palette[npal], line_color = palette[1]) +
  # the categorical (vertical) axis gets neither a line nor ticks
  theme(
    axis.line.y = element_blank(),
    axis.ticks.y = element_blank()
  )

# Icon 2 ("Error Bars"): the estimate is shown as a column, with a vertical
# segment from y - dy to y + dy serving as the error bar.
p2 <- ggplot(data = df_uncertain, mapping = aes(x = type, y = y)) +
  geom_col(fill = palette[3], width = 0.8) +
  geom_segment(
    aes(xend = type, y = y - dy, yend = y + dy),
    color = palette[1],
    size = 0.7
  ) +
  # no expansion, so the columns start directly at the bottom of the panel
  scale_y_continuous(limits = c(0, 6), expand = c(0, 0)) +
  labs(title = "Error Bars") +
  theme_plot_icon(bg_color = palette[npal], line_color = palette[1]) +
  # the categorical (horizontal) axis gets neither a line nor ticks
  theme(
    axis.line.x = element_blank(),
    axis.ticks.x = element_blank()
  )

# Icon 3 ("Graded Error Bars"): three cap-less horizontal bars per row, drawn
# from widest/thinnest to narrowest/thickest so that each narrower interval
# sits on top of the wider ones, plus a point for the estimate.
# NOTE(review): the multipliers are standard-normal quantiles, which presumes
# dy is a standard error -- confirm against how df_uncertain is built.
p3 <- ggplot(data = df_uncertain, mapping = aes(x = y, y = type)) +
  geom_errorbarh(
    aes(xmin = y - 2.58 * dy, xmax = y + 2.58 * dy), # 99% CI
    color = palette[3],
    height = 0,
    size = 0.5
  ) +
  geom_errorbarh(
    aes(xmin = y - 1.96 * dy, xmax = y + 1.96 * dy), # 95% CI
    color = palette[2],
    height = 0,
    size = 1
  ) +
  geom_errorbarh(
    aes(xmin = y - 1.28 * dy, xmax = y + 1.28 * dy), # 80% CI
    color = palette[1],
    height = 0,
    size = 1.5
  ) +
  geom_point(color = palette[1], size = 2) +
  labs(title = "Graded Error Bars") +
  theme_plot_icon(bg_color = palette[npal], line_color = palette[1]) +
  # the categorical (vertical) axis gets neither a line nor ticks
  theme(
    axis.line.y = element_blank(),
    axis.ticks.y = element_blank()
  )

# Icon 4 ("2D Error Bars"): each point carries a vertical segment (y +/- dy)
# and a horizontal segment (x +/- dx), forming a cross.
p4 <- ggplot(data = df_uncertain, mapping = aes(x = x, y = y)) +
  geom_point(color = palette[1], size = 2) +
  # vertical extent
  geom_segment(
    aes(xend = x, y = y - dy, yend = y + dy),
    color = palette[1],
    size = 0.7
  ) +
  # horizontal extent
  geom_segment(
    aes(yend = y, x = x - dx, xend = x + dx),
    color = palette[1],
    size = 0.7
  ) +
  scale_x_continuous(limits = c(1, 4)) +
  scale_y_continuous(limits = c(2, 6)) +
  labs(title = "2D Error Bars") +
  theme_plot_icon(bg_color = palette[npal], line_color = palette[1])

# note the order: the 2D error-bar icon (p4) is placed before the graded
# error bars (p3)
plot_grid(p1, p2, p4, p3, scale = 0.9, ncol = 4)
  
```

Error bars are meant to indicate the range of likely values for some estimate or measurement. They extend horizontally and/or vertically from some reference point representing the estimate or measurement (Chapter \@ref(visualizing-uncertainty)). Reference points can be shown in various ways, such as by dots or by bars. Graded error bars show multiple ranges at the same time, where each range corresponds to a different degree of confidence. They are in effect multiple error bars with different line thicknesses plotted on top of each other.

```{r confidence-dists, fig.width = 5*6/4.2, fig.asp = 1/4}

# Icon 1 ("Confidence Strips"): one strip per row, produced by
# stat_confidence_density() with dy passed as the margin of error.
# NOTE(review): confidence = 0.68 presumably treats dy as roughly one
# standard error -- confirm against the ungeviz documentation.
p1 <- ggplot(data = df_uncertain, mapping = aes(x = y, y = type)) +
  stat_confidence_density(
    aes(moe = dy),
    fill = palette[3],
    height = 0.6,
    confidence = 0.68
  ) +
  scale_x_continuous(limits = c(1.6, 6.4), expand = c(0, 0)) +
  scale_y_discrete(expand = c(0, 1)) +
  labs(title = "Confidence Strips") +
  theme_plot_icon(bg_color = palette[npal], line_color = palette[1]) +
  # the categorical (vertical) axis gets neither a line nor ticks
  theme(
    axis.line.y = element_blank(),
    axis.ticks.y = element_blank()
  )


# Icon 2 ("Eyes"): for each level of `type`, a ribbon that is symmetric about
# the row's position shows the confidence density; an error bar and a point
# are drawn on top.
# The ribbons are built one level at a time because each one is centered on a
# hard-coded numeric position (1, 2, 3) -- presumably the positions of levels
# "A", "B", "C" on the discrete axis; verify if the levels ever change.
p2 <- ggplot(data = df_uncertain, mapping = aes(x = y, y = type)) +
  # level "A", centered at 1
  geom_ribbon(
    data = filter(df_uncertain, type == "A"),
    aes(
      moe = dy,
      ymin = 1 - 0.5 * stat(density),
      ymax = 1 + 0.5 * stat(density)
    ),
    stat = "confidence_density",
    fill = palette[3],
    color = NA,
    alpha = NA,
    confidence = 0.68
  ) +
  # level "B", centered at 2
  geom_ribbon(
    data = filter(df_uncertain, type == "B"),
    aes(
      moe = dy,
      ymin = 2 - 0.5 * stat(density),
      ymax = 2 + 0.5 * stat(density)
    ),
    stat = "confidence_density",
    fill = palette[3],
    color = NA,
    alpha = NA,
    confidence = 0.68
  ) +
  # level "C", centered at 3
  geom_ribbon(
    data = filter(df_uncertain, type == "C"),
    aes(
      moe = dy,
      ymin = 3 - 0.5 * stat(density),
      ymax = 3 + 0.5 * stat(density)
    ),
    stat = "confidence_density",
    fill = palette[3],
    color = NA,
    alpha = NA,
    confidence = 0.68
  ) +
  # cap-less interval of +/- 1.28 * dy around the estimate
  geom_errorbarh(
    aes(xmin = y - 1.28 * dy, xmax = y + 1.28 * dy),
    color = palette[1],
    height = 0,
    size = 0.5
  ) +
  geom_point(color = palette[1], size = 2) +
  scale_x_continuous(limits = c(1.6, 6.4), expand = c(0, 0)) +
  scale_y_discrete(expand = expand_scale(add = c(0.8, 0.8))) +
  labs(title = "Eyes") +
  theme_plot_icon(bg_color = palette[npal], line_color = palette[1]) +
  # the categorical (vertical) axis gets neither a line nor ticks
  theme(
    axis.line.y = element_blank(),
    axis.ticks.y = element_blank()
  )

# Icon 3 ("Half-Eyes"): the confidence density is drawn with the "ridgeline"
# geom, its height scaled to 0.9 times the density, with an error bar and a
# point on the baseline.
p3 <- ggplot(data = df_uncertain, mapping = aes(x = y, y = type)) +
  stat_confidence_density(
    aes(moe = dy, height = 0.9 * stat(density)),
    geom = "ridgeline",
    fill = palette[3],
    color = NA,
    alpha = NA,
    confidence = 0.68
  ) +
  # cap-less interval of +/- 1.28 * dy around the estimate
  geom_errorbarh(
    aes(xmin = y - 1.28 * dy, xmax = y + 1.28 * dy),
    color = palette[1],
    height = 0,
    size = 0.5
  ) +
  geom_point(color = palette[1], size = 2) +
  scale_x_continuous(limits = c(1.6, 6.4), expand = c(0, 0)) +
  # asymmetric padding: less space below the first row than above the last
  scale_y_discrete(expand = expand_scale(add = c(0.2, 0.8))) +
  labs(title = "Half-Eyes") +
  theme_plot_icon(bg_color = palette[npal], line_color = palette[1]) +
  # the categorical (vertical) axis gets neither a line nor ticks
  theme(
    axis.line.y = element_blank(),
    axis.ticks.y = element_blank()
  )

# Standard normal density evaluated on a 100-point grid over [-3, 3];
# used as the reference curve in the quantile dot plot icon.
df_norm <- data.frame(x = seq(from = -3, to = 3, length.out = 100))
df_norm$y <- dnorm(df_norm$x)

# 20 quantiles of the standard normal at the probabilities from ppoints(20);
# these are the values binned by the quantile dot plot icon.
df_q <- data.frame(x = qnorm(p = ppoints(n = 20)))

# Icon 4 ("Quantile Dot Plot"): the quantiles in df_q are stacked as dots,
# with the rescaled normal density from df_norm drawn behind them.
p4 <- ggplot(data = df_q, mapping = aes(x = x)) +
  geom_line(
    data = df_norm,
    mapping = aes(x = x, y = 0.36 * y), # factor .36 manually determined
    color = palette[1],
    na.rm = FALSE,
    size = 0.25
  ) +
  geom_dotplot(binwidth = 0.4, fill = palette[3], color = palette[1]) +
  scale_x_continuous(
    limits = c(-2.8, 2.8),
    expand = c(0, 0)
  ) +
  scale_y_continuous(
    limits = c(0, 0.4),
    expand = c(0.02, 0)
  ) +
  labs(title = "Quantile Dot Plot") +
  theme_plot_icon(bg_color = palette[npal], line_color = palette[1]) +
  # the vertical axis gets neither a line nor ticks
  theme(
    axis.line.y = element_blank(),
    axis.ticks.y = element_blank()
  )

# the four distribution-style uncertainty icons side by side
plot_grid(p1, p2, p3, p4, scale = 0.9, ncol = 4)
  
```

To achieve a more detailed visualization than is possible with error bars or graded error bars, we can visualize the actual confidence or posterior distributions (Chapter \@ref(visualizing-uncertainty)). Confidence strips provide a clear visual sense of uncertainty but are difficult to read accurately. Eyes and half-eyes combine error bars with approaches to visualize distributions (violins and ridgelines, respectively), and thus show both precise ranges for some confidence levels and the overall uncertainty distribution. A quantile dot plot can serve as an alternative visualization of an uncertainty distribution (Chapter \@ref(frequency-framing)). By showing the distribution in discrete units, the quantile dot plot is not as precise but can be easier to read than the continuous distribution shown by a violin or ridgeline plot.

```{r confidence-bands, fig.width = 5*6/4.2, fig.asp = 1/4}
# Icon 1 ("Confidence Band"): a smoothed trend line with a single 95% band.
# No smoothing method is given, so geom_smooth() falls back to its default.
p1 <- ggplot(data = df_dense_scatter_sample, mapping = aes(x = x, y = y)) +
  geom_smooth(
    color = palette[1],
    fill = palette[npal - 2],
    size = 0.5,
    level = 0.95
  ) +
  scale_y_continuous(limits = c(-5, 5)) +
  labs(title = "Confidence Band") +
  theme_plot_icon(bg_color = palette[npal], line_color = palette[1])

# Icon 2 ("Graded Confidence Band"): three nested bands (99%, 95%, 80%) drawn
# from widest to narrowest. The two outer layers suppress the trend line
# (color = NA); only the innermost layer draws it.
p2 <- ggplot(data = df_dense_scatter_sample, mapping = aes(x = x, y = y)) +
  geom_smooth(
    color = NA,
    fill = palette[npal - 1],
    level = 0.99
  ) +
  geom_smooth(
    color = NA,
    fill = palette[npal - 2],
    level = 0.95
  ) +
  geom_smooth(
    color = palette[1],
    fill = palette[npal - 3],
    size = 0.5,
    level = 0.8
  ) +
  scale_y_continuous(limits = c(-5, 5)) +
  labs(title = "Graded Confidence Band") +
  theme_plot_icon(bg_color = palette[npal], line_color = palette[1])

# Icon 3 ("Fitted Draws"): thin lines from stat_smooth_draws(), grouped by
# the computed `.draw` variable so that every draw is its own line.
# NOTE(review): times = 8 presumably sets the number of draws -- confirm
# against the ungeviz documentation.
p3 <- ggplot(data = df_dense_scatter_sample, mapping = aes(x = x, y = y)) +
  stat_smooth_draws(
    times = 8,
    aes(group = stat(.draw)),
    color = palette[1],
    size = 0.15
  ) +
  scale_y_continuous(limits = c(-5, 5)) +
  labs(title = "Fitted Draws") +
  theme_plot_icon(bg_color = palette[npal], line_color = palette[1])

# three icons in a four-column grid: the last cell stays empty, presumably so
# that these icons have the same size as those in the four-icon rows
plot_grid(p1, p2, p3, scale = 0.9, ncol = 4)
```

For smooth line graphs, the equivalent of an error bar is a confidence band (Chapter \@ref(uncertainty-curve-fits)). It shows a range of values the line might pass through at a given confidence level. As in the case of error bars, we can draw graded confidence bands that show multiple confidence levels at once. We can also show individual fitted draws in lieu of or in addition to the confidence bands.