The goal of sparseCorrespondenceAnalysis is to illustrate Correspondence Analysis and its sparsification on a data set of the causes of death in the United States in 2019.
You can install the development version of
sparseCorrespondenceAnalysis
from GitHub with:
# Install straight from the GitHub repository (uses the devtools package).
devtools::install_github(repo = "vguillemot/sparseCorrespondenceAnalysis")
First load the package and the “Cause of death” data set.
# Attach the package; as the output below shows, PMA, ggplot2 and ggrepel
# are loaded along with it.
library(sparseCorrespondenceAnalysis)
#> Loading required package: PMA
#> Loading required package: ggplot2
#> Loading required package: ggrepel
# Load the "death.2019" data set used in the rest of this example.
data("death.2019")
Then apply the sCAwithPMD
function to the data:
# Run the sparse correspondence analysis on the contingency table, keeping
# two dimensions. For each dimension, s1 is half the square root of the number
# of rows and s2 is half the square root of the number of columns, which asks
# for a medium amount of sparsity.
sca.res <- sCAwithPMD(
  DATA = death.2019,
  dimensions = 2L,
  doublecentering = TRUE, # center the data
  s1 = rep(0.5 * sqrt(nrow(death.2019)), times = 2),
  s2 = rep(0.5 * sqrt(ncol(death.2019)), times = 2)
)
# Build the map components for the row factor scores (sca.res$fi) on
# dimensions 1 and 2.
sca.fi.map.12 <- createFactorMap(
  X = sca.res$fi,
  axis1 = 1,
  axis2 = 2,
  title = "SCA: row factor scores",
  constraints = NULL,
  col.background = NULL,
  col.axes = "#42376B",
  width.axes = 0.5,
  alpha.axes = 0.5,
  alpha.points = 0.5,
  pch = 16,
  text.cex = 4
)

# Assemble the final plot: background, points and labels, a path joining the
# points, then the purple theme and the axis labels.
sca.fi.plot.12 <- sca.fi.map.12$zeMap_background +
  sca.fi.map.12$zeMap_dots +
  sca.fi.map.12$zeMap_text +
  geom_path(color = "darkorchid4") +
  theme(
    axis.title = element_text(color = "#42376B"),
    axis.text = element_text(color = "#42376B"),
    title = element_text(color = "#42376B"),
    panel.border = element_rect(size = 1.5, color = "#42376B", fill = NA)
  ) +
  labs(x = "Dimension 1", y = "Dimension 2")

# Display the plot.
sca.fi.plot.12
# Build the map components for the column factor scores (sca.res$fj) on
# dimensions 1 and 2. The title names the column scores so that this plot is
# not confused with the row factor score map.
sca.fj.map.12 <- createFactorMap(
  X = sca.res$fj,
  col.background = NULL,
  col.axes = "#42376B",
  width.axes = 0.5,
  title = "SCA: column factor scores",
  alpha.axes = 0.5,
  alpha.points = 0.5,
  pch = 16,
  axis1 = 1,
  axis2 = 2,
  constraints = NULL,
  text.cex = 4
)

# Assemble the final plot: background, points and labels, then the purple
# theme and the axis labels.
sca.fj.plot.12 <- sca.fj.map.12$zeMap_background +
  sca.fj.map.12$zeMap_dots +
  sca.fj.map.12$zeMap_text +
  theme(
    axis.title = element_text(color = "#42376B"),
    axis.text = element_text(color = "#42376B"),
    title = element_text(color = "#42376B"),
    panel.border = element_rect(size = 1.5, color = "#42376B", fill = NA)
  ) +
  labs(x = "Dimension 1", y = "Dimension 2")

# Display the plot.
sca.fj.plot.12
#> Warning: ggrepel: 8 unlabeled data points (too many overlaps). Consider
#> increasing max.overlaps