diff --git a/lab 4.html b/lab 4.html new file mode 100644 index 0000000..3aeeeca --- /dev/null +++ b/lab 4.html @@ -0,0 +1,9204 @@ + + + + + + + + + + +PM 566 Lab 4 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ +
+ +
+
+

PM 566 Lab 4

+
+ + + +
+ +
+
Author
+
+

Erica Shin

+
+
+ + + +
+ + + +
+ + +
+

Exercise 1

+
+
if (!file.exists("met_all.gz"))
+  download.file(
+    url = "https://raw.githubusercontent.com/USCbiostats/data-science-data/master/02_met/met_all.gz",
+    destfile = "met_all.gz",
+    method   = "libcurl",
+    timeout  = 60
+    )
+met <- data.table::fread("met_all.gz")
+
+library(data.table)
+
+
Warning: package 'data.table' was built under R version 4.4.1
+
+
+
+
+

Exercise 2

+
+
#removing temperatures less than -17 degrees Celsius
+met <- met[met$temp > -17][elev == 9999.0, elev := NA]
+
+#date variable
+
+#making new dataset and adding date variable
+met_new <- met[,.(
+  date = as.Date(paste(year, month, day, sep = "-"))
+)]
+
+#met$date <- paste(met$year, met$month, met$day, sep="-")
+
+#adding date variable to existing dataset
+met$date <- as.Date(paste(met$year, met$month, met$day, sep = "-"))
+
+#first week
+met$firstweek <- data.table::week(met$date)
+
+met <- met[met$firstweek == 31]
+
+#mean variables
+met_avg <- met[,.(
+  temp = mean(temp, na.rm=TRUE),
+  rh = mean(rh, na.rm=TRUE),
+  wind.sp = mean(wind.sp, na.rm=TRUE),
+  vis.dist = mean(vis.dist, na.rm=TRUE),
+  dew.point = mean(dew.point, na.rm=TRUE),
+  lat = mean(lat, na.rm=TRUE),
+  lon = mean(lon, na.rm=TRUE),
+  elev = mean(elev, na.rm=TRUE)
+), by=c("USAFID", "day")]
+
+#creating region variable
+met_avg[, region1 := fifelse(lat > 39.71, "N", "S")]
+met_avg[, region2 := fifelse(lon > -98, "E", "W")]
+met_avg$region <- paste(met_avg$region1, met_avg$region2, sep="")
+
+
+
+

Exercise 3

+
+
library(tidyverse)
+
+
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
+✔ dplyr     1.1.4     ✔ readr     2.1.5
+✔ forcats   1.0.0     ✔ stringr   1.5.1
+✔ ggplot2   3.5.1     ✔ tibble    3.2.1
+✔ lubridate 1.9.3     ✔ tidyr     1.3.1
+✔ purrr     1.0.2     
+── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
+✖ dplyr::between()     masks data.table::between()
+✖ dplyr::filter()      masks stats::filter()
+✖ dplyr::first()       masks data.table::first()
+✖ lubridate::hour()    masks data.table::hour()
+✖ lubridate::isoweek() masks data.table::isoweek()
+✖ dplyr::lag()         masks stats::lag()
+✖ dplyr::last()        masks data.table::last()
+✖ lubridate::mday()    masks data.table::mday()
+✖ lubridate::minute()  masks data.table::minute()
+✖ lubridate::month()   masks data.table::month()
+✖ lubridate::quarter() masks data.table::quarter()
+✖ lubridate::second()  masks data.table::second()
+✖ purrr::transpose()   masks data.table::transpose()
+✖ lubridate::wday()    masks data.table::wday()
+✖ lubridate::week()    masks data.table::week()
+✖ lubridate::yday()    masks data.table::yday()
+✖ lubridate::year()    masks data.table::year()
+ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
+
+
library(data.table)
+library(ggplot2)
+
+met_avg[!is.na(wind.sp)] |>
+  ggplot()+
+  geom_violin(mapping=aes(x=1, y=wind.sp, fill=region)) +
+  facet_wrap(~ region, nrow=1)
+
+
+
+

+
+
+
+
met_avg[!is.na(dew.point)] |>
+  ggplot()+
+  geom_violin(mapping=aes(x=1, y=dew.point, fill=region)) +
+  facet_wrap(~ region, nrow=1)
+
+
+
+

+
+
+
+
+
+
+

Exercise 4

+
+
library(cowplot) #to use plot_grid
+
+

+Attaching package: 'cowplot'
+
+
+
The following object is masked from 'package:lubridate':
+
+    stamp
+
+
#using position=jitter in geom_point
+nojitter <- ggplot(data=met_avg) +
+  geom_point(mapping=aes(x=dew.point, y=wind.sp, fill=region))
+
+jitter <- ggplot(data=met_avg) +
+  geom_point(mapping=aes(x=dew.point, y=wind.sp, fill=region), position="jitter")
+
+plot_grid(nojitter, jitter, labels = "AUTO")
+
+
Warning: Removed 67 rows containing missing values or values outside the scale range
+(`geom_point()`).
+Removed 67 rows containing missing values or values outside the scale range
+(`geom_point()`).
+
+
+
+
+

+
+
+
+
#using geom_jitter
+nojitter1 <- ggplot(met_avg, aes(dew.point, wind.sp, colour=region)) + 
+  stat_smooth() +
+  geom_point()
+
+jitter1 <- ggplot(met_avg, aes(dew.point, wind.sp, colour=region)) + 
+  stat_smooth() +
+  geom_point() +
+  geom_jitter(width=1, height=1)
+
+plot_grid(nojitter1, jitter1, labels = "AUTO")
+
+
`geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
+
+
+
Warning: Removed 67 rows containing non-finite outside the scale range
+(`stat_smooth()`).
+Removed 67 rows containing missing values or values outside the scale range
+(`geom_point()`).
+
+
+
`geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
+
+
+
Warning: Removed 67 rows containing non-finite outside the scale range
+(`stat_smooth()`).
+Removed 67 rows containing missing values or values outside the scale range
+(`geom_point()`).
+Removed 67 rows containing missing values or values outside the scale range
+(`geom_point()`).
+
+
+
+
+

+
+
+
+
+
+
+

Exercise 5

+
+
met_avg[, elev_cat := fifelse(elev > 252, "high", "low")]
+
+met_avg[!is.na(elev_cat)] |>
+  ggplot() +
+  geom_bar(mapping=aes(x=elev_cat, fill=region), position="dodge") + 
+  scale_fill_brewer(palette = "PiYG") + 
+  labs(title = "Barplots of Elevation Category Colored by Region") +
+  labs(x="Elevation Category", y="Count of Weather Stations")
+
+
+
+

+
+
+
+
+
+
+

Exercise 6

+
+
summary <- met_avg[!is.na(dew.point) & !is.na(wind.sp)] |>
+  ggplot(mapping=aes(x=dew.point, y=wind.sp, colour=region)) +
+  stat_summary(
+               fun.min=min,
+               fun.max=max,
+               fun=median,
+               fun.data="mean_sdl"
+               )
+summary
+
+
Warning: Removed 6153 rows containing missing values or values outside the scale range
+(`geom_segment()`).
+
+
+
+
+

+
+
+
+
#why is errorbar not working??
+
+#  stat_summary(
+#               fun.min=min,
+#               fun.max=max,
+#               fun=median,
+#               fun.data="mean_sdl"
+#               ) +
+#  geom_errorbar(aes(ymin=min, ymax=max))
+
+
+
+

Exercise 7

+
+
library(leaflet)
+
+#dataset with mean rh
+met_avg2 <- met[,.(rh = mean(rh, na.rm=TRUE), lat = mean(lat), lon = mean(lon)), by=c("USAFID")]
+met_avg2 <- met_avg2[!is.na(rh)]
+
+#generating a color palette
+rh.pal <- colorNumeric(c('lightgreen','pink','purple'), domain=met_avg2$rh)
+rh.pal
+
+
function (x) 
+{
+    if (length(x) == 0 || all(is.na(x))) {
+        return(pf(x))
+    }
+    if (is.null(rng)) 
+        rng <- range(x, na.rm = TRUE)
+    rescaled <- scales::rescale(x, from = rng)
+    if (any(rescaled < 0 | rescaled > 1, na.rm = TRUE)) 
+        warning("Some values were outside the color scale and will be treated as NA")
+    if (reverse) {
+        rescaled <- 1 - rescaled
+    }
+    pf(rescaled)
+}
+<bytecode: 0x117757d20>
+<environment: 0x11715db80>
+attr(,"colorType")
+[1] "numeric"
+attr(,"colorArgs")
+attr(,"colorArgs")$na.color
+[1] "#808080"
+
+
#leaflet
+top_rh <- met_avg2 |>
+  filter(rank(-rh) <= 10)
+
+rh_map <- leaflet(met_avg2) |>
+  addProviderTiles("CartoDB.Positron") |>
+  addCircles(lat=~lat, 
+             lng=~lon,
+             color=~rh.pal(rh)) |>
+  addMarkers(data=top_rh,
+             lat=~lat, 
+             lng=~lon,
+             clusterOptions = markerClusterOptions()) |>
+  addLegend(position='bottomleft', pal=rh.pal, values=~rh, title="Relative Humidity (%)")
+
+rh_map
+
+
+ +
+
+
+
+

Exercise 8

+
+
#regular boxplot
+ggplot(met_avg, aes(dew.point, wind.sp, group=region)) +
+  geom_boxplot()
+
+
Warning: Removed 2 rows containing missing values or values outside the scale range
+(`stat_boxplot()`).
+
+
+
Warning: Removed 65 rows containing non-finite outside the scale range
+(`stat_boxplot()`).
+
+
+
+
+

+
+
+
+
#ggplot extension
+library(gganimate)
+
+
No renderer backend detected. gganimate will default to writing frames to separate files
+Consider installing:
+- the `gifski` package for gif output
+- the `av` package for video output
+and restarting the R session
+
+
ggplot(met_avg, aes(dew.point, wind.sp, group=region)) +
+  geom_boxplot() +
+  transition_states(
+    region,
+    transition_length = 2,
+    state_length = 1
+  ) +
+  enter_fade() + 
+  exit_shrink() +
+  ease_aes('sine-in-out')
+
+
Warning: No renderer available. Please install the gifski, av, or magick package to
+create animated output
+
+
+
NULL
+
+
+
+ +
+ + +
+ + + + + \ No newline at end of file