-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathrides_and_shifts_visualization.R
79 lines (70 loc) · 2.36 KB
/
rides_and_shifts_visualization.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
# Restore the saved objects this script plots. The functions below reference
# `taxi_clean_shifts` and `shifts_clean`, neither of which is defined in this
# file -- presumably they are restored by these two load() calls (TODO confirm
# which file provides which). Both paths are relative to the working directory.
load("../Rdata/one_week_taxi.Rdata")
load("../Rdata/shifts.Rdata")
# Packages used below: ggplot2 for the plot layers/scales and ggsave(), scales
# for date_format(), dplyr for %>%, filter(), group_by(), summarize(), arrange().
# NOTE(review): no tidyr function is called in the visible code.
library(ggplot2)
library(scales)
library(tidyr)
library(dplyr)
# Make theme_minimal() the default theme for every ggplot built afterwards.
theme_set(theme_minimal())
#########################################################################
## Plot pickups and dropoffs for one hundred random drivers throughout the week
#########################################################################
# Draw a random sample of distinct driver ids.
#   x - data frame with a `hack_license` column. The default,
#       `taxi_clean_shifts`, is not defined in this file and is only looked up
#       when `x` is not supplied.
#   n - number of distinct drivers to draw (default 100).
# Returns a vector of `n` distinct values of `x$hack_license`, drawn without
# replacement. Errors (from sample.int) when `n` exceeds the number of
# distinct drivers.
n_random_drivers <- function(x = taxi_clean_shifts, n = 100) {
  driver_ids <- unique(x$hack_license)
  # Sample positions, not values: sample(v, n) treats a single numeric `v` as
  # the range 1:v, which would return ids that do not occur in the data when
  # only one numeric license is present. For every other input this draws
  # exactly what sample(driver_ids, n) would draw.
  driver_ids[sample.int(length(driver_ids), n)]
}
# Keep only the rows that belong to the selected drivers.
#   x              - data frame with a `hack_license` column
#   random_drivers - vector of license ids to keep
# Returns the rows of `x` whose `hack_license` is in `random_drivers`.
filter_df_by_drivers <- function(x, random_drivers) {
  filter(x, hack_license %in% random_drivers)
}
# Order drivers by the time of their first pickup.
#   filtered_df - data frame with `hack_license` and `pickup_datetime` columns
# Returns one row per driver with `hack_license` and `level` (that driver's
# earliest `pickup_datetime`), sorted by `level` in ascending order.
get_driver_levels <- function(filtered_df) {
  per_driver <- group_by(filtered_df, hack_license)
  first_pickup <- summarize(per_driver, level = min(pickup_datetime))
  arrange(first_pickup, level)
}
# Turn the driver ids of `x` into a factor with a fixed level order.
#   x      - data frame with a `hack_license` column
#   levels - data frame whose `hack_license` column lists the drivers in the
#            desired order
# Returns `x$hack_license` as a factor whose levels are the REVERSE of
# `levels$hack_license`; ids that are not listed become NA.
set_driver_levels <- function(x, levels) {
  reversed_order <- rev(levels$hack_license)
  factor(x$hack_license, levels = reversed_order)
}
# Plot rides and shifts as horizontal segments, one row per driver.
#   rides  - data frame with `pickup_datetime`, `dropoff_datetime` and
#            `hack_license` columns
#   shifts - data frame with `start_shift`, `end_shift` and `hack_license`
#            columns
#   n      - number of drivers; used for the y-axis tick labels (1..n) and the
#            y-axis title (default 100)
# Returns a ggplot object. Rides are drawn first; shifts are drawn on top as
# red segments with alpha 0.3.
# NOTE(review): the 1..n tick labels replace the license ids, so they only line
# up when the data contain exactly `n` distinct drivers -- confirm with callers.
plot_ <- function(rides, shifts, n = 100) {
  ggplot() +
    geom_segment(
      data = rides,
      aes(
        x = pickup_datetime, xend = dropoff_datetime,
        y = as.factor(hack_license), yend = as.factor(hack_license)
      ),
      size = 2
    ) +
    geom_segment(
      data = shifts,
      aes(
        x = start_shift, xend = end_shift,
        y = as.factor(hack_license), yend = as.factor(hack_license)
      ),
      color = "red", alpha = 0.3, size = 2
    ) +
    # No layer maps a shape aesthetic, so the former scale_shape_manual() call
    # had no effect and has been dropped.
    scale_x_datetime(date_breaks = "2 hour", labels = date_format("%H")) +
    # seq_len() instead of 1:n so that n = 0 yields no labels, not c(1, 0).
    scale_y_discrete(labels = seq_len(n)) +
    xlab("hour of day") +
    ylab(paste(n, "random drivers"))
}
###############
# main function
###############
# Sample `n` random drivers and plot their rides together with their shifts.
#   x      - rides data frame; needs `hack_license`, `pickup_datetime` and
#            `dropoff_datetime`. Defaults to `taxi_clean_shifts`, which is not
#            defined in this file.
#   n      - number of drivers to sample (default 100).
#   shifts - shifts data frame; needs `hack_license`, `start_shift` and
#            `end_shift`. Defaults to `shifts_clean` (also not defined in this
#            file), which was previously read directly as a global.
# Returns the ggplot object built by plot_().
visualize_rides_and_shifts <- function(x = taxi_clean_shifts, n = 100,
                                       shifts = shifts_clean) {
  random_drivers <- n_random_drivers(x = x, n = n)
  driver_rides <- filter_df_by_drivers(
    x = x,
    random_drivers = random_drivers
  )
  driver_shifts <- filter_df_by_drivers(
    x = shifts,
    random_drivers = random_drivers
  )
  # Both data frames get the same factor levels (derived from the rides) so
  # that a driver's rides and shifts fall on the same row of the plot.
  driver_order <- get_driver_levels(driver_rides)
  driver_rides$hack_license <- set_driver_levels(driver_rides, driver_order)
  driver_shifts$hack_license <- set_driver_levels(driver_shifts, driver_order)
  plot_(driver_rides, driver_shifts, n)
}
# Build the figure with the default arguments and write it to disk.
# No RNG seed is set in this file, so the sampled drivers differ between runs.
rides_and_shifts <- visualize_rides_and_shifts()
ggsave(
  filename = "../figures/100_random_drivers_rides_n_shfits.png",
  plot = rides_and_shifts,
  scale = 3
)