-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathFunctions.R
221 lines (184 loc) · 5.45 KB
/
Functions.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
# FUNCTIONS
# Installs all packages needed for the code to run.
install_pckg <- function() {
# Data compilation
install.packages("tidyverse")
install.packages("readxl")
install.packages("dplyr")
install.packages("zoo")
install.packages("rlist")
install.packages("data.table")
install.packages("stringr")
install.packages("stringi")
install.packages("anytime")
# Data filtering
install.packages("rgdal")
# Visualizations
install.packages("ggplot2")
install.packages("RColorBrewer")
install.packages("writexl")
install.packages("plotly")
install.packages("sf")
install.packages("ggrepel")
install.packages("maps")
install.packages("mapproj")
install.packages("shinythemes")
# Coding cleanup (delete later)
install.packages("styler")
install.packages("lintr")
}
# Loads all packages needed for the code to run.
load_pckg <- function() {
# Data compilation
library(tidyverse)
library(readxl)
library(dplyr)
library(zoo)
library(rlist)
library(data.table)
library(stringr)
library(stringi)
library(anytime)
# Data filtering
library(rgdal)
# Visualizations
library(ggplot2)
library(RColorBrewer)
library(writexl)
library(plotly)
library(sf)
library(ggrepel)
library(maps)
library(mapproj)
library(shinythemes)
# Coding cleanup (delete later)
library(styler)
library(lintr)
}
# Returns a list of dataframes from a given excel spreadsheet, where each
# dataframe is an excel sheet.
#
# xlsx = path to spreadsheet (ex: "./Data/Original Data/KCFS 2019.xlsx")
# start = starting sheet number (ex: 3)
# skip_row = number of top rows to ignore (ex: 2)
xlsx_to_list <- function(xlsx, start, skip_row) {
# Set up return list
df_list <- list()
# Get list of sheet names
sheet_names <- excel_sheets(path=xlsx)
# Get number of sheets
num_sheets <- length(sheet_names)
# Get range of relevant sheets
sheets <- start:num_sheets
# Add each sheet to the return list
for (sheet in sheets) {
# Temporarily store sheet
temp <- read_excel(xlsx,
skip = skip_row,
sheet = sheet_names[sheet])
# Offset this loop's index so that it matches the current index of the
# return list
offset_index <- sheet - (start - 1)
# Add sheet to proper index of return list
df_list[[offset_index]] <- temp
# Get the program name based off of the current sheet name
program_name <- sheet_names[[sheet]]
# Add program name to a new program name column in the return list
df_list[[offset_index]][["program_name"]] <- program_name
}
return(df_list)
}
# Returns a list of dataframes with date columns in consistent format. DATE
# COLUMN MUST BE NAMED `order_date`
#
# df_list = list of dataframes (ex: list)
consistent_date <- function(df_list) {
# Get number of dataframes in df list
num_dfs <- length(df_list)
# Fix dates in each df
for (df in 1:num_dfs) {
# Get current df
curr = df_list[[df]]
# Fix column differently based off of it's type
if (typeof(curr$order_date) == "character") {
df_list[[df]]$order_date = as.Date(
as.numeric(curr$order_date), origin = "1899-12-30")
} else {
df_list[[df]]$order_date = as.Date(curr$order_date)
}
}
return(df_list)
}
# Returns a dataframe, where the pounds purchased column is prepared to be
# evaluable in excel (for fixing pounds purchased). COLUMN MUST BE NAMED
# `pounds_purchased`.
#
# df = dataframe
fix_lbs_purchased <- function(df) {
df <- strip_unnecessary(df)
df <- non_num_to_plus(df)
return(df)
}
# Removes unnecessary numbers by removing everything after the '=', '(', and
# '$' signs (inclusive).
#
# df = dataframe
strip_unnecessary <- function(df) {
remove <- "\\(.*|=.*|\\$.*| +$"
return <- df %>%
mutate(pounds_purchased=gsub(pattern=remove,
replacement = "",
df$pounds_purchased))
return(return)
}
# Replaces characters between numbers with a single '+'.
#
# df = dataframe
non_num_to_plus <- function(df) {
# Replace non-numbers with spaces
remove <- "[^0-9.-]"
df <- df %>%
mutate(pounds_purchased=gsub(pattern=remove,
replacement = " ",
df$pounds_purchased))
# Trim whitespace
df <- df %>%
mutate(pounds_purchased=str_trim(str_squish(df$pounds_purchased)))
# Replace spaces with `+`
df <- df %>%
mutate(pounds_purchased=gsub(pattern=" ",
replacement="+",
df$pounds_purchased))
return(df)
}
# Returns a dataframe with a single column of unique, alphabetically-sorted
# farm names from given dataframe (for fixing farm names). COLUMN NAME MUST BE
# `farm_name`.
#
# df = dataframe
get_unq_farms <- function(df) {
df <- df %>%
select(farm_name) %>%
unique() %>%
arrange(farm_name)
return(df)
}
# Returns a dataframe with coordinates from given dataframe separated into `lon`
# and `lat` columns. COLUMN NAME MUST BE `coordinates`.
#
# df = dataframe
sep_coords <- function(df) {
df <- df %>%
mutate(lat = as.numeric(gsub("^(.*?),.*", "\\1", coordinates)),
lon = as.numeric(sub("^.*?,", "", coordinates)))
return(df)
}
# Returns a dataframe, replacing the order date column with order month data.
# COLUMN NAME MUST BE `order_date`.
#
# df = dataframe
get_order_month <- function(df) {
df <- df %>%
mutate(order_date=lubridate::floor_date(order_date, "month"))
return(df)
}