-
Notifications
You must be signed in to change notification settings - Fork 0
/
Assignment 1 - Garrett Ramela.R
115 lines (101 loc) · 3.97 KB
/
Assignment 1 - Garrett Ramela.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
# Foundations of AI
# Assignment 1
# Garrett Ramela
# Ensure that your current working directory is set up properly.
# setwd("~/Documents/George Washington University/Foundations of AI/Assignment 1")
# Load the College data set ----
# Read College.csv from the current working directory. `row.names = 1` tells
# read.csv() to use the first CSV column as the row names and to leave it out
# of the data columns, so it is never treated as a variable in calculations.
college <- read.csv("College.csv", row.names = 1)

# fix() opens a blocking spreadsheet-style editor and writes whatever is
# edited there back into `college`; only offer it when a person is at the
# console so that batch runs (Rscript, R CMD BATCH) are not stalled.
if (interactive()) {
  fix(college)
}

# Get a feel for the data. print() is explicit so the output also appears
# when the script is run through source(), which does not auto-print.
print(summary(college))
print(head(college))

# View() needs a GUI data viewer, so it is likewise limited to interactive use.
if (interactive()) {
  View(college)
}
# Scatterplot matrix of the first ten columns ----
# pairs() rejects non-numeric columns, so the first column (presumably the
# Private indicator -- confirm against College.csv) is converted to integer
# factor codes. The conversion is done on a scratch copy so that `college`
# itself keeps its original values for the plots that follow.
pairs_data <- college[, 1:10]
pairs_data[, 1] <- as.numeric(factor(pairs_data[, 1]))
pairs(pairs_data)
# Side-by-side box plots of Outstate grouped by Private ----
# `data = college` is required: the columns are not attached to the search
# path, so the bare names in the formula must be looked up in the data frame.
boxplot(Outstate ~ Private,
        data = college,
        xlab = "No = Public School / Yes = Private School",
        ylab = "Out-of-State Tuition Rate",
        col = "Blue")
# Derive the Elite indicator ----
# Elite is "Yes" for rows where Top10perc exceeds 50 and "No" otherwise
# (Top10perc is presumably the percentage of new students who came from the
# top 10% of their high-school class -- confirm against the data dictionary).
# The column is read through `college$` because the data frame is not
# attached to the search path.
Elite <- rep("No", nrow(college))
Elite[college$Top10perc > 50] <- "Yes"
Elite <- as.factor(Elite)

# Assigning by name keeps the step idempotent: re-running it overwrites the
# Elite column instead of appending a duplicate one.
college$Elite <- Elite

# Summary of the data set including the new Elite column; print() is explicit
# so the output also appears when the script is run through source().
print(summary(college))
# Side-by-side box plots of Outstate grouped by Elite ----
# `data = college` lets the formula find both Outstate and the Elite column
# inside the data frame rather than relying on objects in the workspace.
boxplot(Outstate ~ Elite,
        data = college,
        xlab = "No = Not Elite School / Yes = Elite School",
        ylab = "Out-of-State Tuition Rate",
        col = "Blue")
# Six histograms on a 2 x 3 grid ----
# Distributions of applicants, accepted students, PhD faculty percentage,
# student/faculty ratio, donating-alumni percentage, and graduation rate.
# Each entry maps a column of `college` to the text used for both the plot
# title and the x-axis label.
hist_labels <- c(
  Apps = "Student Applicants",
  Accept = "Accepted Students",
  PhD = "PhD Faculty Percentage",
  S.F.Ratio = "Student/Faculty Ratio",
  perc.alumni = "Donating Alumni Percentage",
  Grad.Rate = "Graduation Rate"
)

# par() returns the previous settings, which are restored afterwards so later
# base-graphics plots are not squeezed into the 2 x 3 layout.
old_par <- par(mfrow = c(2, 3))
for (column in names(hist_labels)) {
  # Columns are pulled from the data frame by name; they are not attached to
  # the search path, so bare names such as `Apps` would not be found.
  hist(college[[column]],
       main = hist_labels[[column]],
       xlab = hist_labels[[column]],
       col = "Blue",
       breaks = 50)
}
par(old_par)
# Scatter plots of out-of-state tuition against three predictors ----
# Each plot colours points by Private, shapes them by Elite, and overlays a
# single linear-regression line fitted to all points.
library(ggplot2)

# Build one tuition scatter plot.
#   data     data frame containing Outstate, Private, Elite and `x_var`
#   x_var    name (character string) of the column drawn on the x axis
#   title    plot title
#   x_label  x-axis label
# Returns the ggplot object without drawing it.
plot_tuition_scatter <- function(data, x_var, title, x_label) {
  # factor(Private) keeps the colour scale discrete even if Private holds
  # numeric codes rather than labels; `.data[[x_var]]` looks the x column up
  # by name inside `data`.
  ggplot(data, aes(x = .data[[x_var]], y = Outstate)) +
    geom_point(aes(color = factor(Private), shape = Elite)) +
    geom_smooth(method = "lm", formula = y ~ x) +
    labs(title = title,
         subtitle = "Grouped Across Public & Private Schools",
         x = x_label,
         y = "Out-of-State Tuition Rate",
         color = "Private School",
         shape = "Elite School")
}

scatter_specs <- list(
  list(x_var = "S.F.Ratio",
       title = "Student/Faculty Ratio Effect on Tuition",
       x_label = "Student/Faculty Ratio"),
  list(x_var = "PhD",
       title = "Faculty PhD Percentage Effect on Tuition",
       x_label = "Faculty with PhDs Percentage"),
  list(x_var = "perc.alumni",
       title = "Donating Alumni Percentage Effect on Tuition",
       x_label = "Donating Alumni Percentage")
)

# print() is required: ggplot objects are only drawn when printed, and neither
# a for loop nor source() auto-prints them.
for (spec in scatter_specs) {
  print(plot_tuition_scatter(college, spec$x_var, spec$title, spec$x_label))
}