-
Notifications
You must be signed in to change notification settings - Fork 11
/
overfit.R
113 lines (74 loc) · 3.24 KB
/
overfit.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
# load required libraries
library(car)
# Set the working directory
# NOTE: this path is specific to the original author's machine. Switch to it
# only when it exists; otherwise stay in the current directory so the script
# can still run wherever the data file has been placed.
data_dir <- "C:/Users/pm27995/OneDrive - The University of Texas at Austin/Courses/PGE337_new/R/Bivariate"
if (dir.exists(data_dir)) {
  setwd(data_dir)
} else {
  message("Data directory not found; reading data from ", getwd())
}
# Load the data
data_file <- "Bivariate_model_fit_check_data.csv"
if (!file.exists(data_file)) {
  stop("Cannot find '", data_file, "' in ", getwd(), call. = FALSE)
}
mydata <- read.csv(data_file) # read csv file
# The models below need both of these columns; fail early if either is absent.
stopifnot(all(c("Depth", "Fines") %in% names(mydata)))
mydata
# Sampling Array in Depth
# Built directly, so that `Depth` is not first set to 1:100 and then
# immediately overwritten by the data column below.
# (depth_int_df is not referenced again in the visible script.)
depth_int_df <- data.frame(Depth = 1:100)
# Extract fines and depth vectors
# Rescale the raw Fines column to (1 - Fines) * 0.3 * 100 and store it back
# into the data frame; the plots below label this quantity "Porosity (%)".
Fines <- (1 - mydata$Fines) * 0.3 * 100
Depth <- mydata$Depth
mydata$Fines <- Fines
# Matrix for plots
# 3 rows x 2 columns, filled row by row: each model section below draws two
# panels (the fit, then the residual histogram).
par(mfrow = c(3, 2))
# Prediction grid
# (coords is not referenced again in the visible script.)
coords <- seq(from = 0, to = 100, by = 0.1)
# Variance of the rescaled response, for comparison with the residual
# variances reported after each model fit.
var(Fines)
# Model 1: Plot for depth and fines
plot(mydata$Depth, mydata$Fines, xlab = " Depth (m) ", ylab = " Porosity (%) ")
title("Model 1: 2nd Order Polynomial")
# Least-squares fit of Fines on a 2nd-order polynomial in Depth
fit <- lm(Fines ~ poly(Depth, 2), data = mydata)
# Confidence interval for model
# Evaluate the fit on 100 evenly spaced depths spanning the observed range.
newx <- seq(min(mydata$Depth), max(mydata$Depth), length.out = 100)
# With interval = "confidence", predict() returns a matrix whose columns are
# "fit", "lwr" and "upr"; refer to them by name rather than by position.
preds <- predict(fit, newdata = data.frame(Depth = newx), interval = "confidence")
# Shade the band between the upper and lower confidence limits.
polygon(c(rev(newx), newx), c(rev(preds[, "upr"]), preds[, "lwr"]),
        col = "grey80", border = NA)
# Outline the band. (The original passed main= to lines() here; `main` is not
# a graphical parameter for lines(), so it only produced a warning.)
lines(newx, preds[, "upr"], lty = "dashed", col = "red")
lines(newx, preds[, "lwr"], lty = "dashed", col = "red")
lines(newx, preds[, "fit"], col = "black")
# Redraw the observations on top of the shaded band, which was painted over them.
points(mydata$Depth, mydata$Fines)
# Calculate and plot residuals
resid <- residuals(fit)
var(resid)
hist(resid, freq = FALSE, main = "Distribution of Residuals", xlim = c(-20, 20))
# Model 2: scatter of the observations, then a 5th-order polynomial fit
with(mydata, plot(Depth, Fines, xlab = " Depth (m) ", ylab = " Porosity (%) "))
title(main = "Model 2: 5th Order Polynomial")
# Least-squares fit of Fines on a 5th-order polynomial in Depth
fit <- lm(Fines ~ poly(Depth, 5), data = mydata)
# 100 evenly spaced depths across the observed range, and the fitted values
# with their confidence limits (matrix columns: "fit", "lwr", "upr")
newx <- with(mydata, seq(min(Depth), max(Depth), length.out = 100))
preds <- predict(fit, newdata = data.frame(Depth = newx), interval = "confidence")
# Shaded confidence band: trace the upper limit backwards, then the lower
# limit forwards, to close the polygon
polygon(
  x = c(rev(newx), newx),
  y = c(rev(preds[, "upr"]), preds[, "lwr"]),
  col = "grey80",
  border = NA
)
# Dashed red outline for the upper and lower limits, solid black for the fit
invisible(lapply(
  c("upr", "lwr"),
  function(bound) lines(newx, preds[, bound], lty = "dashed", col = "red")
))
lines(newx, preds[, "fit"], col = "black")
# The band was painted over the observations, so draw them again on top
with(mydata, points(Depth, Fines))
# Residual variance and histogram (density scale, fixed x-range so the three
# models can be compared panel to panel)
resid <- residuals(fit)
var(resid)
hist(resid, freq = FALSE, main = "Distribution of Residuals", xlim = c(-20, 20))
# Model 3: observations with an 8th-order polynomial fit
plot(
  mydata$Depth, mydata$Fines,
  xlab = " Depth (m) ", ylab = " Porosity (%) ",
  main = "Model 3: 8th Order Polynomial"
)
# Regress Fines on an 8th-order polynomial in Depth
fit <- lm(Fines ~ poly(Depth, 8), data = mydata)
# Prediction depths: 100 points from the shallowest to the deepest observation
newx <- seq(
  from = min(mydata$Depth),
  to = max(mydata$Depth),
  length.out = 100
)
# Fitted curve plus confidence limits; columns are "fit", "lwr", "upr"
preds <- predict(
  fit,
  newdata = data.frame(Depth = newx),
  interval = "confidence"
)
# Grey confidence band between the upper and lower limits
polygon(
  c(rev(newx), newx),
  c(rev(preds[, "upr"]), preds[, "lwr"]),
  col = "grey80", border = NA
)
# Band edges (dashed red) and the fitted curve (black)
lines(x = newx, y = preds[, "upr"], col = "red", lty = "dashed")
lines(x = newx, y = preds[, "lwr"], col = "red", lty = "dashed")
lines(x = newx, y = preds[, "fit"], col = "black")
# Re-plot the data points, which the filled band has covered
points(mydata$Depth, mydata$Fines)
# Residuals of this fit: variance, then a density-scaled histogram
resid <- residuals(fit)
var(resid)
hist(
  resid,
  freq = FALSE,
  main = "Distribution of Residuals",
  xlim = c(-20, 20)
)