-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcalculate_statistics_400m.R
73 lines (53 loc) · 1.88 KB
/
calculate_statistics_400m.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
# read in the cleaned data
# Columns are read through `iaaf` explicitly below; the data frame is not
# attached to the search path, so `year`, `athlete` and `time` cannot be
# confused with same-named objects elsewhere (e.g. stats::time).
iaaf <- read.csv("women_iaaf_400m_2011_2013_wiki_clean.csv", as.is = TRUE)

# Best (minimum) time of every athlete within a single year.
#
# Arguments:
#   data   - data frame with columns `year`, `athlete` and `time`
#            (assumes `time` is numeric -- TODO confirm against the CSV)
#   season - the year whose rows are summarised
#
# Returns a data frame with one row per distinct athlete appearing in
# `season` (in order of first appearance) and the columns `year`,
# `min_athlete` and `min_times`. An athlete with no non-missing time in
# that year gets NA in `min_times`. A year with no rows gives a zero-row
# data frame.
best_time_per_athlete <- function(data, season) {
  # which() drops rows whose year is NA instead of carrying them along
  season_rows <- which(data[["year"]] == season)
  season_athletes <- data[["athlete"]][season_rows]
  season_times <- data[["time"]][season_rows]

  # the athlete names are taken from the same year subset that the times
  # are taken from, so each name always lines up with its best time
  athletes <- unique(season_athletes)
  best <- vapply(
    athletes,
    function(who) {
      runs <- season_times[which(season_athletes == who)]
      runs <- runs[!is.na(runs)]
      if (length(runs) > 0) min(runs) else NA_real_
    },
    numeric(1),
    USE.NAMES = FALSE
  )

  data.frame(
    year = rep(season, length(athletes)),
    min_athlete = athletes,
    min_times = best
  )
}

# remove duplicates within each year by athlete
# keep the minimum time for each athlete
min2011 <- best_time_per_athlete(iaaf, 2011)
min2013 <- best_time_per_athlete(iaaf, 2013)
# Stack the two per-year tables into a single data frame.
min_results <- do.call(rbind, list(min2011, min2013))

# Summary over all athletes with a recorded best time: how many there are,
# their mean time, and the standard deviation. Missing times are left out.
local({
  recorded <- min_results[["min_times"]]
  recorded <- recorded[!is.na(recorded)]
  print(length(recorded))
  print(mean(recorded))
  print(sd(recorded))
})
# permutation test to get a more accurate p-value
# The non-missing best times are pooled, then repeatedly shuffled and cut
# into three consecutive groups of fixed size; each shuffle records the
# difference between the mean of the first group and the mean of the third.
notna <- !is.na(min_results$min_times)
y <- min_results$min_times[notna]
ny <- length(y)
group_sizes <- c(24, 23, 24)
group_ends <- cumsum(group_sizes)

# With fewer pooled times than the groups need, the third group's positions
# run past the end of the shuffled index, every simulated difference is NA,
# and the script would print NA only after the full simulation has run.
if (ny < group_ends[3]) {
  stop(
    "permutation test needs at least ", group_ends[3],
    " non-missing times but only ", ny, " are available",
    call. = FALSE
  )
}

# positions inside each shuffled ordering that make up groups 1 and 3;
# they do not depend on the shuffle, so they are built once
group1_pos <- seq_len(group_ends[1])
group3_pos <- (group_ends[2] + 1):group_ends[3]

nsim <- 1000000
diff_vec <- numeric(nsim)
for (j in seq_len(nsim)) {
  ord <- sample(ny)
  diff_vec[j] <- mean(y[ord[group1_pos]]) - mean(y[ord[group3_pos]])
}

# NOTE(review): the observed difference is hard-coded; presumably these are
# the group 1 and group 3 means obtained outside this script -- confirm.
observed_diff <- 52.60 - 51.16
# share of shuffles whose difference is strictly larger than the observed one
print(mean(diff_vec > observed_diff))