#171 data analysis + figures into report

taivop · Dec 7, 2016 · 946f4ad · 946f4ad
1 parent d0855b4
commit 946f4ad
Show file tree

Hide file tree

Showing 4 changed files with 127 additions and 21 deletions.
diff --git a/report/milestone3.pdf b/report/milestone3.pdf
diff --git a/report/milestone3.tex b/report/milestone3.tex
@@ -151,6 +151,31 @@ \subsection{Model}
 
 \subsection{Comparison of model and experiments}
 
+\todo{}
+
+\begin{figure}[h]
+\centering
+\includegraphics[width=0.5\textwidth]{../results/analysis/part2_mmm/graphs/traffic_intensity_vs_clients.pdf}
+\caption{\todo{}}
+\label{fig:part2:trafficintensity}
+\end{figure}
+
+\begin{figure}[h]
+\centering
+\includegraphics[width=\textwidth]{../results/analysis/part2_mmm/graphs/response_time_predicted_and_actual.pdf}
+\caption{\todo{} note difference in scale}
+\label{fig:part2:responsetime}
+\end{figure}
+
+\begin{figure}[h]
+\centering
+\includegraphics[width=\textwidth]{../results/analysis/part2_mmm/graphs/number_of_jobs_predicted_and_actual.pdf}
+\caption{\todo{}}
+\label{fig:part2:numberofjobs}
+\end{figure}
+
+\input{../results/analysis/part2_mmm/comparison_table.txt}
+
 
 \clearpage
 % --------------------------------------------------------------------------------

diff --git a/scripts/r/ms3_common.r b/scripts/r/ms3_common.r
@@ -116,7 +116,21 @@ get_mmm_weird_rho <- function(m, rho, p0) {
   return(p0 * (m * rho)^m / (factorial(m) * (1 - rho)))
 }
 
+# Mean response time of the M/M/m model.
+#   rho       - traffic intensity
+#   weird_rho - value computed by get_mmm_weird_rho()
+#   mu        - service rate of a single server; the result is in units of 1 / mu
+#   m         - the m of M/M/m (number of servers)
+get_mmm_response_time_mean <- function(rho, weird_rho, mu, m) {
+  service_time <- 1 / mu
+  queueing_factor <- weird_rho / (m * (1 - rho))
+  service_time * (1 + queueing_factor)
+}
+
+# Standard deviation of the response time in the M/M/m model: the square root
+# of (1 / mu^2) * (1 + weird_rho * (2 - weird_rho) / (m^2 * (1 - rho)^2)).
+#   rho       - traffic intensity
+#   weird_rho - value computed by get_mmm_weird_rho()
+#   mu        - service rate of a single server; the result is in units of 1 / mu
+#   m         - the m of M/M/m (number of servers)
+get_mmm_response_time_std <- function(rho, weird_rho, mu, m) {
+  service_time_sq <- 1 / mu^2
+  queueing_term <- (weird_rho * (2 - weird_rho)) / (m^2 * (1 - rho)^2)
+  variance <- service_time_sq * (1 + queueing_term)
+  sqrt(variance)
+}
 
+# Mean number of jobs in the system for the M/M/m model: the in-service part
+# (m * rho) plus the waiting part (rho * weird_rho / (1 - rho)).
+#   rho       - traffic intensity
+#   weird_rho - value computed by get_mmm_weird_rho()
+#   mu        - not used by this formula; kept so all helpers share one signature
+#   m         - the m of M/M/m (number of servers)
+get_mmm_num_jobs_in_system_mean <- function(rho, weird_rho, mu, m) {
+  in_service <- m * rho
+  waiting <- rho * weird_rho / (1 - rho)
+  in_service + waiting
+}
+
+# Standard deviation of the number of jobs in the system for the M/M/m model:
+# the square root of
+#   m * rho + rho * weird_rho * ((1 + rho - rho * weird_rho) / (1 - rho)^2 + m).
+#   rho       - traffic intensity
+#   weird_rho - value computed by get_mmm_weird_rho()
+#   mu        - not used by this formula; kept so all helpers share one signature
+#   m         - the m of M/M/m (number of servers)
+get_mmm_num_jobs_in_system_std <- function(rho, weird_rho, mu, m) {
+  bracket <- (1 + rho - rho * weird_rho) / ((1 - rho)^2) + m
+  variance <- m * rho + rho * weird_rho * bracket
+  sqrt(variance)
+}
 
 
 

diff --git a/scripts/r/part2_mmm.r b/scripts/r/part2_mmm.r
@@ -32,7 +32,8 @@ get_mmm_summary <- function(results_dir) {
   # ------------------
   m <- 5
   WINDOW_SIZE <- 1 # seconds
-  SAMPLING_RATE <- 10 # from exp2 setup
+  SAMPLING_RATE <- 100 # from exp1 setup
+  MAX_THROUGHPUT <- 26500 # from exp1: clients=576, repetition=0
   service_rates <- requests %>%
     mutate(secondCreated=floor(timeCreated/1000/WINDOW_SIZE)) %>%
     group_by(secondCreated) %>%
@@ -41,7 +42,7 @@ get_mmm_summary <- function(results_dir) {
 
   # ---- Parameters ----
   arrival_rate <- mean(service_rates$count) / WINDOW_SIZE * SAMPLING_RATE
-  total_service_rate <- max(service_rates$count) / WINDOW_SIZE * SAMPLING_RATE
+  total_service_rate <- MAX_THROUGHPUT # max(service_rates$count) / WINDOW_SIZE * SAMPLING_RATE
   single_service_rate <- total_service_rate / m
   rho <- arrival_rate / total_service_rate    # traffic intensity
   p0 <- get_mmm_p0(m, rho)                    # prob. of 0 jobs in system
@@ -52,17 +53,23 @@ get_mmm_summary <- function(results_dir) {
   # ---- Predictions ---- # TODO
   predicted = list()
   predicted$type <- "predicted"
-  predicted$mean_num_jobs_in_system <- m * rho + rho * weird_rho / (1 - rho)
-  predicted$std_num_jobs_in_system <-
-    m * rho + rho * weird_rho * ((1 + rho - rho * weird_rho)/((1 - rho)^2) + m)
-  predicted$mean_num_jobs_in_queue <- rho * weird_rho * (1 - rho)
+  predicted$traffic_intensity <- rho
+  # The per-server rate is passed explicitly, exactly as the response-time calls
+  # below do; `mu` is not assigned in the visible part of this function. The two
+  # helpers do not use this argument, so the predicted values are unchanged.
+  predicted$num_jobs_in_system_mean <-
+    get_mmm_num_jobs_in_system_mean(rho, weird_rho, single_service_rate, m)
+  predicted$num_jobs_in_system_std <-
+    get_mmm_num_jobs_in_system_std(rho, weird_rho, single_service_rate, m)
+  # Mean queue length is rho * weird_rho / (1 - rho): the same queueing term that
+  # get_mmm_num_jobs_in_system_mean() adds to m * rho. Multiplying by (1 - rho)
+  # instead of dividing made the prediction shrink as the load approached 1.
+  predicted$num_jobs_in_queue_mean <- rho * weird_rho / (1 - rho)
   predicted$utilisation <- rho
-  predicted$mean_response_time <-
-    1 / single_service_rate * (1 + weird_rho / (m * (1 - rho))) * 1000 # ms
+  predicted$response_time_mean <-
+    get_mmm_response_time_mean(rho, weird_rho, single_service_rate, m) * 1000 # ms
+  predicted$response_time_std <-
+    get_mmm_response_time_std(rho, weird_rho, single_service_rate, m) * 1000 # ms
   Ew <- weird_rho / (m * single_service_rate * (1 - rho))
   # predicted$mean_waiting_time <- Ew
   predicted$response_time_q50 <- max(0, Ew / weird_rho * log(weird_rho / (1 - 0.5))) * 1000 # ms
   predicted$response_time_q95 <- max(0, Ew / weird_rho * log(weird_rho / (1 - 0.95))) * 1000 # ms
+  predicted$arrival_rate <- NA
+  predicted$total_service_rate <- NA
 
   # ---- Actual results ----
   actual = list()
@@ -86,13 +93,17 @@ get_mmm_summary <- function(results_dir) {
   response_times <- requests$timeReturned - requests$timeEnqueued
 
   actual$type <- "actual"
-  actual$mean_num_jobs_in_system <- means$total
-  actual$std_num_jobs_in_system <- sum(distributions$total * (distributions$num_elements-means$total)^2)
-  actual$mean_num_jobs_in_queue <- means$queue
-  actual$utilisation <- 1 - (distributions %>% filter(num_elements==0))$total
-  actual$mean_response_time <- mean(response_times)
+  actual$traffic_intensity <- NA
+  actual$num_jobs_in_system_mean <- means$total
+  # The weighted sum of squared deviations is a variance; take the square root so
+  # the value is a standard deviation, comparable with
+  # predicted$num_jobs_in_system_std (which is already a square root).
+  # NOTE(review): assumes distributions$total holds relative frequencies that
+  # sum to 1 -- confirm against the code that builds `distributions`.
+  actual$num_jobs_in_system_std <-
+    sqrt(sum(distributions$total * (distributions$num_elements - means$total)^2))
+  actual$num_jobs_in_queue_mean <- means$queue
+  actual$utilisation <- NA # can't measure this on a per-server basis here
+  actual$response_time_mean <- mean(response_times)
+  actual$response_time_std <- sd(response_times)
   actual$response_time_q50 <- quantile(response_times, probs=c(0.5))
   actual$response_time_q95 <- quantile(response_times, probs=c(0.95))
+  actual$arrival_rate <- arrival_rate
+  actual$total_service_rate <- total_service_rate
 
   comparison <- rbind(data.frame(predicted), data.frame(actual)) %>%
     mutate(clients=result_params$clients[[1]],
@@ -107,15 +118,23 @@ result_dir_base <- "results/throughput"
 
 # ---- Extracting data
 dir_name_regex <- paste0(result_dir_base,
-                          "/clients(\\d{2,3})_threads(32)_rep(0)$")
+                          "/clients(\\d{1,3})_threads(32)_rep(0)$")
 unfiltered_dirs <- list.dirs(path=result_dir_base, recursive=TRUE)
 filtered_dirs <- grep(dir_name_regex, unfiltered_dirs, value=TRUE, perl=TRUE)
 
 comparisons <- NA
 for(i in 1:length(filtered_dirs)) {
   dirname = filtered_dirs[i]
+  dirname_match <- grep(dir_name_regex, dirname, value=TRUE, perl=TRUE)
+  n_clients <- as.numeric(as.character(as.data.frame(str_match(dirname_match, dir_name_regex))$V2))
   print(paste0("DIR: ", dirname))
-  summary <- get_mmm_summary(dirname)
+
+  if(n_clients == 1 | n_clients == 180 | n_clients > 576) {
+    print("skipping")
+    next
+  } else {
+    summary <- get_mmm_summary(dirname)
+  }
 
   if(is.na(comparisons)) {
     comparisons <- summary
@@ -124,12 +143,60 @@ for(i in 1:length(filtered_dirs)) {
   }
 }
 
+# ---- Saving table ----
+# Reshape the comparison so each row is one (variable, clients) pair with one
+# column per `type`, then write it as an xtable into output_dir.
+comparisons_to_save <- local({
+  kept_columns <- comparisons %>%
+    select(type, clients, num_jobs_in_system_mean:response_time_std) %>%
+    select(-utilisation)
+  long_format <- melt(kept_columns, id.vars=c("type", "clients"))
+  dcast(long_format, variable + clients ~ type)
+})
+comparison_table <- xtable(
+  comparisons_to_save,
+  caption="Comparison of experimental results and predictions of the M/M/m model.",
+  label="tbl:part2:comparison_table"
+)
+print(comparison_table, file=paste0(output_dir, "/comparison_table.txt"))
+
 
 # ---- Plotting ----
-data1 <- comparisons %>%
-  filter(clients != 180)
-ggplot(data1, aes(x=clients, y=mean_response_time, color=type)) +
-  geom_line() +
-  asl_theme
-
 
+# ---- Traffic intensity vs. number of clients ----
+# Only the "predicted" rows are plotted; the y axis is fixed to [0, 1] and the
+# figure is saved at half the common width and height.
+comparisons %>%
+  filter(type=="predicted") %>%
+  ggplot(aes(x=clients, y=traffic_intensity, color=type)) +
+  geom_line(size=1) +
+  geom_point(size=2) +
+  ylim(0, 1) +
+  labs(x="Number of clients", y="Traffic intensity") +
+  asl_theme +
+  theme(legend.position="none")
+ggsave(filename=paste0(output_dir, "/graphs/traffic_intensity_vs_clients.pdf"),
+       width=fig_width / 2, height=fig_height / 2)
+
+# ---- Mean response time: one panel per `type` ----
+# The panels have independent y axes (scales="free_y"), so they are not drawn
+# on the same scale. The ribbon spans mean -/+ one standard deviation.
+comparisons %>%
+  ggplot(aes(x=clients, y=response_time_mean, color=type, fill=type)) +
+  geom_ribbon(aes(ymin=response_time_mean - response_time_std,
+                  ymax=response_time_mean + response_time_std),
+              alpha=0.3, color=NA) +
+  geom_line(size=1) +
+  geom_point(size=2) +
+  facet_wrap(~type, scales="free_y", nrow=1) +
+  #ylim(0, NA) +
+  labs(x="Number of clients", y="Mean response time") +
+  asl_theme +
+  theme(legend.position="none")
+ggsave(filename=paste0(output_dir, "/graphs/response_time_predicted_and_actual.pdf"),
+       width=fig_width, height=0.75 * fig_height)
+
+# ---- Mean number of jobs in system: one panel per `type` ----
+# Both panels share one y axis that starts at 0. The ribbon spans
+# mean -/+ one standard deviation.
+comparisons %>%
+  ggplot(aes(x=clients, y=num_jobs_in_system_mean, color=type, fill=type)) +
+  geom_ribbon(aes(ymin=num_jobs_in_system_mean - num_jobs_in_system_std,
+                  ymax=num_jobs_in_system_mean + num_jobs_in_system_std),
+              alpha=0.3, color=NA) +
+  geom_line(size=1) +
+  geom_point(size=2) +
+  facet_wrap(~type, nrow=1) +
+  ylim(0, NA) +
+  labs(x="Number of clients", y="Mean number of jobs in system") +
+  asl_theme +
+  theme(legend.position="none")
+ggsave(filename=paste0(output_dir, "/graphs/number_of_jobs_predicted_and_actual.pdf"),
+       width=fig_width, height=0.75 * fig_height)
+