From 093a293532ea47b87dd55de9d82692fc9c049306 Mon Sep 17 00:00:00 2001
From: taivop <taivo.pungas@gmail.com>
Date: Mon, 5 Dec 2016 13:28:01 +0100
Subject: [PATCH] work on #170

---
 report/milestone3.tex |  7 +++++++
 scripts/r/part1_mm1.r | 47 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 54 insertions(+)

diff --git a/report/milestone3.tex b/report/milestone3.tex
index 3377684..a80f129 100644
--- a/report/milestone3.tex
+++ b/report/milestone3.tex
@@ -79,6 +79,11 @@ \section{System as One Unit}\label{sec:system-one-unit}
 
 Build an M/M/1 model of your entire system based on the stability trace that you had to run for the first milestone. Explain the characteristics and behavior of the model built, and compare it with the experimental data (collected both outside and inside the middleware). Analyze the modeled and real-life behavior of the system (explain the similarities, the differences, and map them to aspects of the design or the experiments). Make sure to follow the model-related guidelines described in the Notes!
 
+\todo{mention where data came from (new trace from MS2)}
+
+Problems: the M/M/1 assumption that arrivals are independent of the system state does not hold, because the system is closed (memaslap sends a new request as soon as it receives the response to the previous one).
+
+I took the service rate to be the maximum observed throughput and the arrival rate to be the mean observed throughput.
 
 
 \clearpage
@@ -130,6 +135,8 @@ \section{Interactive Law Verification}\label{sec:interactive-law}
 
 mention mean error
 
+\todo{also show graphs as function of parameters (like in ms2 exp2)?}
+
 \todo{why do I predict lower throughput?} because total cycle time is higher than it should for given throughput -- but why? since measuring is done by memaslap, probably the problem lies on that side
 
 \begin{figure}[h]
diff --git a/scripts/r/part1_mm1.r b/scripts/r/part1_mm1.r
index ee44648..c39fe2f 100644
--- a/scripts/r/part1_mm1.r
+++ b/scripts/r/part1_mm1.r
@@ -3,4 +3,51 @@ source("scripts/r/ms3_common.r")
 
 # ---- Directories ----
 output_dir <- "results/analysis/part1_mm1"
+trace_dir <- "results/trace_rep3"
+
+# ---- Reading data ----
+# Load the memaslap statistics and the request log from the trace directory
+memaslap <- file_to_df(paste0(trace_dir, "/memaslap_stats.csv"))
+requests <- file_to_df(paste0(trace_dir, "/request.log"), sep = ",")
+
+# ---- Preprocessing ----
+# Cut-offs that trim two minutes from both ends of each data source
+DROP_TIMES_BEFORE_MS <- 2 * 60 # seconds to drop at the beginning
+DROP_TIMES_AFTER_MS <- max(filter(memaslap, type == "t")$time) - 2 * 60
+first_request_time <- min(requests$timeCreated)
+last_request_time <- max(requests$timeCreated)
+DROP_TIMES_BEFORE_MW <- first_request_time + 2 * 60 * 1000 # timeCreated appears to be in ms
+DROP_TIMES_AFTER_MW <- last_request_time - 2 * 60 * 1000
+
+requests <- requests %>%
+  filter(timeCreated > DROP_TIMES_BEFORE_MW, timeCreated <= DROP_TIMES_AFTER_MW)
+
+# ------------------
+# ---- Analysis ----
+# ------------------
+WINDOW_SIZE <- 1 # length of one counting window, in seconds
+# Number of logged requests created in each window, busiest window first
+service_rates <- requests %>%
+  group_by(secondCreated = floor(timeCreated / 1000 / WINDOW_SIZE)) %>%
+  summarise(count = n()) %>%
+  arrange(desc(count))
+
+# ---- Parameters ----
+# TODO: crude estimates -- service rate from the busiest window, arrival rate from the mean
+service_rate <- max(service_rates$count) / WINDOW_SIZE * 100 # 1/100 sampling
+arrival_rate <- mean(service_rates$count) / WINDOW_SIZE * 100 # 1/100 sampling
+traffic_intensity <- arrival_rate / service_rate
+print(paste0("Traffic intensity: ", round(traffic_intensity, digits = 2)))
+
+# ---- Predictions ----
+predicted <- list() # M/M/1 closed-form results for rho = traffic_intensity
+actual <- list()
+predicted$mean_num_jobs_in_system <- traffic_intensity / (1 - traffic_intensity)
+predicted$utilisation <- traffic_intensity # busy fraction is rho; 1 - rho is the idle probability
+predicted$mean_response_time <- 1 / (service_rate) / (1 - traffic_intensity) # in seconds
+# M/M/1 response time is exponential, so percentile q = mean * ln(100 / (100 - q))
+predicted$response_time_q50 <- predicted$mean_response_time * log(100 / (100 - 50))
+predicted$response_time_q95 <- predicted$mean_response_time * log(100 / (100 - 95))
+print(predicted)
+