Skip to content

Commit

Permalink
Merge pull request #295 from pantheon-systems/stage
Browse files Browse the repository at this point in the history
Stage to master: stats via statsd rather than logs
  • Loading branch information
jerryblakley committed May 13, 2016
2 parents c4e92d4 + b8ee517 commit 56451f2
Show file tree
Hide file tree
Showing 15 changed files with 550 additions and 273 deletions.
3 changes: 2 additions & 1 deletion src/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@ fusedav_SOURCES=fusedav.c fusedav.h \
util.c util.h \
fusedav_config.c fusedav_config.h \
signal_handling.c signal_handling.h \
stats.c stats.h
stats.c stats.h \
fusedav-statsd.c fusedav-statsd.h

fusedav_CFLAGS = $(AM_CFLAGS) $(CURL_CFLAGS) $(URIPARSER_CFLAGS) $(FUSE_CFLAGS) $(YAML_CFLAGS) $(LEVELDB_CFLAGS) $(SYSTEMD_CFLAGS) $(ZLIB_CFLAGS) $(GLIB_CFLAGS) -DFUSE_USE_VERSION=26 -DINJECT_ERRORS=${INJECT_ERRORS}
fusedav_LDADD = -lpthread -ljemalloc -lrt -lresolv -lexpat $(CURL_LIBS) $(URIPARSER_LIBS) $(FUSE_LIBS) $(YAML_LIBS) $(LEVELDB_LIBS) $(SYSTEMD_LIBS) $(ZLIB_LIBS) $(GLIB_LIBS)
105 changes: 36 additions & 69 deletions src/filecache.c
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
#include "stats.h"
#include "session.h"
#include "fusedav_config.h"
#include "fusedav-statsd.h"

#define REFRESH_INTERVAL 3
#define CACHE_FILE_ENTROPY 20
Expand Down Expand Up @@ -141,14 +142,14 @@ static char *path2key(const char *path) {
* to enhanced logging to LOG_NOTICE.
*/
struct curl_slist* enhanced_logging(struct curl_slist *slist, int log_level, int section, const char *format, ...) {
va_list ap;

// If we are injecting errors, we can trigger enhanced logging by decrementing
// the log level (LOG_INFO -> LOG_NOTICE)
if (inject_error(filecache_error_enhanced_logging)) {
log_level -= 1;
}
if (logging(log_level, section)) {
va_list ap;
char *user_agent = NULL;
char msg[81] = {0};
slist = curl_slist_append(slist, "Log-To-Journal: true");
Expand Down Expand Up @@ -363,13 +364,8 @@ static void get_fresh_fd(filecache_t *cache,
struct timespec start_time;
long response_code = 500; // seed it as bad so we can enter the loop
CURLcode res = CURLE_OK;
static __thread unsigned long lgcount = 0;
static __thread long lglatency = 0;
static __thread time_t lgtime = 0;
static __thread unsigned long sgcount = 0;
static __thread long sglatency = 0;
static __thread time_t sgtime = 0;
// Not to exceed time for operation, else it's an error. Allow large files a longer time
// Somewhat arbitrary
static const unsigned small_time_allotment = 2000; // 2 seconds
static const unsigned large_time_allotment = 8000; // 8 seconds

Expand Down Expand Up @@ -652,80 +648,68 @@ static void get_fresh_fd(filecache_t *cache,
latency = FETCH(filecache_get_xlg_timing);
count = FETCH(filecache_get_xlg_count);
sz = "XLG";
aggregate_log_print_server(LOG_INFO, SECTION_ENHANCED, funcname, &lgtime,
"large-gets", &lgcount, 1,
"large-get-latency", &lglatency, elapsed_time);
stats_counter("large-gets", 1);
stats_timer("large-get-latency", elapsed_time);
}
else if (st.st_size > LG) {
TIMING(filecache_get_lg_timing, elapsed_time);
BUMP(filecache_get_lg_count);
latency = FETCH(filecache_get_lg_timing);
count = FETCH(filecache_get_lg_count);
sz = "LG";
aggregate_log_print_server(LOG_INFO, SECTION_ENHANCED, funcname, &lgtime,
"large-gets", &lgcount, 1,
"large-get-latency", &lglatency, elapsed_time);
stats_counter("large-gets", 1);
stats_timer("large-get-latency", elapsed_time);
}
else if (st.st_size > MED) {
TIMING(filecache_get_med_timing, elapsed_time);
BUMP(filecache_get_med_count);
latency = FETCH(filecache_get_med_timing);
count = FETCH(filecache_get_med_count);
sz = "MED";
aggregate_log_print_server(LOG_INFO, SECTION_ENHANCED, funcname, &lgtime,
"large-gets", &lgcount, 1,
"large-get-latency", &lglatency, elapsed_time);
stats_counter("large-gets", 1);
stats_timer("large-get-latency", elapsed_time);
}
else if (st.st_size > SM) {
TIMING(filecache_get_sm_timing, elapsed_time);
BUMP(filecache_get_sm_count);
latency = FETCH(filecache_get_sm_timing);
count = FETCH(filecache_get_sm_count);
sz = "SM";
aggregate_log_print_server(LOG_INFO, SECTION_ENHANCED, funcname, &sgtime,
"small-gets", &sgcount, 1,
"small-get-latency", &sglatency, elapsed_time);
stats_counter("small-gets", 1);
stats_timer("small-get-latency", elapsed_time);
}
else if (st.st_size > XSM) {
TIMING(filecache_get_xsm_timing, elapsed_time);
BUMP(filecache_get_xsm_count);
latency = FETCH(filecache_get_xsm_timing);
count = FETCH(filecache_get_xsm_count);
sz = "XSM";
aggregate_log_print_server(LOG_INFO, SECTION_ENHANCED, funcname, &sgtime,
"small-gets", &sgcount, 1,
"small-get-latency", &sglatency, elapsed_time);
stats_counter("small-gets", 1);
stats_timer("small-get-latency", elapsed_time);
}
else {
TIMING(filecache_get_xxsm_timing, elapsed_time);
BUMP(filecache_get_xxsm_count);
latency = FETCH(filecache_get_xxsm_timing);
count = FETCH(filecache_get_xxsm_count);
sz = "XXSM";
aggregate_log_print_server(LOG_INFO, SECTION_ENHANCED, funcname, &sgtime,
"small-gets", &sgcount, 1,
"small-get-latency", &sglatency, elapsed_time);
stats_counter("small-gets", 1);
stats_timer("small-get-latency", elapsed_time);
}
log_print(LOG_DEBUG, SECTION_FILECACHE_OPEN, "put_fresh_fd: GET on size %s (%lu) for %s -- Current:Average latency %lu :: %lu",
sz, st.st_size, path, elapsed_time, (latency / count));

/* The aggregate_log_print_server routine is expecting a cumulative count and latency, but by
* passing NULL for time, we ensure that aggregate prints this message and resets to 0, so
* there's no need to keep a static variable for count and latency.
*/
if (st.st_size >= LG && elapsed_time > large_time_allotment) {
unsigned long exceeded_count = 0;
log_print(LOG_WARNING, SECTION_FILECACHE_OPEN, "put_fresh_fd: large (%lu) GET for %s exceeded time allotment %lu with %lu",
st.st_size, path, large_time_allotment, elapsed_time);
aggregate_log_print_server(LOG_INFO, SECTION_ENHANCED, "put_fresh_fd", NULL, "exceeded-time-large-GET-count",
&exceeded_count, 1, "exceeded-time-large-GET-latency", &elapsed_time, 0);
stats_counter("exceeded-time-large-GET-count", 1);
stats_timer("exceeded-time-large-GET-latency", elapsed_time);
}
else if (st.st_size < LG && elapsed_time > small_time_allotment) {
unsigned long exceeded_count = 0;
log_print(LOG_WARNING, SECTION_FILECACHE_OPEN, "put_fresh_fd: small (%lu) GET for %s exceeded time allotment %lu with %lu",
st.st_size, path, small_time_allotment, elapsed_time);
aggregate_log_print_server(LOG_INFO, SECTION_ENHANCED, "put_fresh_fd", NULL, "exceeded-time-small-GET-count",
&exceeded_count, 1, "exceeded-time-small-GET-latency", &elapsed_time, 0);
stats_counter("exceeded-time-small-GET-count", 1);
stats_timer("exceeded-time-small-GET-latency", elapsed_time);
}
}
else if (response_code == 404) {
Expand Down Expand Up @@ -1006,13 +990,8 @@ static void put_return_etag(const char *path, int fd, char *etag, GError **gerr)
struct timespec start_time;
long response_code = 500; // seed it as bad so we can enter the loop
CURLcode res = CURLE_OK;
static __thread unsigned long lpcount = 0;
static __thread long lplatency = 0;
static __thread time_t lptime = 0;
static __thread unsigned long spcount = 0;
static __thread long splatency = 0;
static __thread time_t sptime = 0;
// Not to exceed time for operation, else it's an error. Allow large files a longer time
// Somewhat arbitrary
static const unsigned small_time_allotment = 2000; // 2 seconds
static const unsigned large_time_allotment = 8000; // 8 seconds

Expand Down Expand Up @@ -1135,80 +1114,68 @@ static void put_return_etag(const char *path, int fd, char *etag, GError **gerr)
latency = FETCH(filecache_put_xlg_timing);
count = FETCH(filecache_put_xlg_count);
sz = "XLG";
aggregate_log_print_server(LOG_INFO, SECTION_ENHANCED, funcname, &lptime,
"large-puts", &lpcount, 1,
"large-put-latency", &lplatency, elapsed_time);
stats_counter("large-puts", 1);
stats_timer("large-put-latency", elapsed_time);
}
else if (st.st_size > LG) {
TIMING(filecache_put_lg_timing, elapsed_time);
BUMP(filecache_put_lg_count);
latency = FETCH(filecache_put_lg_timing);
count = FETCH(filecache_put_lg_count);
sz = "LG";
aggregate_log_print_server(LOG_INFO, SECTION_ENHANCED, funcname, &lptime,
"large-puts", &lpcount, 1,
"large-put-latency", &lplatency, elapsed_time);
stats_counter("large-puts", 1);
stats_timer("large-put-latency", elapsed_time);
}
else if (st.st_size > MED) {
TIMING(filecache_put_med_timing, elapsed_time);
BUMP(filecache_put_med_count);
latency = FETCH(filecache_put_med_timing);
count = FETCH(filecache_put_med_count);
sz = "MED";
aggregate_log_print_server(LOG_INFO, SECTION_ENHANCED, funcname, &lptime,
"large-puts", &lpcount, 1,
"large-put-latency", &lplatency, elapsed_time);
stats_counter("large-puts", 1);
stats_timer("large-put-latency", elapsed_time);
}
else if (st.st_size > SM) {
TIMING(filecache_put_sm_timing, elapsed_time);
BUMP(filecache_put_sm_count);
latency = FETCH(filecache_put_sm_timing);
count = FETCH(filecache_put_sm_count);
sz = "SM";
aggregate_log_print_server(LOG_INFO, SECTION_ENHANCED, funcname, &sptime,
"small-puts", &spcount, 1,
"small-put-latency", &splatency, elapsed_time);
stats_counter("small-puts", 1);
stats_timer("small-put-latency", elapsed_time);
}
else if (st.st_size > XSM) {
TIMING(filecache_put_xsm_timing, elapsed_time);
BUMP(filecache_put_xsm_count);
latency = FETCH(filecache_put_xsm_timing);
count = FETCH(filecache_put_xsm_count);
sz = "XSM";
aggregate_log_print_server(LOG_INFO, SECTION_ENHANCED, funcname, &sptime,
"small-puts", &spcount, 1,
"small-put-latency", &splatency, elapsed_time);
stats_counter("small-puts", 1);
stats_timer("small-put-latency", elapsed_time);
}
else {
TIMING(filecache_put_xxsm_timing, elapsed_time);
BUMP(filecache_put_xxsm_count);
latency = FETCH(filecache_put_xxsm_timing);
count = FETCH(filecache_put_xxsm_count);
sz = "XXSM";
aggregate_log_print_server(LOG_INFO, SECTION_ENHANCED, funcname, &sptime,
"small-puts", &spcount, 1,
"small-put-latency", &splatency, elapsed_time);
stats_counter("small-puts", 1);
stats_timer("small-put-latency", elapsed_time);
}
log_print(LOG_DEBUG, SECTION_FILECACHE_OPEN, "%s: PUT on size %s (%lu) for %s -- Current:Average latency %lu :: %lu",
funcname, sz, st.st_size, path, elapsed_time, (latency / count));

/* The aggregate_log_print_server routine is expecting a cumulative count and latency, but by
* passing NULL for time, we ensure that aggregate prints this message and resets to 0, so
* there's no need to keep a static variable for count and latency.
*/
if (st.st_size >= LG && elapsed_time > large_time_allotment) {
unsigned long exceeded_count = 0;
log_print(LOG_WARNING, SECTION_FILECACHE_OPEN, "%s: large (%lu) PUT for %s exceeded time allotment %lu with %lu",
funcname, st.st_size, path, large_time_allotment, elapsed_time);
aggregate_log_print_server(LOG_INFO, SECTION_ENHANCED, funcname, NULL, "exceeded-time-large-PUT-count",
&exceeded_count, 1, "exceeded-time-large-PUT-latency", &elapsed_time, 0);
stats_counter("exceeded-time-large-PUT-count", 1);
stats_timer("exceeded-time-large-PUT-latency", elapsed_time);
}
else if (st.st_size < LG && elapsed_time > small_time_allotment) {
unsigned long exceeded_count = 0;
log_print(LOG_WARNING, SECTION_FILECACHE_OPEN, "%s: small (%lu) PUT for %s exceeded time allotment %lu with %lu",
funcname, st.st_size, path, small_time_allotment, elapsed_time);
aggregate_log_print_server(LOG_INFO, SECTION_ENHANCED, funcname, NULL, "exceeded-time-small-PUT-count",
&exceeded_count, 1, "exceeded-time-small-PUT-latency", &elapsed_time, 0);
stats_counter("exceeded-time-small-PUT-count", 1);
stats_timer("exceeded-time-small-PUT-latency", elapsed_time);
}
}

Expand Down
Loading

0 comments on commit 56451f2

Please sign in to comment.