Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

i#5538 memtrace seek, part 3: Add reader buffering #5640

Merged
merged 2 commits into from
Sep 8, 2022
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 22 additions & 14 deletions clients/drcachesim/reader/compressed_file_reader.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/* **********************************************************
derekbruening marked this conversation as resolved.
Show resolved Hide resolved
* Copyright (c) 2017-2020 Google, Inc. All rights reserved.
* Copyright (c) 2017-2022 Google, Inc. All rights reserved.
* **********************************************************/

/*
Expand Down Expand Up @@ -35,44 +35,52 @@
/* clang-format off */ /* (make vera++ newline-after-type check happy) */
template <>
/* clang-format on */
file_reader_t<gzFile>::~file_reader_t<gzFile>()
file_reader_t<gzip_reader_t>::~file_reader_t<gzip_reader_t>()
{
for (auto file : input_files_)
gzclose(file);
for (auto &gzip : input_files_)
gzclose(gzip.file);
delete[] thread_eof_;
}

template <>
bool
file_reader_t<gzFile>::open_single_file(const std::string &path)
file_reader_t<gzip_reader_t>::open_single_file(const std::string &path)
{
gzFile file = gzopen(path.c_str(), "rb");
if (file == nullptr)
return false;
VPRINT(this, 1, "Opened input file %s\n", path.c_str());
input_files_.push_back(file);
input_files_.emplace_back(file);
return true;
}

template <>
bool
file_reader_t<gzFile>::read_next_thread_entry(size_t thread_index,
OUT trace_entry_t *entry, OUT bool *eof)
file_reader_t<gzip_reader_t>::read_next_thread_entry(size_t thread_index,
OUT trace_entry_t *entry,
OUT bool *eof)
{
int len = gzread(input_files_[thread_index], (char *)entry, sizeof(*entry));
// Returns less than asked-for for end of file, or –1 for error.
if (len < (int)sizeof(*entry)) {
*eof = (len >= 0);
return false;
gzip_reader_t *gzip = &input_files_[thread_index];
if (gzip->cur_buf >= gzip->max_buf) {
int len = gzread(gzip->file, gzip->buf, sizeof(gzip->buf));
// Returns less than asked-for for end of file, or –1 for error.
derekbruening marked this conversation as resolved.
Show resolved Hide resolved
if (len < (int)sizeof(*entry)) {
*eof = (len >= 0);
return false;
}
gzip->cur_buf = gzip->buf;
gzip->max_buf = gzip->buf + (len / sizeof(*gzip->max_buf));
}
*entry = *gzip->cur_buf;
++gzip->cur_buf;
VPRINT(this, 4, "Read from thread #%zd file: type=%d, size=%d, addr=%zu\n",
thread_index, entry->type, entry->size, entry->addr);
return true;
}

template <>
bool
file_reader_t<gzFile>::is_complete()
file_reader_t<gzip_reader_t>::is_complete()
{
// The gzip reading interface does not support seeking to SEEK_END so there
// is no efficient way to read the footer.
Expand Down
20 changes: 18 additions & 2 deletions clients/drcachesim/reader/compressed_file_reader.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/* **********************************************************
* Copyright (c) 2017 Google, Inc. All rights reserved.
* Copyright (c) 2017-2022 Google, Inc. All rights reserved.
* **********************************************************/

/*
Expand Down Expand Up @@ -38,6 +38,22 @@
#include <zlib.h>
#include "file_reader.h"

typedef file_reader_t<gzFile> compressed_file_reader_t;
struct gzip_reader_t {
explicit gzip_reader_t(gzFile file)
: file(file)
{
}
gzFile file;
// Adding our own buffering to gzFile provides an 18% speedup. We use the same
// buffer size as zipfile_reader_t.
// If more readers want the same buffering we may want to bake this into the shared
// template class to avoid duplication: but some readers have good buffering
// already.
trace_entry_t buf[4096];
trace_entry_t *cur_buf = buf;
trace_entry_t *max_buf = buf;
};

typedef file_reader_t<gzip_reader_t> compressed_file_reader_t;

#endif /* _COMPRESSED_FILE_READER_H_ */
98 changes: 52 additions & 46 deletions clients/drcachesim/reader/zipfile_file_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,21 +39,21 @@
/* clang-format off */ /* (make vera++ newline-after-type check happy) */
template <>
/* clang-format on */
file_reader_t<unzFile>::~file_reader_t<unzFile>()
file_reader_t<zipfile_reader_t>::~file_reader_t<zipfile_reader_t>()
{
for (auto file : input_files_)
unzClose(file);
for (auto &zipfile : input_files_)
unzClose(zipfile.file);
delete[] thread_eof_;
}

template <>
bool
file_reader_t<unzFile>::open_single_file(const std::string &path)
file_reader_t<zipfile_reader_t>::open_single_file(const std::string &path)
{
unzFile file = unzOpen(path.c_str());
if (file == nullptr)
return false;
input_files_.push_back(file);
input_files_.emplace_back(file);
if (unzGoToFirstFile(file) != UNZ_OK || unzOpenCurrentFile(file) != UNZ_OK)
return false;
VPRINT(this, 1, "Opened input file %s\n", path.c_str());
Expand All @@ -62,62 +62,68 @@ file_reader_t<unzFile>::open_single_file(const std::string &path)

template <>
bool
file_reader_t<unzFile>::read_next_thread_entry(size_t thread_index,
OUT trace_entry_t *entry, OUT bool *eof)
file_reader_t<zipfile_reader_t>::read_next_thread_entry(size_t thread_index,
OUT trace_entry_t *entry,
OUT bool *eof)
{
*eof = false;
// TODO i#5538: The reading performance for .zip files seems to be up to 60%
// slower than .gz files, which is unexpected since both use zlib and the checksum
// and header differences make zlib normally faster than gzip.
// Do we need to buffer ourselves here by reading many entries at once?
int num_read = unzReadCurrentFile(input_files_[thread_index], entry, sizeof(*entry));
if (num_read == 0) {
zipfile_reader_t *zipfile = &input_files_[thread_index];
if (zipfile->cur_buf >= zipfile->max_buf) {
int num_read =
unzReadCurrentFile(zipfile->file, zipfile->buf, sizeof(zipfile->buf));
if (num_read == 0) {
#ifdef DEBUG
if (verbosity_ >= 3) {
char name[128];
name[0] = '\0'; /* Just in case. */
// This call is expensive if we do it every time.
unzGetCurrentFileInfo64(input_files_[thread_index], nullptr, name,
sizeof(name), nullptr, 0, nullptr, 0);
VPRINT(this, 3,
"Thread #%zd hit end of component %s; opening next component\n",
thread_index, name);
}
if (verbosity_ >= 3) {
char name[128];
name[0] = '\0'; /* Just in case. */
// This call is expensive if we do it every time.
unzGetCurrentFileInfo64(zipfile->file, nullptr, name, sizeof(name),
nullptr, 0, nullptr, 0);
VPRINT(this, 3,
"Thread #%zd hit end of component %s; opening next component\n",
thread_index, name);
}
#endif
// read_next_entry() stores the last entry into entry_copy_.
if ((entry_copy_.type != TRACE_TYPE_MARKER ||
entry_copy_.size != TRACE_MARKER_TYPE_CHUNK_FOOTER) &&
entry_copy_.type != TRACE_TYPE_FOOTER) {
VPRINT(this, 1, "Chunk is missing footer: truncation detection\n");
return false;
}
if (unzCloseCurrentFile(input_files_[thread_index]) != UNZ_OK)
return false;
int res = unzGoToNextFile(input_files_[thread_index]);
if (res != UNZ_OK) {
if (res == UNZ_END_OF_LIST_OF_FILE) {
VPRINT(this, 2, "Thread #%zd hit EOF\n", thread_index);
*eof = true;
// read_next_entry() stores the last entry into entry_copy_.
if ((entry_copy_.type != TRACE_TYPE_MARKER ||
entry_copy_.size != TRACE_MARKER_TYPE_CHUNK_FOOTER) &&
entry_copy_.type != TRACE_TYPE_FOOTER) {
VPRINT(this, 1, "Chunk is missing footer: truncation detection\n");
derekbruening marked this conversation as resolved.
Show resolved Hide resolved
return false;
}
return false;
if (unzCloseCurrentFile(zipfile->file) != UNZ_OK)
return false;
int res = unzGoToNextFile(zipfile->file);
if (res != UNZ_OK) {
if (res == UNZ_END_OF_LIST_OF_FILE) {
VPRINT(this, 2, "Thread #%zd hit EOF\n", thread_index);
*eof = true;
}
return false;
}
if (unzOpenCurrentFile(zipfile->file) != UNZ_OK)
return false;
num_read =
unzReadCurrentFile(zipfile->file, zipfile->buf, sizeof(zipfile->buf));
}
if (unzOpenCurrentFile(input_files_[thread_index]) != UNZ_OK)
if (num_read < static_cast<int>(sizeof(*entry))) {
VPRINT(this, 1, "Thread #%zd failed to read: returned %d\n", thread_index,
num_read);
return false;
num_read = unzReadCurrentFile(input_files_[thread_index], entry, sizeof(*entry));
}
if (num_read < static_cast<int>(sizeof(*entry))) {
VPRINT(this, 1, "Thread #%zd failed to read: returned %d\n", thread_index,
num_read);
return false;
}
zipfile->cur_buf = zipfile->buf;
zipfile->max_buf = zipfile->buf + (num_read / sizeof(*zipfile->max_buf));
}
*entry = *zipfile->cur_buf;
++zipfile->cur_buf;
VPRINT(this, 4, "Read from thread #%zd: type=%d, size=%d, addr=%zu\n", thread_index,
entry->type, entry->size, entry->addr);
return true;
}

template <>
bool
file_reader_t<unzFile>::is_complete()
file_reader_t<zipfile_reader_t>::is_complete()
{
// We could call unzeof() but we need the thread index.
// XXX: We should remove or change this interface. It is not used now.
Expand Down
16 changes: 15 additions & 1 deletion clients/drcachesim/reader/zipfile_file_reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,20 @@
#include "minizip/unzip.h"
#include "file_reader.h"

typedef file_reader_t<unzFile> zipfile_file_reader_t;
struct zipfile_reader_t {
explicit zipfile_reader_t(unzFile file)
: file(file)
{
}
unzFile file;
// Without our own buffering, reading one trace_entry_t record at a time
// is 60% slower. This buffer size was picked through experimentation to
// perform well without taking up too much memory.
trace_entry_t buf[4096];
trace_entry_t *cur_buf = buf;
trace_entry_t *max_buf = buf;
};

typedef file_reader_t<zipfile_reader_t> zipfile_file_reader_t;

#endif /* _ZIPFILE_FILE_READER_H_ */