-
Notifications
You must be signed in to change notification settings - Fork 571
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
raw2trace optimization: Use lz4 compression for final trace files to …
…speed up conversion time (#6115) Right now we use a 3-phase scheme for trace processing: drmemtrace (collector) + drraw2trace (converter) + analysis tools. Trace conversion is the most time-consuming part. E.g., for java -version: drmemtrace: real 0m36.873s user 0m43.101s sys 0m4.459s; drraw2trace: real 2m27.796s user 3m1.469s sys 0m1.248s; opcodes tool: real 0m38.956s user 0m48.660s sys 0m0.340s. We used SPECjvm2008 benchmarks for verification (crypto.aes, xml.transform, crypto.rsa). crypto.aes: original conversion time 293m, lz4 conversion time 137m. xml.transform: original conversion time 293m, lz4 conversion time 137m. crypto.rsa: original conversion time 293m, lz4 conversion time 137m. Adding bitset_hash_table_t gives a 20% speedup if we stay with the original approach.
- Loading branch information
Showing
12 changed files
with
466 additions
and
25 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,159 @@ | ||
/* ********************************************************** | ||
* Copyright (c) 2023 Google, Inc. All rights reserved. | ||
* **********************************************************/ | ||
|
||
/* | ||
* Redistribution and use in source and binary forms, with or without | ||
* modification, are permitted provided that the following conditions are met: | ||
* | ||
* * Redistributions of source code must retain the above copyright notice, | ||
* this list of conditions and the following disclaimer. | ||
* | ||
* * Redistributions in binary form must reproduce the above copyright notice, | ||
* this list of conditions and the following disclaimer in the documentation | ||
* and/or other materials provided with the distribution. | ||
* | ||
* * Neither the name of Google, Inc. nor the names of its contributors may be | ||
* used to endorse or promote products derived from this software without | ||
* specific prior written permission. | ||
* | ||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | ||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | ||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | ||
* ARE DISCLAIMED. IN NO EVENT SHALL VMWARE, INC. OR CONTRIBUTORS BE LIABLE | ||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | ||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | ||
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | ||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | ||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | ||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH | ||
* DAMAGE. | ||
*/ | ||
|
||
/* lz4_ostream_t: a wrapper around lz4 to match the parts of the | ||
* std::ostream interface we use for raw2trace and file_reader_t. | ||
*/ | ||
|
||
#ifndef _LZ4_OSTREAM_H_ | ||
#define _LZ4_OSTREAM_H_ 1 | ||
|
||
#ifndef HAS_LZ4 | ||
# error HAS_LZ4 is required | ||
#endif | ||
|
||
#include <array>
#include <fstream>
#include <ostream>
#include <streambuf>
#include <string>
#include <vector>
#include <lz4frame.h>
|
||
namespace dynamorio { | ||
namespace drmemtrace { | ||
|
||
class lz4_ostreambuf_t : public std::basic_streambuf<char, std::char_traits<char>> { | ||
public: | ||
lz4_ostreambuf_t(const std::string &path) | ||
{ | ||
auto res = LZ4F_createCompressionContext(&lzctx_, LZ4F_VERSION); | ||
if (LZ4F_isError(res)) { | ||
return; | ||
} | ||
|
||
dest_buf_.reserve(LZ4F_compressBound(src_buf_.size(), nullptr)); | ||
file_ = new std::ofstream(path); | ||
|
||
char *base = &src_buf_.front(); | ||
setp(base, base + src_buf_.size() - 1); | ||
|
||
write_header(); | ||
} | ||
|
||
~lz4_ostreambuf_t() | ||
{ | ||
sync(); | ||
write_footer(); | ||
if (file_ != nullptr) { | ||
delete file_; | ||
file_ = nullptr; | ||
} | ||
LZ4F_freeCompressionContext(lzctx_); | ||
} | ||
|
||
private: | ||
int | ||
overflow(int extra_char) override | ||
{ | ||
if (file_ == nullptr) | ||
return traits_type::eof(); | ||
|
||
if (extra_char != traits_type::eof()) { | ||
*pptr() = traits_type::to_char_type(extra_char); | ||
pbump(1); | ||
} | ||
|
||
int size = static_cast<int>(pptr() - pbase()); | ||
pbump(-size); | ||
auto ret = LZ4F_compressUpdate(lzctx_, &dest_buf_.front(), dest_buf_.capacity(), | ||
pbase(), size, nullptr); | ||
if (LZ4F_isError(ret)) { | ||
return traits_type::eof(); | ||
} | ||
|
||
file_->write(&dest_buf_.front(), ret); | ||
return traits_type::not_eof(extra_char); | ||
} | ||
|
||
int | ||
sync() override | ||
{ | ||
return overflow(traits_type::eof()); | ||
} | ||
|
||
void | ||
write_header() | ||
{ | ||
auto res = | ||
LZ4F_compressBegin(lzctx_, &dest_buf_.front(), dest_buf_.capacity(), nullptr); | ||
if (LZ4F_isError(res)) { | ||
return; | ||
} | ||
file_->write(&dest_buf_.front(), res); | ||
} | ||
|
||
void | ||
write_footer() | ||
{ | ||
auto res = | ||
LZ4F_compressEnd(lzctx_, &dest_buf_.front(), dest_buf_.capacity(), nullptr); | ||
if (LZ4F_isError(res)) { | ||
return; | ||
} | ||
file_->write(&dest_buf_.front(), res); | ||
} | ||
|
||
private: | ||
static const int buffer_size_ = 1024 * 1024; | ||
std::ostream *file_ = nullptr; | ||
std::array<char, buffer_size_> src_buf_; | ||
std::vector<char> dest_buf_; | ||
LZ4F_compressionContext_t lzctx_ = nullptr; | ||
}; | ||
|
||
class lz4_ostream_t : public std::ostream { | ||
public: | ||
explicit lz4_ostream_t(const std::string &path) | ||
: std::ostream(new lz4_ostreambuf_t(path)) | ||
{ | ||
if (!rdbuf()) | ||
setstate(std::ios::badbit); | ||
} | ||
|
||
~lz4_ostream_t() override | ||
{ | ||
delete rdbuf(); | ||
} | ||
}; | ||
|
||
} // namespace drmemtrace | ||
} // namespace dynamorio | ||
|
||
#endif // _LZ4_OSTREAM_H_ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,108 @@ | ||
/* ********************************************************** | ||
* Copyright (c) 2023 Google, Inc. All rights reserved. | ||
* **********************************************************/ | ||
|
||
/* | ||
* Redistribution and use in source and binary forms, with or without | ||
* modification, are permitted provided that the following conditions are met: | ||
* | ||
* * Redistributions of source code must retain the above copyright notice, | ||
* this list of conditions and the following disclaimer. | ||
* | ||
* * Redistributions in binary form must reproduce the above copyright notice, | ||
* this list of conditions and the following disclaimer in the documentation | ||
* and/or other materials provided with the distribution. | ||
* | ||
* * Neither the name of Google, Inc. nor the names of its contributors may be | ||
* used to endorse or promote products derived from this software without | ||
* specific prior written permission. | ||
* | ||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | ||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | ||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | ||
* ARE DISCLAIMED. IN NO EVENT SHALL VMWARE, INC. OR CONTRIBUTORS BE LIABLE | ||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | ||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | ||
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | ||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | ||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | ||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH | ||
* DAMAGE. | ||
*/ | ||
|
||
#include "lz4_file_reader.h" | ||
|
||
namespace dynamorio { | ||
namespace drmemtrace { | ||
|
||
/* Returns a pointer to the next trace entry held in the reader's buffer,
 * refilling the buffer from reader->file once it has been fully consumed.
 *
 * Returns nullptr when a refill yields less than one entry or a byte count that
 * is not a whole number of entries; in that case *eof is set to whether the
 * byte count is non-negative.
 * NOTE(review): the byte count comes from gcount(); if reader->file is a
 * std::istream that value is never negative, so *eof would always be set to
 * true on this path -- confirm that read errors are meant to be reported as
 * end-of-file.
 */
trace_entry_t *
read_next_entry_common(lz4_reader_t *reader, bool *eof)
{
    // Fast path: unread entries remain in the buffer.
    if (reader->cur_buf < reader->max_buf)
        return reader->cur_buf++;

    const int entry_size = static_cast<int>(sizeof(trace_entry_t));
    reader->file->read(reinterpret_cast<char *>(&reader->buf), sizeof(reader->buf));
    const int bytes_read = static_cast<int>(reader->file->gcount());

    const bool whole_entries =
        bytes_read >= entry_size && bytes_read % entry_size == 0;
    if (!whole_entries) {
        *eof = (bytes_read >= 0);
        return nullptr;
    }

    // Point the cursor at the freshly read entries and hand out the first one.
    reader->cur_buf = reader->buf;
    reader->max_buf = reader->buf + (bytes_read / entry_size);
    return reader->cur_buf++;
}
|
||
/************************************************** | ||
* lz4_reader_t specializations for file_reader_t. | ||
*/ | ||
|
||
/* clang-format off */ /* (make vera++ newline-after-type check happy) */ | ||
template <> | ||
/* clang-format on */ | ||
file_reader_t<lz4_reader_t>::file_reader_t() | ||
{ | ||
input_file_.file = nullptr; | ||
} | ||
|
||
/* clang-format off */ /* (make vera++ newline-after-type check happy) */ | ||
template <> | ||
/* clang-format on */ | ||
file_reader_t<lz4_reader_t>::~file_reader_t<lz4_reader_t>() | ||
{ | ||
if (input_file_.file != nullptr) { | ||
delete input_file_.file; | ||
input_file_.file = nullptr; | ||
} | ||
} | ||
|
||
template <> | ||
bool | ||
file_reader_t<lz4_reader_t>::open_single_file(const std::string &path) | ||
{ | ||
auto file = new lz4_istream_t(path); | ||
VPRINT(this, 1, "Opened input file %s\n", path.c_str()); | ||
input_file_ = lz4_reader_t(file); | ||
return true; | ||
} | ||
|
||
template <> | ||
trace_entry_t * | ||
file_reader_t<lz4_reader_t>::read_next_entry() | ||
{ | ||
trace_entry_t *entry = read_queued_entry(); | ||
if (entry != nullptr) | ||
return entry; | ||
entry = read_next_entry_common(&input_file_, &at_eof_); | ||
if (entry == nullptr) | ||
return entry; | ||
VPRINT(this, 4, "Read from file: type=%s (%d), size=%d, addr=%zu\n", | ||
trace_type_names[entry->type], entry->type, entry->size, entry->addr); | ||
entry_copy_ = *entry; | ||
return &entry_copy_; | ||
} | ||
|
||
} // namespace drmemtrace | ||
} // namespace dynamorio |
Oops, something went wrong.