From 87ecfdd2d58e5b0bd5952448c08e44b0b2fbbcc5 Mon Sep 17 00:00:00 2001 From: Carlos Caballero Date: Fri, 27 Sep 2024 09:29:23 +0000 Subject: [PATCH] Add support for ARM SPE data Change-Id: Id0e3dee1a72dac45966df0c2506b2cadffa3f19e --- Android.bp | 7 + BUILD | 8 + .../importers/common/trace_parser.cc | 9 +- .../importers/common/trace_parser.h | 6 + src/trace_processor/importers/perf/BUILD.gn | 5 + .../importers/perf/aux_record.cc | 1 + .../importers/perf/aux_record.h | 3 + .../importers/perf/aux_stream_manager.cc | 5 + .../importers/perf/aux_stream_manager.h | 15 + .../importers/perf/perf_session.cc | 30 +- .../importers/perf/perf_session.h | 4 + src/trace_processor/importers/perf/spe.h | 405 ++++++++++++++++++ .../importers/perf/spe_record_parser.cc | 362 ++++++++++++++++ .../importers/perf/spe_record_parser.h | 109 +++++ .../importers/perf/spe_tokenizer.cc | 145 +++++++ .../importers/perf/spe_tokenizer.h | 69 +++ .../perfetto_sql/stdlib/linux/perf/BUILD.gn | 5 +- .../perfetto_sql/stdlib/linux/perf/spe.sql | 125 ++++++ src/trace_processor/sorter/trace_sorter.cc | 10 + src/trace_processor/sorter/trace_sorter.h | 28 +- src/trace_processor/storage/stats.h | 7 + src/trace_processor/storage/trace_storage.h | 11 + src/trace_processor/tables/BUILD.gn | 1 + src/trace_processor/tables/perf_tables.py | 101 +++++ .../tables/table_destructors.cc | 4 + src/trace_processor/trace_processor_impl.cc | 5 + .../types/trace_processor_context.h | 2 + .../util/trace_blob_view_reader.cc | 10 +- .../util/trace_blob_view_reader.h | 79 +++- test/data/simpleperf/spe.trace.zip.sha256 | 1 + .../diff_tests/parser/simpleperf/tests.py | 61 ++- 31 files changed, 1591 insertions(+), 42 deletions(-) create mode 100644 src/trace_processor/importers/perf/spe.h create mode 100644 src/trace_processor/importers/perf/spe_record_parser.cc create mode 100644 src/trace_processor/importers/perf/spe_record_parser.h create mode 100644 src/trace_processor/importers/perf/spe_tokenizer.cc create mode 100644 
src/trace_processor/importers/perf/spe_tokenizer.h create mode 100644 src/trace_processor/perfetto_sql/stdlib/linux/perf/spe.sql create mode 100644 src/trace_processor/tables/perf_tables.py create mode 100644 test/data/simpleperf/spe.trace.zip.sha256 diff --git a/Android.bp b/Android.bp index 4fea39a3df..187e36c236 100644 --- a/Android.bp +++ b/Android.bp @@ -12642,6 +12642,8 @@ filegroup { "src/trace_processor/importers/perf/record_parser.cc", "src/trace_processor/importers/perf/sample.cc", "src/trace_processor/importers/perf/sample_id.cc", + "src/trace_processor/importers/perf/spe_record_parser.cc", + "src/trace_processor/importers/perf/spe_tokenizer.cc", ], } @@ -13398,6 +13400,7 @@ python_binary_host { "src/trace_processor/tables/jit_tables.py", "src/trace_processor/tables/memory_tables.py", "src/trace_processor/tables/metadata_tables.py", + "src/trace_processor/tables/perf_tables.py", "src/trace_processor/tables/profiler_tables.py", "src/trace_processor/tables/sched_tables.py", "src/trace_processor/tables/slice_tables.py", @@ -13568,6 +13571,7 @@ genrule { "src/trace_processor/perfetto_sql/stdlib/linux/memory/high_watermark.sql", "src/trace_processor/perfetto_sql/stdlib/linux/memory/process.sql", "src/trace_processor/perfetto_sql/stdlib/linux/perf/samples.sql", + "src/trace_processor/perfetto_sql/stdlib/linux/perf/spe.sql", "src/trace_processor/perfetto_sql/stdlib/linux/threads.sql", "src/trace_processor/perfetto_sql/stdlib/pkvm/hypervisor.sql", "src/trace_processor/perfetto_sql/stdlib/prelude/casts.sql", @@ -13794,6 +13798,7 @@ genrule { "src/trace_processor/tables/jit_tables.py", "src/trace_processor/tables/memory_tables.py", "src/trace_processor/tables/metadata_tables.py", + "src/trace_processor/tables/perf_tables.py", "src/trace_processor/tables/profiler_tables.py", "src/trace_processor/tables/sched_tables.py", "src/trace_processor/tables/slice_tables.py", @@ -13813,6 +13818,7 @@ genrule { "src/trace_processor/tables/jit_tables_py.h", 
"src/trace_processor/tables/memory_tables_py.h", "src/trace_processor/tables/metadata_tables_py.h", + "src/trace_processor/tables/perf_tables_py.h", "src/trace_processor/tables/profiler_tables_py.h", "src/trace_processor/tables/sched_tables_py.h", "src/trace_processor/tables/slice_tables_py.h", @@ -13836,6 +13842,7 @@ python_binary_host { "src/trace_processor/tables/jit_tables.py", "src/trace_processor/tables/memory_tables.py", "src/trace_processor/tables/metadata_tables.py", + "src/trace_processor/tables/perf_tables.py", "src/trace_processor/tables/profiler_tables.py", "src/trace_processor/tables/sched_tables.py", "src/trace_processor/tables/slice_tables.py", diff --git a/BUILD b/BUILD index b31876a10c..d3b61e7b0b 100644 --- a/BUILD +++ b/BUILD @@ -1816,6 +1816,11 @@ perfetto_filegroup( "src/trace_processor/importers/perf/sample.h", "src/trace_processor/importers/perf/sample_id.cc", "src/trace_processor/importers/perf/sample_id.h", + "src/trace_processor/importers/perf/spe.h", + "src/trace_processor/importers/perf/spe_record_parser.cc", + "src/trace_processor/importers/perf/spe_record_parser.h", + "src/trace_processor/importers/perf/spe_tokenizer.cc", + "src/trace_processor/importers/perf/spe_tokenizer.h", "src/trace_processor/importers/perf/time_conv_record.h", "src/trace_processor/importers/perf/util.h", ], @@ -2862,6 +2867,7 @@ perfetto_filegroup( name = "src_trace_processor_perfetto_sql_stdlib_linux_perf_perf", srcs = [ "src/trace_processor/perfetto_sql/stdlib/linux/perf/samples.sql", + "src/trace_processor/perfetto_sql/stdlib/linux/perf/spe.sql", ], ) @@ -3156,6 +3162,7 @@ perfetto_cc_tp_tables( "src/trace_processor/tables/jit_tables.py", "src/trace_processor/tables/memory_tables.py", "src/trace_processor/tables/metadata_tables.py", + "src/trace_processor/tables/perf_tables.py", "src/trace_processor/tables/profiler_tables.py", "src/trace_processor/tables/sched_tables.py", "src/trace_processor/tables/slice_tables.py", @@ -3171,6 +3178,7 @@ 
perfetto_cc_tp_tables( "src/trace_processor/tables/jit_tables_py.h", "src/trace_processor/tables/memory_tables_py.h", "src/trace_processor/tables/metadata_tables_py.h", + "src/trace_processor/tables/perf_tables_py.h", "src/trace_processor/tables/profiler_tables_py.h", "src/trace_processor/tables/sched_tables_py.h", "src/trace_processor/tables/slice_tables_py.h", diff --git a/src/trace_processor/importers/common/trace_parser.cc b/src/trace_processor/importers/common/trace_parser.cc index a667b7530e..f0facadb73 100644 --- a/src/trace_processor/importers/common/trace_parser.cc +++ b/src/trace_processor/importers/common/trace_parser.cc @@ -19,12 +19,13 @@ namespace perfetto { namespace trace_processor { -ProtoTraceParser::~ProtoTraceParser() = default; -JsonTraceParser::~JsonTraceParser() = default; +AndroidLogEventParser::~AndroidLogEventParser() = default; FuchsiaRecordParser::~FuchsiaRecordParser() = default; -PerfRecordParser::~PerfRecordParser() = default; InstrumentsRowParser::~InstrumentsRowParser() = default; -AndroidLogEventParser::~AndroidLogEventParser() = default; +JsonTraceParser::~JsonTraceParser() = default; +PerfRecordParser::~PerfRecordParser() = default; +ProtoTraceParser::~ProtoTraceParser() = default; +SpeRecordParser::~SpeRecordParser() = default; } // namespace trace_processor } // namespace perfetto diff --git a/src/trace_processor/importers/common/trace_parser.h b/src/trace_processor/importers/common/trace_parser.h index cf7c84a124..c1da87239f 100644 --- a/src/trace_processor/importers/common/trace_parser.h +++ b/src/trace_processor/importers/common/trace_parser.h @@ -70,6 +70,12 @@ class PerfRecordParser { virtual void ParsePerfRecord(int64_t, perf_importer::Record) = 0; }; +class SpeRecordParser { + public: + virtual ~SpeRecordParser(); + virtual void ParseSpeRecord(int64_t, TraceBlobView) = 0; +}; + class InstrumentsRowParser { public: virtual ~InstrumentsRowParser(); diff --git a/src/trace_processor/importers/perf/BUILD.gn 
b/src/trace_processor/importers/perf/BUILD.gn index 8234a6607e..c0d0bd4e8f 100644 --- a/src/trace_processor/importers/perf/BUILD.gn +++ b/src/trace_processor/importers/perf/BUILD.gn @@ -88,6 +88,11 @@ source_set("perf") { "sample.h", "sample_id.cc", "sample_id.h", + "spe.h", + "spe_record_parser.cc", + "spe_record_parser.h", + "spe_tokenizer.cc", + "spe_tokenizer.h", "time_conv_record.h", "util.h", ] diff --git a/src/trace_processor/importers/perf/aux_record.cc b/src/trace_processor/importers/perf/aux_record.cc index 89a5407c22..73d22e3859 100644 --- a/src/trace_processor/importers/perf/aux_record.cc +++ b/src/trace_processor/importers/perf/aux_record.cc @@ -27,6 +27,7 @@ namespace perfetto::trace_processor::perf_importer { // static base::Status AuxRecord::Parse(const Record& record) { + attr = record.attr; Reader reader(record.payload.copy()); if (!reader.Read(offset) || !reader.Read(size) || !reader.Read(flags)) { return base::ErrStatus("Failed to parse AUX record"); diff --git a/src/trace_processor/importers/perf/aux_record.h b/src/trace_processor/importers/perf/aux_record.h index 2e9fc1d167..770a5ae57d 100644 --- a/src/trace_processor/importers/perf/aux_record.h +++ b/src/trace_processor/importers/perf/aux_record.h @@ -21,6 +21,8 @@ #include #include "perfetto/base/status.h" +#include "perfetto/trace_processor/ref_counted.h" +#include "src/trace_processor/importers/perf/perf_event_attr.h" #include "src/trace_processor/importers/perf/sample_id.h" namespace perfetto::trace_processor::perf_importer { @@ -30,6 +32,7 @@ struct AuxRecord { base::Status Parse(const Record& record); uint64_t end() const { return offset + size; } + RefPtr attr; uint64_t offset; uint64_t size; uint64_t flags; diff --git a/src/trace_processor/importers/perf/aux_stream_manager.cc b/src/trace_processor/importers/perf/aux_stream_manager.cc index d613636024..482f17d136 100644 --- a/src/trace_processor/importers/perf/aux_stream_manager.cc +++ 
b/src/trace_processor/importers/perf/aux_stream_manager.cc @@ -31,6 +31,7 @@ #include "src/trace_processor/importers/perf/etm_tokenizer.h" #include "src/trace_processor/importers/perf/perf_event.h" #include "src/trace_processor/importers/perf/record.h" +#include "src/trace_processor/importers/perf/spe_tokenizer.h" #include "src/trace_processor/storage/trace_storage.h" #include "src/trace_processor/types/trace_processor_context.h" #include "src/trace_processor/util/status_macros.h" @@ -58,6 +59,10 @@ base::Status AuxStreamManager::OnAuxtraceInfoRecord(AuxtraceInfoRecord info) { CreateEtmTokenizerFactory(std::move(info.payload))); break; } + case PERF_AUXTRACE_ARM_SPE: { + tokenizer_factory_ = std::make_unique(); + break; + } default: context_->storage->IncrementIndexedStats(stats::perf_unknown_aux_data, static_cast(info.type)); diff --git a/src/trace_processor/importers/perf/aux_stream_manager.h b/src/trace_processor/importers/perf/aux_stream_manager.h index 67e14611fe..2cbde78d89 100644 --- a/src/trace_processor/importers/perf/aux_stream_manager.h +++ b/src/trace_processor/importers/perf/aux_stream_manager.h @@ -33,6 +33,7 @@ #include "src/trace_processor/importers/perf/itrace_start_record.h" #include "src/trace_processor/importers/perf/perf_session.h" #include "src/trace_processor/importers/perf/time_conv_record.h" +#include "src/trace_processor/storage/stats.h" namespace perfetto { namespace trace_processor { @@ -58,6 +59,7 @@ class AuxStream { base::Status OnItraceStartRecord(ItraceStartRecord start) { return tokenizer_->OnItraceStartRecord(std::move(start)); } + std::optional ConvertTscToPerfTime(uint64_t cycles); private: class AuxtraceDataChunk { @@ -109,6 +111,14 @@ class AuxStreamManager { TraceProcessorContext* context() const { return context_; } + std::optional ConvertTscToPerfTime(uint64_t cycles) { + if (!time_conv_) { + context_->storage->IncrementStats(stats::perf_no_tsc_data); + return std::nullopt; + } + return 
time_conv_->ConvertTscToPerfTime(cycles); + } + private: base::StatusOr> GetOrCreateStreamForSampleId(const std::optional& sample_id); @@ -122,6 +132,11 @@ class AuxStreamManager { std::optional time_conv_; }; +inline std::optional AuxStream::ConvertTscToPerfTime( + uint64_t cycles) { + return manager_.ConvertTscToPerfTime(cycles); +} + } // namespace perf_importer } // namespace trace_processor } // namespace perfetto diff --git a/src/trace_processor/importers/perf/perf_session.cc b/src/trace_processor/importers/perf/perf_session.cc index 81bcb3efa2..fb4963aebf 100644 --- a/src/trace_processor/importers/perf/perf_session.cc +++ b/src/trace_processor/importers/perf/perf_session.cc @@ -58,17 +58,20 @@ base::StatusOr> PerfSession::Builder::Build() { auto perf_session_id = context_->storage->mutable_perf_session_table()->Insert({}).id; - PerfEventAttr base_attr(context_, perf_session_id, attr_with_ids_[0].attr); + RefPtr first_attr; base::FlatHashMap> attrs_by_id; for (const auto& entry : attr_with_ids_) { RefPtr attr( new PerfEventAttr(context_, perf_session_id, entry.attr)); - if (base_attr.sample_id_all() != attr->sample_id_all()) { + if (!first_attr) { + first_attr = attr; + } + if (first_attr->sample_id_all() != attr->sample_id_all()) { return base::ErrStatus( "perf_event_attr with different sample_id_all values"); } - if (!OffsetsMatch(base_attr, *attr)) { + if (!OffsetsMatch(*first_attr, *attr)) { return base::ErrStatus("perf_event_attr with different id offsets"); } @@ -80,14 +83,14 @@ base::StatusOr> PerfSession::Builder::Build() { } } if (attr_with_ids_.size() > 1 && - (!base_attr.id_offset_from_start().has_value() || - (base_attr.sample_id_all() && - !base_attr.id_offset_from_end().has_value()))) { + (!first_attr->id_offset_from_start().has_value() || + (first_attr->sample_id_all() && + !first_attr->id_offset_from_end().has_value()))) { return base::ErrStatus("No id offsets for multiple perf_event_attr"); } - return RefPtr(new PerfSession(context_, 
perf_session_id, - std::move(attrs_by_id), - attr_with_ids_.size() == 1)); + return RefPtr( + new PerfSession(context_, perf_session_id, std::move(first_attr), + std::move(attrs_by_id), attr_with_ids_.size() == 1)); } base::StatusOr> PerfSession::FindAttrForRecord( @@ -97,13 +100,12 @@ base::StatusOr> PerfSession::FindAttrForRecord( return RefPtr(); } - RefPtr first(attrs_by_id_.GetIterator().value().get()); if (has_single_perf_event_attr_) { - return first; + return first_attr_; } - if (header.type != PERF_RECORD_SAMPLE && !first->sample_id_all()) { - return first; + if (header.type != PERF_RECORD_SAMPLE && !first_attr_->sample_id_all()) { + return first_attr_; } uint64_t id; @@ -112,7 +114,7 @@ base::StatusOr> PerfSession::FindAttrForRecord( } if (id == 0) { - return first; + return first_attr_; } auto it = FindAttrForEventId(id); diff --git a/src/trace_processor/importers/perf/perf_session.h b/src/trace_processor/importers/perf/perf_session.h index 141f07a8b8..2a90ab14fe 100644 --- a/src/trace_processor/importers/perf/perf_session.h +++ b/src/trace_processor/importers/perf/perf_session.h @@ -109,10 +109,12 @@ class PerfSession : public RefCounted { PerfSession(TraceProcessorContext* context, tables::PerfSessionTable::Id perf_session_id, + RefPtr first_attr, base::FlatHashMap> attrs_by_id, bool has_single_perf_event_attr) : context_(context), perf_session_id_(perf_session_id), + first_attr_(std::move(first_attr)), attrs_by_id_(std::move(attrs_by_id)), has_single_perf_event_attr_(has_single_perf_event_attr) {} @@ -122,7 +124,9 @@ class PerfSession : public RefCounted { TraceProcessorContext* const context_; tables::PerfSessionTable::Id perf_session_id_; + RefPtr first_attr_; base::FlatHashMap> attrs_by_id_; + // Multiple ids can map to the same perf_event_attr. This member tells us // whether there was only one perf_event_attr (with potentially different ids // associated). 
This makes the attr lookup given a record trivial and not diff --git a/src/trace_processor/importers/perf/spe.h b/src/trace_processor/importers/perf/spe.h new file mode 100644 index 0000000000..156b3db18c --- /dev/null +++ b/src/trace_processor/importers/perf/spe.h @@ -0,0 +1,405 @@ +/* + * Copyright (C) 2024 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// Collection of constants and utilities to parse SPE data. +// SPE packet spec can be found here: +// Arm Architecture Reference Manual for A-profile architecture +// https://developer.arm.com/documentation/ddi0487/latest/ + +#ifndef SRC_TRACE_PROCESSOR_IMPORTERS_PERF_SPE_H_ +#define SRC_TRACE_PROCESSOR_IMPORTERS_PERF_SPE_H_ + +#include +#include +#include +#include "perfetto/base/logging.h" +#include "perfetto/public/compiler.h" +namespace perfetto::trace_processor::perf_importer::spe { + +// Test whether a given bit is set. e.g. 
+// IsBitSet<1>(0b0010) == true +// IsBitSet<0>(0b0010) == false +template +inline constexpr bool IsBitSet(T value) { + static_assert(std::is_unsigned_v); + static_assert(bit < sizeof(T) * 8); + return value & (T(1) << bit); +} + +// Index value in Address packets +enum class AddressIndex : uint8_t { + kInstruction, + kBranchTarget, + kDataVirtual, + kDataPhysical, + kPrevBranchTarget, + kUnknown, + kMax = kUnknown, +}; + +// Index value in Counter packets +enum class CounterIndex : uint8_t { + kTotalLatency, + kIssueLatency, + kTranslationLatency, + kUnknown, + kMax = kUnknown, +}; + +enum class ContextIndex : uint8_t { + kEl1, + kEl2, + kUnknown, + kMax = kUnknown, +}; + +// Operation class for OperationType packets +enum class OperationClass : uint8_t { + kOther, + kLoadOrStoreOrAtomic, + kBranchOrExceptionReturn, + kUnknown, + kMax = kUnknown, +}; + +// Data source types for a payload of a DataSource packet +enum class DataSource : uint8_t { + kL1D, + kL2, + kPeerCore, + kLocalCluster, + kSysCache, + kPeerCluster, + kRemote, + kDram, + kUnknown, + kMax = kUnknown, +}; + +// Exception levels instructions can execute in. 
+enum class ExceptionLevel { kEl0, kEl1, kEl2, kEl3, kMax = kEl3 }; + +// Common constants to both short and extended headers +constexpr uint8_t COMMON_HEADER_MASK = 0b1111'1000; +constexpr uint8_t COMMON_HEADER_ADDRESS_PACKET = 0b1011'0000; +constexpr uint8_t COMMON_HEADER_COUNTER_PACKET = 0b1001'1000; + +constexpr uint8_t COMMON_HEADER_SIZE_MASK = 0b0011'0000; +constexpr uint8_t COMMON_HEADER_SIZE_MASK_RSHIFT = 4; + +constexpr uint8_t COMMON_HEADER_NO_PAYLOAD_MASK = 0b1110'0000; +constexpr uint8_t COMMON_HEADER_NO_PAYLOAD = 0b0000'0000; + +// Constants for short headers +constexpr uint8_t SHORT_HEADER_PADDING = 0b0000'0000; +constexpr uint8_t SHORT_HEADER_END_PACKET = 0b0000'0001; +constexpr uint8_t SHORT_HEADER_TIMESTAMP_PACKET = 0b0111'0001; + +constexpr uint8_t SHORT_HEADER_MASK_1 = 0b1100'1111; +constexpr uint8_t SHORT_HEADER_EVENTS_PACKET = 0b0100'0010; +constexpr uint8_t SHORT_HEADER_DATA_SOURCE_PACKET = 0b0100'0011; + +constexpr uint8_t SHORT_HEADER_MASK_2 = 0b1111'1100; +constexpr uint8_t SHORT_HEADER_CONTEXT_PACKET = 0b0110'0100; +constexpr uint8_t SHORT_HEADER_OPERATION_TYPE_PACKET = 0b0100'1000; + +constexpr uint8_t SHORT_HEADER_INDEX_MASK = 0b0000'0111; + +// Constants for extended headers +constexpr uint8_t EXTENDED_HEADER_MASK = 0b1110'0000; +constexpr uint8_t EXTENDED_HEADER = 0b0010'0000; + +constexpr uint8_t EXTENDED_HEADER_INDEX_MASK = 0b0000'0011; +constexpr uint8_t EXTENDED_HEADER_INDEX_LSHIFT = 3; + +// OperationType packet constants +constexpr uint8_t PKT_OP_TYPE_HEADER_CLASS_MASK = 0b0000'0011; +constexpr uint8_t PKT_OP_TYPE_HEADER_CLASS_OTHER = 0b0000'0000; +constexpr uint8_t PKT_OP_TYPE_HEADER_CLASS_LD_ST_ATOMIC = 0b0000'0001; +constexpr uint8_t PKT_OP_TYPE_HEADER_CLASS_BR_ERET = 0b0000'0010; + +constexpr uint8_t PKT_OP_TYPE_PAYLOAD_SUBCLASS_OTHER_MASK = 0b1111'1110; +constexpr uint8_t PKT_OP_TYPE_PAYLOAD_SUBCLASS_OTHER = 0b0000'0000; + +constexpr uint8_t PKT_OP_TYPE_PAYLOAD_SUBCLASS_SVE_OTHER_MASK = 0b1000'1001; +constexpr uint8_t 
PKT_OP_TYPE_PAYLOAD_SUBCLASS_SVE_OTHER = 0b0000'1000; + +// DataSource packet constants +constexpr uint16_t PKT_DATA_SOURCE_PAYLOAD_L1D = 0b0000'0000; +constexpr uint16_t PKT_DATA_SOURCE_PAYLOAD_L2 = 0b0000'1000; +constexpr uint16_t PKT_DATA_SOURCE_PAYLOAD_PEER_CORE = 0b0000'1001; +constexpr uint16_t PKT_DATA_SOURCE_PAYLOAD_LOCAL_CLUSTER = 0b0000'1010; +constexpr uint16_t PKT_DATA_SOURCE_PAYLOAD_SYS_CACHE = 0b0000'1011; +constexpr uint16_t PKT_DATA_SOURCE_PAYLOAD_PEER_CLUSTER = 0b0000'1100; +constexpr uint16_t PKT_DATA_SOURCE_PAYLOAD_REMOTE = 0b0000'1101; +constexpr uint16_t PKT_DATA_SOURCE_PAYLOAD_DRAM = 0b0000'1110; + +// Helper to cast a value into a typed enum. Takes care of invalid inputs by +// returning the `kUnknown` value. +template +T ToEnum(uint8_t val) { + if (PERFETTO_LIKELY(val < static_cast(T::kMax))) { + return static_cast(val); + } + return T::kUnknown; +} + +// An SPE record is a collection of packets. An End or Timestamp packet signals +// the end of a record. Each packet consists of a 1 or 2 byte header followed by +// 0 - 8 bytes of payload. The `ShortHeader` and `ExtendedHeader` hide all the +// low level bit fiddling details of handling packets. When parsing a stream of +// SPE records you can just check the first byte in the stream to determine if +// it belongs to a short or extended header and then use the appropriate class +// to determine packet type, payload length and packet details. There are other +// helper classes to parse payloads for the different packets. + +// Checks if a header byte is a padding packet (no payload). +inline bool IsPadding(uint8_t byte) { + return byte == SHORT_HEADER_PADDING; +} + +// Checks if a header byte corresponds to an extended header. 
+inline bool IsExtendedHeader(uint8_t byte) { + return (byte & EXTENDED_HEADER_MASK) == EXTENDED_HEADER; +} + +class ShortHeader { + public: + explicit ShortHeader(uint8_t byte) : byte_0_(byte) { + PERFETTO_DCHECK(!IsExtendedHeader(byte)); + } + + inline bool IsPadding() { return byte_0_ == SHORT_HEADER_PADDING; } + + inline bool IsEndPacket() { return byte_0_ == SHORT_HEADER_END_PACKET; } + + inline bool IsTimestampPacket() { + return byte_0_ == SHORT_HEADER_TIMESTAMP_PACKET; + } + + bool IsAddressPacket() const { + return (byte_0_ & COMMON_HEADER_MASK) == COMMON_HEADER_ADDRESS_PACKET; + } + + AddressIndex GetAddressIndex() const { + PERFETTO_DCHECK(IsAddressPacket()); + return ToEnum(index()); + } + + bool IsCounterPacket() const { + return (byte_0_ & COMMON_HEADER_MASK) == COMMON_HEADER_COUNTER_PACKET; + } + + CounterIndex GetCounterIndex() const { + PERFETTO_DCHECK(IsCounterPacket()); + return ToEnum(index()); + } + + bool IsEventsPacket() const { + return (byte_0_ & SHORT_HEADER_MASK_1) == SHORT_HEADER_EVENTS_PACKET; + } + + bool IsContextPacket() const { + return (byte_0_ & SHORT_HEADER_MASK_2) == SHORT_HEADER_CONTEXT_PACKET; + } + + ContextIndex GetContextIndex() const { return ToEnum(index()); } + + bool IsDataSourcePacket() const { + return (byte_0_ & SHORT_HEADER_MASK_1) == SHORT_HEADER_DATA_SOURCE_PACKET; + } + + DataSource GetDataSource(uint64_t payload) { + PERFETTO_DCHECK(IsDataSourcePacket()); + switch (payload) { + case PKT_DATA_SOURCE_PAYLOAD_L1D: + return DataSource::kL1D; + case PKT_DATA_SOURCE_PAYLOAD_L2: + return DataSource::kL2; + case PKT_DATA_SOURCE_PAYLOAD_PEER_CORE: + return DataSource::kPeerCore; + case PKT_DATA_SOURCE_PAYLOAD_LOCAL_CLUSTER: + return DataSource::kLocalCluster; + case PKT_DATA_SOURCE_PAYLOAD_SYS_CACHE: + return DataSource::kSysCache; + case PKT_DATA_SOURCE_PAYLOAD_PEER_CLUSTER: + return DataSource::kPeerCluster; + case PKT_DATA_SOURCE_PAYLOAD_REMOTE: + return DataSource::kRemote; + case PKT_DATA_SOURCE_PAYLOAD_DRAM: + 
return DataSource::kDram; + default: + break; + } + return DataSource::kUnknown; + } + + bool IsOperationTypePacket() const { + return (byte_0_ & SHORT_HEADER_MASK_2) == + SHORT_HEADER_OPERATION_TYPE_PACKET; + } + + OperationClass GetOperationClass() const { + PERFETTO_DCHECK(IsOperationTypePacket()); + switch (byte_0_ & PKT_OP_TYPE_HEADER_CLASS_MASK) { + case PKT_OP_TYPE_HEADER_CLASS_OTHER: + return OperationClass::kOther; + + case PKT_OP_TYPE_HEADER_CLASS_LD_ST_ATOMIC: + return OperationClass::kLoadOrStoreOrAtomic; + + case PKT_OP_TYPE_HEADER_CLASS_BR_ERET: + return OperationClass::kBranchOrExceptionReturn; + + default: + break; + } + return OperationClass::kUnknown; + } + + bool HasPayload() const { + return (byte_0_ & COMMON_HEADER_NO_PAYLOAD_MASK) != + COMMON_HEADER_NO_PAYLOAD; + } + + uint8_t GetPayloadSize() const { + PERFETTO_DCHECK(!IsExtendedHeader(byte_0_)); + if (!HasPayload()) { + return 0; + } + return static_cast(1 << ((byte_0_ & COMMON_HEADER_SIZE_MASK) >> + COMMON_HEADER_SIZE_MASK_RSHIFT)); + } + + private: + friend class ExtendedHeader; + + uint8_t index() const { return byte_0_ & SHORT_HEADER_INDEX_MASK; } + + uint8_t byte_0_; +}; + +class ExtendedHeader { + public: + ExtendedHeader(uint8_t byte_0, uint8_t byte_1) + : byte_0_(byte_0), short_header_(byte_1) { + PERFETTO_DCHECK(IsExtendedHeader(byte_0)); + } + + bool IsAddressPacket() const { return short_header_.IsAddressPacket(); } + + AddressIndex GetAddressIndex() const { return ToEnum(index()); } + + bool IsCounterPacket() const { return short_header_.IsCounterPacket(); } + + CounterIndex GetCounterIndex() const { return ToEnum(index()); } + + inline uint8_t GetPayloadSize() { return short_header_.GetPayloadSize(); } + + private: + uint8_t byte_1() const { return short_header_.byte_0_; } + + uint8_t index() const { + return static_cast((byte_0_ & EXTENDED_HEADER_INDEX_MASK) + << EXTENDED_HEADER_INDEX_LSHIFT) + + short_header_.index(); + } + + uint8_t byte_0_; + ShortHeader short_header_; +}; + 
+enum class OperationOtherSubclass : uint8_t { + kOther, + kSveVecOp, + kUnknown, + kMax = kUnknown +}; +class OperationTypeOtherPayload { + public: + explicit OperationTypeOtherPayload(uint8_t payload) : payload_(payload) {} + + OperationOtherSubclass subclass() const { + if ((payload_ & PKT_OP_TYPE_PAYLOAD_SUBCLASS_OTHER_MASK) == + PKT_OP_TYPE_PAYLOAD_SUBCLASS_OTHER) { + return OperationOtherSubclass::kOther; + } + if ((payload_ & PKT_OP_TYPE_PAYLOAD_SUBCLASS_SVE_OTHER_MASK) == + PKT_OP_TYPE_PAYLOAD_SUBCLASS_SVE_OTHER) { + return OperationOtherSubclass::kSveVecOp; + } + return OperationOtherSubclass::kUnknown; + } + + private: + uint8_t payload_; +}; + +class OperationTypeLdStAtPayload { + public: + explicit OperationTypeLdStAtPayload(uint8_t payload) : payload_(payload) {} + + bool IsStore() const { return IsBitSet<0>(payload_); } + + private: + uint8_t payload_; +}; + +namespace internal { +inline uint64_t GetPacketAddressAddress(uint64_t payload) { + return payload & 0x0FFFFFFFFFFFFFFF; +} + +inline bool GetPacketAddressNs(uint64_t payload) { + return IsBitSet<63>(payload); +} + +inline ExceptionLevel GetPacketAddressEl(uint64_t payload) { + return static_cast((payload >> 61) & 0x03); +} + +inline bool GetPacketAddressNse(uint64_t payload) { + return IsBitSet<60>(payload); +} +} // namespace internal + +struct InstructionVirtualAddress { + explicit InstructionVirtualAddress(uint64_t payload) + : address(internal::GetPacketAddressAddress(payload)), + el(internal::GetPacketAddressEl(payload)), + ns(internal::GetPacketAddressNs(payload)), + nse(internal::GetPacketAddressNse(payload)) {} + uint64_t address; + ExceptionLevel el; + bool ns; + bool nse; +}; + +struct DataVirtualAddress { + explicit DataVirtualAddress(uint64_t payload) + : address(internal::GetPacketAddressAddress(payload)) {} + uint64_t address; +}; + +struct DataPhysicalAddress { + explicit DataPhysicalAddress(uint64_t payload) + : address(internal::GetPacketAddressAddress(payload)) {} + uint64_t 
address; +}; + +} // namespace perfetto::trace_processor::perf_importer::spe + +#endif // SRC_TRACE_PROCESSOR_IMPORTERS_PERF_SPE_H_ diff --git a/src/trace_processor/importers/perf/spe_record_parser.cc b/src/trace_processor/importers/perf/spe_record_parser.cc new file mode 100644 index 0000000000..6dc4c6aa85 --- /dev/null +++ b/src/trace_processor/importers/perf/spe_record_parser.cc @@ -0,0 +1,362 @@ +/* + * Copyright (C) 2024 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#include "src/trace_processor/importers/perf/spe_record_parser.h"
+
+#include <cstdint>
+#include <optional>
+#include <utility>
+
+#include "perfetto/base/logging.h"
+#include "perfetto/trace_processor/trace_blob_view.h"
+#include "src/trace_processor/importers/common/mapping_tracker.h"
+#include "src/trace_processor/importers/common/process_tracker.h"
+#include "src/trace_processor/importers/common/virtual_memory_mapping.h"
+#include "src/trace_processor/importers/perf/reader.h"
+#include "src/trace_processor/importers/perf/spe.h"
+#include "src/trace_processor/storage/trace_storage.h"
+#include "src/trace_processor/tables/metadata_tables_py.h"
+#include "src/trace_processor/tables/perf_tables_py.h"
+#include "src/trace_processor/types/trace_processor_context.h"
+
+namespace perfetto::trace_processor::perf_importer {
+
+// Returns the constant name stored in the `data_source` column for |ds|.
+// static
+const char* SpeRecordParserImpl::ToString(spe::DataSource ds) {
+  switch (ds) {
+    case spe::DataSource::kUnknown:
+      return "UNKNOWN";
+    case spe::DataSource::kL1D:
+      return "L1D";
+    case spe::DataSource::kL2:
+      return "L2";
+    case spe::DataSource::kPeerCore:
+      return "PEER_CORE";
+    case spe::DataSource::kLocalCluster:
+      return "LOCAL_CLUSTER";
+    case spe::DataSource::kSysCache:
+      return "SYS_CACHE";
+    case spe::DataSource::kPeerCluster:
+      return "PEER_CLUSTER";
+    case spe::DataSource::kRemote:
+      return "REMOTE";
+    case spe::DataSource::kDram:
+      return "DRAM";
+  }
+  PERFETTO_FATAL("For GCC");
+}
+
+// Returns the constant name stored in the `exception_level` column for |el|.
+// static
+const char* SpeRecordParserImpl::ToString(spe::ExceptionLevel el) {
+  switch (el) {
+    case spe::ExceptionLevel::kEl0:
+      return "EL0";
+    case spe::ExceptionLevel::kEl1:
+      return "EL1";
+    case spe::ExceptionLevel::kEl2:
+      return "EL2";
+    case spe::ExceptionLevel::kEl3:
+      return "EL3";
+  }
+  PERFETTO_FATAL("For GCC");
+}
+
+// Returns the constant name stored in the `operation` column for |name|.
+// static
+const char* SpeRecordParserImpl::ToString(OperationName name) {
+  switch (name) {
+    case OperationName::kOther:
+      return "OTHER";
+    case OperationName::kSveVecOp:
+      return "SVE_VEC_OP";
+    case OperationName::kLoad:
+      return "LOAD";
+    case OperationName::kStore:
+      return "STORE";
+    case OperationName::kBranch:
+      return "BRANCH";
+    case OperationName::kUnknown:
+      return "UNKNOWN";
+  }
+  PERFETTO_FATAL("For GCC");
+}
+
+// The ToStringId() overloads intern the matching ToString() value on first use
+// and return the cached StringId afterwards.
+StringId SpeRecordParserImpl::ToStringId(OperationName name) {
+  if (operation_name_strings_[name] == kNullStringId) {
+    operation_name_strings_[name] =
+        context_->storage->InternString(ToString(name));
+  }
+  return operation_name_strings_[name];
+}
+
+StringId SpeRecordParserImpl::ToStringId(spe::ExceptionLevel el) {
+  if (exception_level_strings_[el] == kNullStringId) {
+    exception_level_strings_[el] =
+        context_->storage->InternString(ToString(el));
+  }
+  return exception_level_strings_[el];
+}
+
+StringId SpeRecordParserImpl::ToStringId(spe::DataSource ds) {
+  if (data_source_strings_[ds] == kNullStringId) {
+    data_source_strings_[ds] = context_->storage->InternString(ToString(ds));
+  }
+  return data_source_strings_[ds];
+}
+
+SpeRecordParserImpl::SpeRecordParserImpl(TraceProcessorContext* context)
+    : context_(context), reader_(TraceBlobView()) {}
+
+// Parses every packet of one SPE record into a single SpeRecordTable row
+// timestamped with |ts|, and inserts that row. When the record carries an
+// instruction address, the address is also interned as a frame (user mapping
+// for EL0, kernel mapping for EL1, dummy mapping otherwise).
+void SpeRecordParserImpl::ParseSpeRecord(int64_t ts, TraceBlobView data) {
+  reader_ = Reader(std::move(data));
+  inflight_row_ = {};
+  inflight_row_.ts = ts;
+  inflight_record_ = {};
+
+  // No need to check that there is enough data as this has been validated by
+  // the tokenization step.
+  while (reader_.size_left() != 0) {
+    uint8_t byte_0;
+    reader_.Read(byte_0);
+
+    if (spe::IsExtendedHeader(byte_0)) {
+      uint8_t byte_1;
+      reader_.Read(byte_1);
+      spe::ExtendedHeader extended_header(byte_0, byte_1);
+      ReadExtendedPacket(extended_header);
+    } else {
+      ReadShortPacket(spe::ShortHeader(byte_0));
+    }
+  }
+  if (!inflight_record_.instruction_address) {
+    context_->storage->mutable_spe_record_table()->Insert(inflight_row_);
+    return;
+  }
+
+  const auto& inst = *inflight_record_.instruction_address;
+
+  inflight_row_.exception_level = ToStringId(inst.el);
+
+  if (inst.el == spe::ExceptionLevel::kEl0 && inflight_row_.utid) {
+    // Both the thread row lookup and the thread's upid are optional. Only
+    // resolve a user mapping when the owning process is known; otherwise fall
+    // through to the dummy mapping below instead of dereferencing an empty
+    // optional.
+    const auto thread = context_->storage->thread_table().FindById(
+        tables::ThreadTable::Id(*inflight_row_.utid));
+    if (thread && thread->upid().has_value()) {
+      VirtualMemoryMapping* mapping =
+          context_->mapping_tracker->FindUserMappingForAddress(
+              *thread->upid(), inst.address);
+      if (mapping) {
+        inflight_row_.instruction_frame_id =
+            mapping->InternFrame(mapping->ToRelativePc(inst.address), "");
+      }
+    }
+  } else if (inst.el == spe::ExceptionLevel::kEl1) {
+    VirtualMemoryMapping* mapping =
+        context_->mapping_tracker->FindKernelMappingForAddress(inst.address);
+    if (mapping) {
+      inflight_row_.instruction_frame_id =
+          mapping->InternFrame(mapping->ToRelativePc(inst.address), "");
+    }
+  }
+
+  // No mapping found (or other exception level): attribute the frame to the
+  // dummy mapping so the row always references a frame.
+  if (!inflight_row_.instruction_frame_id.has_value()) {
+    inflight_row_.instruction_frame_id = GetDummyMapping()->InternFrame(
+        GetDummyMapping()->ToRelativePc(inst.address), "");
+  }
+
+  context_->storage->mutable_spe_record_table()->Insert(inflight_row_);
+}
+
+// Dispatches a packet with a one byte header to its reader. Packet types that
+// are not handled have their payload skipped.
+void SpeRecordParserImpl::ReadShortPacket(spe::ShortHeader short_header) {
+  if (short_header.IsAddressPacket()) {
+    ReadAddressPacket(short_header.GetAddressIndex());
+
+  } else if (short_header.IsCounterPacket()) {
+    ReadCounterPacket(short_header.GetCounterIndex());
+
+  } else if (short_header.IsEventsPacket()) {
+    ReadEventsPacket(short_header);
+
+  } else if (short_header.IsContextPacket()) {
+    ReadContextPacket(short_header);
+
+  } else if (short_header.IsOperationTypePacket()) {
+    ReadOperationTypePacket(short_header);
+
+  } else if (short_header.IsDataSourcePacket()) {
+    ReadDataSourcePacket(short_header);
+
+  } else {
+    reader_.Skip(short_header.GetPayloadSize());
+  }
+}
+
+// Dispatches a packet with a two byte header. Only address and counter
+// packets are handled; anything else has its payload skipped.
+void SpeRecordParserImpl::ReadExtendedPacket(
+    spe::ExtendedHeader extended_header) {
+  if (extended_header.IsAddressPacket()) {
+    ReadAddressPacket(extended_header.GetAddressIndex());
+
+  } else if (extended_header.IsCounterPacket()) {
+    ReadCounterPacket(extended_header.GetCounterIndex());
+
+  } else {
+    reader_.Skip(extended_header.GetPayloadSize());
+  }
+}
+
+// Reads a 64 bit address payload. Instruction addresses are kept aside for
+// frame resolution in ParseSpeRecord(); data addresses go straight into the
+// row; branch target addresses are read but not stored.
+void SpeRecordParserImpl::ReadAddressPacket(spe::AddressIndex index) {
+  uint64_t payload;
+  reader_.Read(payload);
+
+  switch (index) {
+    case spe::AddressIndex::kInstruction:
+      inflight_record_.instruction_address =
+          spe::InstructionVirtualAddress(payload);
+      break;
+
+    case spe::AddressIndex::kDataVirtual:
+      inflight_row_.data_virtual_address =
+          static_cast<int64_t>(spe::DataVirtualAddress(payload).address);
+      break;
+
+    case spe::AddressIndex::kDataPhysical:
+      inflight_row_.data_physical_address =
+          static_cast<int64_t>(spe::DataPhysicalAddress(payload).address);
+      break;
+
+    case spe::AddressIndex::kBranchTarget:
+    case spe::AddressIndex::kPrevBranchTarget:
+    case spe::AddressIndex::kUnknown:
+      break;
+  }
+}
+
+// Reads a 16 bit counter payload into the latency column selected by |index|.
+void SpeRecordParserImpl::ReadCounterPacket(spe::CounterIndex index) {
+  uint16_t value;
+  reader_.Read(value);
+  switch (index) {
+    case spe::CounterIndex::kTotalLatency:
+      inflight_row_.total_latency = value;
+      break;
+
+    case spe::CounterIndex::kIssueLatency:
+      inflight_row_.issue_latency = value;
+      break;
+
+    case spe::CounterIndex::kTranslationLatency:
+      inflight_row_.translation_latency = value;
+      break;
+
+    case spe::CounterIndex::kUnknown:
+      break;
+  }
+}
+
+// Stores the raw events payload; individual bits are decoded in SQL.
+void SpeRecordParserImpl::ReadEventsPacket(spe::ShortHeader short_header) {
+  inflight_row_.events_bitmask =
+      static_cast<int64_t>(ReadPayload(short_header));
+}
+
+// Reads the 32 bit context payload as a tid and resolves it to a utid.
+// NOTE(review): the payload is always read as 4 bytes regardless of the size
+// encoded in the header, and the context index is currently not used.
+void SpeRecordParserImpl::ReadContextPacket(spe::ShortHeader short_header) {
+  uint32_t tid;
+  reader_.Read(tid);
+  inflight_row_.utid = context_->process_tracker->GetOrCreateThread(tid);
+  switch (short_header.GetContextIndex()) {
+    case spe::ContextIndex::kEl1:
+    case spe::ContextIndex::kEl2:
+    case spe::ContextIndex::kUnknown:
+      break;
+  }
+}
+
+// Reads the one byte operation type payload and stores the operation name.
+void SpeRecordParserImpl::ReadOperationTypePacket(
+    spe::ShortHeader short_header) {
+  uint8_t payload;
+  reader_.Read(payload);
+  inflight_row_.operation = ToStringId(GetOperationName(short_header, payload));
+}
+
+// Maps the operation class from the header plus the payload to the coarse
+// operation name exposed in the table.
+SpeRecordParserImpl::OperationName SpeRecordParserImpl::GetOperationName(
+    spe::ShortHeader short_header,
+    uint8_t payload) const {
+  switch (short_header.GetOperationClass()) {
+    case spe::OperationClass::kOther:
+      switch (spe::OperationTypeOtherPayload(payload).subclass()) {
+        case spe::OperationOtherSubclass::kOther:
+          return OperationName::kOther;
+        case spe::OperationOtherSubclass::kSveVecOp:
+          return OperationName::kSveVecOp;
+        case spe::OperationOtherSubclass::kUnknown:
+          return OperationName::kUnknown;
+      }
+      PERFETTO_FATAL("For GCC");
+
+    case spe::OperationClass::kLoadOrStoreOrAtomic:
+      if (spe::OperationTypeLdStAtPayload(payload).IsStore()) {
+        return OperationName::kStore;
+      }
+      return OperationName::kLoad;
+
+    case spe::OperationClass::kBranchOrExceptionReturn:
+      return OperationName::kBranch;
+
+    case spe::OperationClass::kUnknown:
+      return OperationName::kUnknown;
+  }
+  PERFETTO_FATAL("For GCC");
+}
+
+// Lazily creates the "spe_dummy" mapping used for addresses that could not be
+// attributed to a real mapping.
+VirtualMemoryMapping* SpeRecordParserImpl::GetDummyMapping() {
+  if (!dummy_mapping_) {
+    dummy_mapping_ =
+        &context_->mapping_tracker->CreateDummyMapping("spe_dummy");
+  }
+  return dummy_mapping_;
+}
+
+void SpeRecordParserImpl::ReadDataSourcePacket(spe::ShortHeader short_header) {
+  inflight_row_.data_source =
+      ToStringId(short_header.GetDataSource(ReadPayload(short_header)));
+}
+
+// Reads a payload of the size encoded in |short_header| (1, 2, 4 or 8 bytes)
+// and returns it widened to 64 bits. Any other size is a fatal error.
+uint64_t SpeRecordParserImpl::ReadPayload(spe::ShortHeader short_header) {
+  switch (short_header.GetPayloadSize()) {
+    case 1: {
+      uint8_t data;
+      reader_.Read(data);
+      return data;
+    }
+    case 2: {
+      uint16_t data;
+      reader_.Read(data);
+      return data;
+    }
+    case 4: {
+      uint32_t data;
+      reader_.Read(data);
+      return data;
+    }
+    case 8: {
+      uint64_t data;
+      reader_.Read(data);
+      return data;
+    }
+    default:
+      break;
+  }
+  PERFETTO_FATAL("Unreachable");
+}
+
+}  // namespace perfetto::trace_processor::perf_importer
diff --git a/src/trace_processor/importers/perf/spe_record_parser.h b/src/trace_processor/importers/perf/spe_record_parser.h
new file mode 100644
index 0000000000..da48962a2b
--- /dev/null
+++ b/src/trace_processor/importers/perf/spe_record_parser.h
@@ -0,0 +1,109 @@
+/*
+ * Copyright (C) 2024 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef SRC_TRACE_PROCESSOR_IMPORTERS_PERF_SPE_RECORD_PARSER_H_
+#define SRC_TRACE_PROCESSOR_IMPORTERS_PERF_SPE_RECORD_PARSER_H_
+
+#include <array>
+#include <cstddef>
+#include <cstdint>
+#include <optional>
+
+#include "perfetto/trace_processor/trace_blob_view.h"
+#include "src/trace_processor/importers/common/trace_parser.h"
+#include "src/trace_processor/importers/common/virtual_memory_mapping.h"
+#include "src/trace_processor/importers/perf/reader.h"
+#include "src/trace_processor/importers/perf/spe.h"
+#include "src/trace_processor/storage/trace_storage.h"
+#include "src/trace_processor/tables/perf_tables_py.h"
+
+namespace perfetto::trace_processor {
+class TraceProcessorContext;
+namespace perf_importer {
+
+// Parses sorted ARM SPE records (one record per call) into rows of
+// tables::SpeRecordTable.
+class SpeRecordParserImpl : public SpeRecordParser {
+ public:
+  explicit SpeRecordParserImpl(TraceProcessorContext* context);
+
+  void ParseSpeRecord(int64_t, TraceBlobView) override;
+
+ private:
+  // Fixed size cache of interned StringIds indexed by the values of |Enum|.
+  // |Enum| must define a kMax enumerator equal to its largest value. Entries
+  // start out as kNullStringId.
+  template <typename Enum>
+  class CachedStringIdArray {
+   public:
+    static constexpr size_t size = static_cast<size_t>(Enum::kMax) + 1;
+    explicit CachedStringIdArray() { cache_.fill(kNullStringId); }
+    StringId& operator[](Enum e) { return cache_[static_cast<size_t>(e)]; }
+
+   private:
+    std::array<StringId, size> cache_;
+  };
+
+  // State of the record being parsed that is not written to the row as is.
+  struct InflightSpeRecord {
+    std::optional<spe::InstructionVirtualAddress> instruction_address;
+  };
+
+  // Coarse operation names stored in the `operation` column.
+  enum class OperationName {
+    kOther,
+    kSveVecOp,
+    kLoad,
+    kStore,
+    kBranch,
+    kUnknown,
+    kMax = kUnknown
+  };
+
+  static const char* ToString(OperationName name);
+  static const char* ToString(spe::ExceptionLevel el);
+  static const char* ToString(spe::DataSource ds);
+
+  // Interned (and cached) versions of the ToString() values above.
+  StringId ToStringId(OperationName name);
+  StringId ToStringId(spe::ExceptionLevel el);
+  StringId ToStringId(spe::DataSource ds);
+
+  void ReadShortPacket(spe::ShortHeader short_header);
+  void ReadExtendedPacket(spe::ExtendedHeader extended_header);
+
+  void ReadAddressPacket(spe::AddressIndex index);
+  void ReadCounterPacket(spe::CounterIndex index);
+
+  void ReadEventsPacket(spe::ShortHeader short_header);
+  void ReadContextPacket(spe::ShortHeader short_header);
+  void ReadOperationTypePacket(spe::ShortHeader short_header);
+  void ReadDataSourcePacket(spe::ShortHeader short_header);
+
+  // Reads a payload of the size encoded in |short_header|.
+  uint64_t ReadPayload(spe::ShortHeader short_header);
+
+  OperationName GetOperationName(spe::ShortHeader short_header,
+                                 uint8_t payload) const;
+
+  // Returns the lazily created mapping used for unresolved addresses.
+  VirtualMemoryMapping* GetDummyMapping();
+
+  TraceProcessorContext* const context_;
+  CachedStringIdArray<OperationName> operation_name_strings_;
+  CachedStringIdArray<spe::DataSource> data_source_strings_;
+  CachedStringIdArray<spe::ExceptionLevel> exception_level_strings_;
+
+  // Per record parsing state, reset at the start of ParseSpeRecord().
+  Reader reader_;
+  tables::SpeRecordTable::Row inflight_row_;
+  InflightSpeRecord inflight_record_;
+
+  VirtualMemoryMapping* dummy_mapping_ = nullptr;
+};
+
+}  // namespace perf_importer
+}  // namespace perfetto::trace_processor
+
+#endif  // SRC_TRACE_PROCESSOR_IMPORTERS_PERF_SPE_RECORD_PARSER_H_
diff --git a/src/trace_processor/importers/perf/spe_tokenizer.cc b/src/trace_processor/importers/perf/spe_tokenizer.cc
new file mode 100644
index 0000000000..8186bbb46f
--- /dev/null
+++ b/src/trace_processor/importers/perf/spe_tokenizer.cc
@@ -0,0 +1,145 @@
+/*
+ * Copyright (C) 2024 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "src/trace_processor/importers/perf/spe_tokenizer.h"
+
+#include <cstdint>
+#include <cstring>
+#include <optional>
+#include <utility>
+
+#include "perfetto/base/logging.h"
+#include "perfetto/base/status.h"
+#include "perfetto/ext/base/status_or.h"
+#include "perfetto/trace_processor/trace_blob_view.h"
+#include "src/trace_processor/importers/common/clock_tracker.h"
+#include "src/trace_processor/importers/perf/aux_data_tokenizer.h"
+#include "src/trace_processor/importers/perf/aux_record.h"
+#include "src/trace_processor/importers/perf/itrace_start_record.h"
+#include "src/trace_processor/importers/perf/spe.h"
+#include "src/trace_processor/sorter/trace_sorter.h"
+#include "src/trace_processor/storage/stats.h"
+#include "src/trace_processor/types/trace_processor_context.h"
+
+namespace perfetto::trace_processor::perf_importer {
+
+void SpeTokenizer::OnDataLoss(uint64_t) {
+  // Clear any inflight parsing.
+  buffer_.PopFrontUntil(buffer_.end_offset());
+}
+
+base::Status SpeTokenizer::OnItraceStartRecord(ItraceStartRecord) {
+  // Clear any inflight parsing.
+  buffer_.PopFrontUntil(buffer_.end_offset());
+  return base::OkStatus();
+}
+
+// Appends |data| to the internal buffer and emits every complete record that
+// is now available. |aux| is remembered so Emit() can use its sample id and
+// clock.
+base::Status SpeTokenizer::Parse(AuxRecord aux, TraceBlobView data) {
+  last_aux_record_ = std::move(aux);
+  buffer_.PushBack(std::move(data));
+  while (ProcessRecord()) {
+  }
+  return base::OkStatus();
+}
+
+// Scans packets from the start of the buffer until an End or Timestamp packet
+// closes the record, emits the record and consumes it from the buffer.
+// Returns false (leaving the buffer untouched) if no complete record is
+// buffered yet.
+bool SpeTokenizer::ProcessRecord() {
+  for (auto it = buffer_.begin(); it;) {
+    uint8_t byte_0 = *it;
+    // Must be true (we passed the for loop condition).
+    it.MaybeAdvance(1);
+
+    if (spe::IsExtendedHeader(byte_0)) {
+      if (!it) {
+        return false;
+      }
+      uint8_t byte_1 = *it;
+      uint8_t payload_size =
+          spe::ExtendedHeader(byte_0, byte_1).GetPayloadSize();
+      // +1 to also step over the second header byte.
+      if (!it.MaybeAdvance(payload_size + 1)) {
+        return false;
+      }
+      continue;
+    }
+
+    spe::ShortHeader short_header(byte_0);
+    uint8_t payload_size = short_header.GetPayloadSize();
+    if (!it.MaybeAdvance(payload_size)) {
+      return false;
+    }
+
+    if (short_header.IsEndPacket()) {
+      size_t record_len = it.file_offset() - buffer_.start_offset();
+      TraceBlobView record =
+          *buffer_.SliceOff(buffer_.start_offset(), record_len);
+      buffer_.PopFrontUntil(it.file_offset());
+      Emit(std::move(record), std::nullopt);
+      return true;
+    }
+
+    if (short_header.IsTimestampPacket()) {
+      size_t record_len = it.file_offset() - buffer_.start_offset();
+      TraceBlobView record =
+          *buffer_.SliceOff(buffer_.start_offset(), record_len);
+      buffer_.PopFrontUntil(it.file_offset());
+      // Read the timestamp before handing the record over: the evaluation
+      // order of function arguments is unspecified, so reading |record| in
+      // the same call that moves it could observe a moved-from view.
+      const uint64_t cycles = ReadTimestamp(record);
+      Emit(std::move(record), cycles);
+      return true;
+    }
+  }
+  return false;
+}
+
+// Returns the last 8 bytes of |record|, i.e. the payload of the Timestamp
+// packet that terminates it.
+uint64_t SpeTokenizer::ReadTimestamp(const TraceBlobView& record) {
+  PERFETTO_CHECK(record.size() >= 8);
+  uint64_t timestamp;
+  memcpy(&timestamp, record.data() + record.size() - 8, 8);
+  return timestamp;
+}
+
+base::Status SpeTokenizer::NotifyEndOfStream() {
+  return base::OkStatus();
+}
+
+// Pushes |record| to the sorter. The timestamp is derived, in order of
+// preference, from |cycles|, from the sample id time of the last AUX record,
+// or from the sorter's current max timestamp. Records whose time cannot be
+// converted to trace time are dropped and counted in a stat.
+void SpeTokenizer::Emit(TraceBlobView record, std::optional<uint64_t> cycles) {
+  PERFETTO_CHECK(last_aux_record_);
+
+  std::optional<uint64_t> perf_time;
+
+  if (cycles.has_value()) {
+    perf_time = stream_.ConvertTscToPerfTime(*cycles);
+  } else {
+    context_->storage->IncrementStats(stats::spe_no_timestamp);
+  }
+
+  if (!perf_time && last_aux_record_->sample_id.has_value()) {
+    perf_time = last_aux_record_->sample_id->time();
+  }
+
+  if (!perf_time) {
+    context_->sorter->PushSpeRecord(context_->sorter->max_timestamp(),
+                                    std::move(record));
+    return;
+  }
+
+  base::StatusOr<int64_t> trace_time = context_->clock_tracker->ToTraceTime(
+      last_aux_record_->attr->clock_id(), static_cast<int64_t>(*perf_time));
+  if (!trace_time.ok()) {
+    context_->storage->IncrementStats(stats::spe_record_droped);
+    return;
+  }
+  context_->sorter->PushSpeRecord(*trace_time, std::move(record));
+}
+
+}  // namespace perfetto::trace_processor::perf_importer
diff --git a/src/trace_processor/importers/perf/spe_tokenizer.h b/src/trace_processor/importers/perf/spe_tokenizer.h
new file mode 100644
index 0000000000..6af33499dd
--- /dev/null
+++ b/src/trace_processor/importers/perf/spe_tokenizer.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright (C) 2024 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef SRC_TRACE_PROCESSOR_IMPORTERS_PERF_SPE_TOKENIZER_H_
+#define SRC_TRACE_PROCESSOR_IMPORTERS_PERF_SPE_TOKENIZER_H_
+
+#include <cstdint>
+#include <optional>
+
+#include "perfetto/base/status.h"
+#include "perfetto/trace_processor/trace_blob_view.h"
+#include "src/trace_processor/importers/perf/aux_data_tokenizer.h"
+#include "src/trace_processor/importers/perf/aux_record.h"
+#include "src/trace_processor/importers/perf/aux_stream_manager.h"
+#include "src/trace_processor/importers/perf/perf_session.h"
+#include "src/trace_processor/util/trace_blob_view_reader.h"
+
+namespace perfetto::trace_processor {
+class TraceProcessorContext;
+namespace perf_importer {
+
+// Splits the AUX data of an ARM SPE stream into individual records and pushes
+// them, timestamped, into the trace sorter.
+class SpeTokenizer : public AuxDataTokenizer {
+ public:
+  explicit SpeTokenizer(TraceProcessorContext* context, AuxStream* stream)
+      : context_(context), stream_(*stream) {}
+  void OnDataLoss(uint64_t) override;
+  base::Status Parse(AuxRecord record, TraceBlobView data) override;
+  base::Status NotifyEndOfStream() override;
+  base::Status OnItraceStartRecord(ItraceStartRecord) override;
+
+ private:
+  // A SPE trace is just a stream of SPE records which in turn are a collection
+  // of packets. An End or Timestamp packet signals the end of the current
+  // record. This method will read the stream until an end of record condition,
+  // emit the record to the sorter, consume the bytes from the buffer, and
+  // finally return true. If not enough data is available to parse a full record
+  // it returns false and the internal buffer is not modified.
+  bool ProcessRecord();
+
+  // Returns the payload of the Timestamp packet at the end of |record|. The
+  // record must be at least 8 bytes long.
+  uint64_t ReadTimestamp(const TraceBlobView& record);
+
+  // Emits a record to the sorter. You can optionally pass the cycles value
+  // contained in the timestamp packet which will be used to determine the trace
+  // timestamp.
+  void Emit(TraceBlobView data, std::optional<uint64_t> cycles);
+  TraceProcessorContext* const context_;
+  AuxStream& stream_;
+  // Bytes received so far that do not yet form a complete record.
+  util::TraceBlobViewReader buffer_;
+  // AUX record passed to the most recent Parse() call.
+  std::optional<AuxRecord> last_aux_record_;
+};
+
+using SpeTokenizerFactory = SimpleAuxDataTokenizerFactory<SpeTokenizer>;
+
+}  // namespace perf_importer
+}  // namespace perfetto::trace_processor
+
+#endif  // SRC_TRACE_PROCESSOR_IMPORTERS_PERF_SPE_TOKENIZER_H_
diff --git a/src/trace_processor/perfetto_sql/stdlib/linux/perf/BUILD.gn b/src/trace_processor/perfetto_sql/stdlib/linux/perf/BUILD.gn
index ca5843833a..73df9a8251 100644
--- a/src/trace_processor/perfetto_sql/stdlib/linux/perf/BUILD.gn
+++ b/src/trace_processor/perfetto_sql/stdlib/linux/perf/BUILD.gn
@@ -15,5 +15,8 @@
 import("../../../../../../gn/perfetto_sql.gni")
 
 perfetto_sql_source_set("perf") {
-  sources = [ "samples.sql" ]
+  sources = [
+    "samples.sql",
+    "spe.sql",
+  ]
 }
diff --git a/src/trace_processor/perfetto_sql/stdlib/linux/perf/spe.sql b/src/trace_processor/perfetto_sql/stdlib/linux/perf/spe.sql
new file mode 100644
index 0000000000..d16b0cab6e
--- /dev/null
+++ b/src/trace_processor/perfetto_sql/stdlib/linux/perf/spe.sql
@@ -0,0 +1,125 @@
+--
+-- Copyright 2024 The Android Open Source Project
+--
+-- Licensed under the Apache License, Version 2.0 (the 'License');
+-- you may not use this file except in compliance with the License.
+-- You may obtain a copy of the License at
+--
+--     https://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an 'AS IS' BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+
+-- Contains ARM Statistical Profiling Extension records
+CREATE PERFETTO VIEW linux_perf_spe_record(
+  -- Timestamp when the operation was sampled
+  ts LONG,
+  -- Thread the operation executed in
+  utid INT,
+  -- Exception level the instruction was executed in
+  exception_level STRING,
+  -- Instruction virtual address
+  instruction_frame_id INT,
+  -- Type of operation sampled
+  operation STRING,
+  -- The virtual address accessed by the operation (0 if no memory access was
+  -- performed)
+  data_virtual_address LONG,
+  -- The physical address accessed by the operation (0 if no memory access was
+  -- performed)
+  data_physical_address LONG,
+  -- Cycle count from the operation being dispatched for issue to the operation
+  -- being complete.
+  total_latency INT,
+  -- Cycle count from the operation being dispatched for issue to the operation
+  -- being issued for execution.
+  issue_latency INT,
+  -- Cycle count from a virtual address being passed to the MMU for translation
+  -- to the result of the translation being available.
+  translation_latency INT,
+  -- Where the data returned for a load operation was sourced
+  data_source STRING,
+  -- Operation generated an exception
+  exception_gen BOOL,
+  -- Operation architecturally retired
+  retired BOOL,
+  -- Operation caused a level 1 data cache access
+  l1d_access BOOL,
+  -- Operation caused a level 1 data cache refill
+  l1d_refill BOOL,
+  -- Operation caused a TLB access
+  tlb_access BOOL,
+  -- Operation caused a TLB refill involving at least one translation table walk
+  tlb_refill BOOL,
+  -- Conditional instruction failed its condition code check
+  not_taken BOOL,
+  -- Whether a branch caused a correction to the predicted program flow
+  mispred BOOL,
+  -- Operation caused a last level data or unified cache access
+  llc_access BOOL,
+  -- Whether the operation could not be completed by the last level data cache
+  -- (or any above)
+  llc_refill BOOL,
+  -- Operation caused an access to another socket in a multi-socket system
+  remote_access BOOL,
+  -- Operation that incurred additional latency due to the alignment of the
+  -- address and the size of the data being accessed
+  alignment BOOL,
+  -- Whether the operation executed in transactional state
+  tme_transaction BOOL,
+  -- SVE or SME operation with at least one false element in the governing
+  -- predicate(s)
+  sve_partial_pred BOOL,
+  -- SVE or SME operation with no true element in the governing predicate(s)
+  sve_empty_pred BOOL,
+  -- Whether a load operation caused a cache access to at least the level 2 data
+  -- or unified cache
+  l2d_access BOOL,
+  -- Whether a load operation accessed and missed the level 2 data or unified
+  -- cache. Not set for accesses that are satisfied from refilling data of a
+  -- previous miss
+  l2d_hit BOOL,
+  -- Whether a load operation accessed modified data in a cache
+  cache_data_modified BOOL,
+  -- Whether a load operation hit a recently fetched line in a cache
+  recenty_fetched BOOL,
+  -- Whether a load operation snooped data from a cache outside the cache
+  -- hierarchy of this core
+  data_snooped BOOL
+) AS
+SELECT
+  ts,
+  utid,
+  exception_level,
+  instruction_frame_id,
+  operation,
+  data_virtual_address,
+  data_physical_address,
+  total_latency,
+  issue_latency,
+  translation_latency,
+  data_source,
+  (events_bitmask & (1 << 0)) <> 0 AS exception_gen,
+  (events_bitmask & (1 << 1)) <> 0 AS retired,
+  (events_bitmask & (1 << 2)) <> 0 AS l1d_access,
+  (events_bitmask & (1 << 3)) <> 0 AS l1d_refill,
+  (events_bitmask & (1 << 4)) <> 0 AS tlb_access,
+  (events_bitmask & (1 << 5)) <> 0 AS tlb_refill,
+  (events_bitmask & (1 << 6)) <> 0 AS not_taken,
+  (events_bitmask & (1 << 7)) <> 0 AS mispred,
+  (events_bitmask & (1 << 8)) <> 0 AS llc_access,
+  (events_bitmask & (1 << 9)) <> 0 AS llc_refill,
+  (events_bitmask & (1 << 10)) <> 0 AS remote_access,
+  (events_bitmask & (1 << 11)) <> 0 AS alignment,
+  (events_bitmask & (1 << 16)) <> 0 AS tme_transaction,
+  (events_bitmask & (1 << 17)) <> 0 AS sve_partial_pred,
+  (events_bitmask & (1 << 18)) <> 0 AS sve_empty_pred,
+  (events_bitmask & (1 << 19)) <> 0 AS l2d_access,
+  (events_bitmask & (1 << 20)) <> 0 AS l2d_hit,
+  (events_bitmask & (1 << 21)) <> 0 AS cache_data_modified,
+  (events_bitmask & (1 << 22)) <> 0 AS recenty_fetched,
+  (events_bitmask & (1 << 23)) <> 0 AS data_snooped
+FROM __intrinsic_spe_record;
diff --git a/src/trace_processor/sorter/trace_sorter.cc b/src/trace_processor/sorter/trace_sorter.cc
index 00d12cce7b..d5e27f09f1 100644
--- a/src/trace_processor/sorter/trace_sorter.cc
+++ b/src/trace_processor/sorter/trace_sorter.cc
@@ -26,6 +26,7 @@
 #include "perfetto/base/compiler.h"
 #include "perfetto/base/logging.h"
#include "perfetto/public/compiler.h" +#include "perfetto/trace_processor/trace_blob_view.h" #include "src/trace_processor/importers/android_bugreport/android_log_event.h" #include "src/trace_processor/importers/common/parser_types.h" #include "src/trace_processor/importers/common/trace_parser.h" @@ -245,6 +246,10 @@ void TraceSorter::ParseTracePacket(TraceProcessorContext& context, event.ts, std::move(token_buffer_.Extract(id).value)); return; + case TimestampedEvent::Type::kSpeRecord: + context.spe_record_parser->ParseSpeRecord( + event.ts, token_buffer_.Extract(id)); + return; case TimestampedEvent::Type::kSystraceLine: context.json_trace_parser->ParseSystraceLine( event.ts, token_buffer_.Extract(id)); @@ -279,6 +284,7 @@ void TraceSorter::ParseEtwPacket(TraceProcessorContext& context, case TimestampedEvent::Type::kInlineSchedWaking: case TimestampedEvent::Type::kFtraceEvent: case TimestampedEvent::Type::kTrackEvent: + case TimestampedEvent::Type::kSpeRecord: case TimestampedEvent::Type::kSystraceLine: case TimestampedEvent::Type::kTracePacket: case TimestampedEvent::Type::kPerfRecord: @@ -312,6 +318,7 @@ void TraceSorter::ParseFtracePacket(TraceProcessorContext& context, return; case TimestampedEvent::Type::kEtwEvent: case TimestampedEvent::Type::kTrackEvent: + case TimestampedEvent::Type::kSpeRecord: case TimestampedEvent::Type::kSystraceLine: case TimestampedEvent::Type::kTracePacket: case TimestampedEvent::Type::kPerfRecord: @@ -347,6 +354,9 @@ void TraceSorter::ExtractAndDiscardTokenizedObject( case TimestampedEvent::Type::kJsonValueWithDur: base::ignore_result(token_buffer_.Extract(id)); return; + case TimestampedEvent::Type::kSpeRecord: + base::ignore_result(token_buffer_.Extract(id)); + return; case TimestampedEvent::Type::kSystraceLine: base::ignore_result(token_buffer_.Extract(id)); return; diff --git a/src/trace_processor/sorter/trace_sorter.h b/src/trace_processor/sorter/trace_sorter.h index 751056c389..588c8a39b7 100644 --- 
a/src/trace_processor/sorter/trace_sorter.h +++ b/src/trace_processor/sorter/trace_sorter.h @@ -129,6 +129,15 @@ class TraceSorter { machine_id); } + inline void PushSpeRecord( + int64_t timestamp, + TraceBlobView record, + std::optional machine_id = std::nullopt) { + TraceTokenBuffer::Id id = token_buffer_.Append(std::move(record)); + AppendNonFtraceEvent(timestamp, TimestampedEvent::Type::kSpeRecord, id, + machine_id); + } + inline void PushInstrumentsRow( int64_t timestamp, instruments_importer::Row row, @@ -305,21 +314,22 @@ class TraceSorter { private: struct TimestampedEvent { enum class Type : uint8_t { + kAndroidLogEvent, + kEtwEvent, kFtraceEvent, - kPerfRecord, - kInstrumentsRow, - kTracePacket, + kFuchsiaRecord, kInlineSchedSwitch, kInlineSchedWaking, + kInstrumentsRow, kJsonValue, kJsonValueWithDur, - kFuchsiaRecord, - kTrackEvent, - kSystraceLine, - kEtwEvent, - kAndroidLogEvent, kLegacyV8CpuProfileEvent, - kMax = kLegacyV8CpuProfileEvent, + kPerfRecord, + kSpeRecord, + kSystraceLine, + kTracePacket, + kTrackEvent, + kMax = kTrackEvent, }; // Number of bits required to store the max element in |Type|. diff --git a/src/trace_processor/storage/stats.h b/src/trace_processor/storage/stats.h index 4b44e6c7bf..c8f45a347e 100644 --- a/src/trace_processor/storage/stats.h +++ b/src/trace_processor/storage/stats.h @@ -306,6 +306,13 @@ namespace perfetto::trace_processor::stats { "PREF_RECORD_AUXTRACE messages."), \ F(perf_unknown_aux_data, kIndexed, kDataLoss, kTrace, \ "AUX data type encountered for which there is no known parser."), \ + F(perf_no_tsc_data, kSingle, kInfo, kTrace, \ + "TSC data unavailable. Will be unable to translate HW clocks."), \ + F(spe_no_timestamp, kSingle, kInfo, kTrace, \ + "SPE record with no timestamp. Will try our best to assign a " \ + "timestamp."), \ + F(spe_record_droped, kSingle, kDataLoss, kTrace, \ + "SPE record dropped. E.g. 
Unable to assign it a timestamp."), \ F(memory_snapshot_parser_failure, kSingle, kError, kAnalysis, ""), \ F(thread_time_in_state_out_of_order, kSingle, kError, kAnalysis, ""), \ F(thread_time_in_state_unknown_cpu_freq, \ diff --git a/src/trace_processor/storage/trace_storage.h b/src/trace_processor/storage/trace_storage.h index 2a07c33543..2e2a3ec55d 100644 --- a/src/trace_processor/storage/trace_storage.h +++ b/src/trace_processor/storage/trace_storage.h @@ -48,6 +48,7 @@ #include "src/trace_processor/tables/jit_tables_py.h" #include "src/trace_processor/tables/memory_tables_py.h" #include "src/trace_processor/tables/metadata_tables_py.h" +#include "src/trace_processor/tables/perf_tables_py.h" #include "src/trace_processor/tables/profiler_tables_py.h" #include "src/trace_processor/tables/sched_tables_py.h" #include "src/trace_processor/tables/slice_tables_py.h" @@ -846,6 +847,13 @@ class TraceStorage { } tables::JitFrameTable* mutable_jit_frame_table() { return &jit_frame_table_; } + const tables::SpeRecordTable& spe_record_table() const { + return spe_record_table_; + } + tables::SpeRecordTable* mutable_spe_record_table() { + return &spe_record_table_; + } + const tables::InputMethodClientsTable& inputmethod_clients_table() const { return inputmethod_clients_table_; } @@ -1209,6 +1217,9 @@ class TraceStorage { tables::JitCodeTable jit_code_table_{&string_pool_}; tables::JitFrameTable jit_frame_table_{&string_pool_}; + // Perf tables + tables::SpeRecordTable spe_record_table_{&string_pool_}; + // Winscope tables tables::InputMethodClientsTable inputmethod_clients_table_{&string_pool_}; tables::InputMethodManagerServiceTable inputmethod_manager_service_table_{ diff --git a/src/trace_processor/tables/BUILD.gn b/src/trace_processor/tables/BUILD.gn index 72796e5507..5b4a13382e 100644 --- a/src/trace_processor/tables/BUILD.gn +++ b/src/trace_processor/tables/BUILD.gn @@ -23,6 +23,7 @@ perfetto_tp_tables("tables_python") { "jit_tables.py", "memory_tables.py", 
"metadata_tables.py", + "perf_tables.py", "profiler_tables.py", "sched_tables.py", "slice_tables.py", diff --git a/src/trace_processor/tables/perf_tables.py b/src/trace_processor/tables/perf_tables.py new file mode 100644 index 0000000000..60b10f70d3 --- /dev/null +++ b/src/trace_processor/tables/perf_tables.py @@ -0,0 +1,101 @@ +# Copyright (C) 2024 The Android Open Source Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Contains tables related to perf data ingestion. +""" + +from python.generators.trace_processor_table.public import Column as C +from python.generators.trace_processor_table.public import ColumnDoc +from python.generators.trace_processor_table.public import ColumnFlag +from python.generators.trace_processor_table.public import CppInt64 +from python.generators.trace_processor_table.public import CppOptional +from python.generators.trace_processor_table.public import CppString +from python.generators.trace_processor_table.public import CppTableId +from python.generators.trace_processor_table.public import CppUint32 +from python.generators.trace_processor_table.public import Table +from python.generators.trace_processor_table.public import TableDoc +from .profiler_tables import STACK_PROFILE_FRAME_TABLE +from .metadata_tables import THREAD_TABLE + +SPE_RECORD_TABLE = Table( + python_module=__file__, + class_name='SpeRecordTable', + sql_name='__intrinsic_spe_record', + columns=[ + C('ts', CppInt64(), ColumnFlag.SORTED), + C('utid', 
CppOptional(CppTableId(THREAD_TABLE))), + C('exception_level', CppString()), + C('instruction_frame_id', + CppOptional(CppTableId(STACK_PROFILE_FRAME_TABLE))), + C('operation', CppString()), + C('data_virtual_address', CppInt64()), + C('data_physical_address', CppInt64()), + C('total_latency', CppUint32()), + C('issue_latency', CppUint32()), + C('translation_latency', CppUint32()), + C('events_bitmask', CppInt64()), + C('data_source', CppString()), + ], + tabledoc=TableDoc( + doc=''' + This table has a row for each sampled operation in an ARM Statistical + Profiling Extension trace. + ''', + group='Perf', + columns={ + 'ts': + 'Time the operation was sampled', + 'utid': + 'EXecuting thread', + 'exception_level': + 'Exception level the operation executed in', + 'instruction_frame_id': + ColumnDoc( + 'Instruction virtual address', + joinable='stack_profile_frame.id'), + 'operation': + 'Operation executed', + 'data_virtual_address': + 'Virtual address of accesses data (if any)', + 'data_physical_address': + ''' + Physical address of accesses data (if any) + ''', + 'total_latency': + ''' + Cycle count from the operation being dispatched for issue to + the operation being complete. + ''', + 'issue_latency': + ''' + Cycle count from the operation being dispatched for issue to + the operation being issued for execution. + ''', + 'translation_latency': + ''' + Cycle count from a virtual address being passed to the MMU for + translation to the result of the translation being available. + ''', + 'events_bitmask': + 'Events generated by the operation', + 'data_source': + ''' + Where the data returned for a load operation was sourced + ''', + }, + ), +) + +# Keep this list sorted. 
+ALL_TABLES = [SPE_RECORD_TABLE] diff --git a/src/trace_processor/tables/table_destructors.cc b/src/trace_processor/tables/table_destructors.cc index f993a51507..e49761c003 100644 --- a/src/trace_processor/tables/table_destructors.cc +++ b/src/trace_processor/tables/table_destructors.cc @@ -20,6 +20,7 @@ #include "src/trace_processor/tables/jit_tables_py.h" #include "src/trace_processor/tables/memory_tables_py.h" #include "src/trace_processor/tables/metadata_tables_py.h" +#include "src/trace_processor/tables/perf_tables_py.h" #include "src/trace_processor/tables/profiler_tables_py.h" #include "src/trace_processor/tables/sched_tables_py.h" #include "src/trace_processor/tables/slice_tables_py.h" @@ -66,6 +67,9 @@ ClockSnapshotTable::~ClockSnapshotTable() = default; MachineTable::~MachineTable() = default; TraceFileTable::~TraceFileTable() = default; +// perf_tables.py +SpeRecordTable::~SpeRecordTable() = default; + // profiler_tables_py.h StackProfileMappingTable::~StackProfileMappingTable() = default; StackProfileFrameTable::~StackProfileFrameTable() = default; diff --git a/src/trace_processor/trace_processor_impl.cc b/src/trace_processor/trace_processor_impl.cc index f41f1c85b5..ba89252f3a 100644 --- a/src/trace_processor/trace_processor_impl.cc +++ b/src/trace_processor/trace_processor_impl.cc @@ -57,6 +57,7 @@ #include "src/trace_processor/importers/ninja/ninja_log_parser.h" #include "src/trace_processor/importers/perf/perf_data_tokenizer.h" #include "src/trace_processor/importers/perf/record_parser.h" +#include "src/trace_processor/importers/perf/spe_record_parser.h" #include "src/trace_processor/importers/proto/additional_modules.h" #include "src/trace_processor/importers/proto/content_analyzer.h" #include "src/trace_processor/importers/systrace/systrace_trace_parser.h" @@ -403,6 +404,8 @@ TraceProcessorImpl::TraceProcessorImpl(const Config& cfg) kPerfDataTraceType); context_.perf_record_parser = std::make_unique(&context_); + context_.spe_record_parser = + 
std::make_unique(&context_); #if PERFETTO_BUILDFLAG(PERFETTO_TP_INSTRUMENTS) context_.reader_registry @@ -964,6 +967,8 @@ void TraceProcessorImpl::InitPerfettoSqlEngine() { RegisterStaticTable(storage->mutable_jit_code_table()); RegisterStaticTable(storage->mutable_jit_frame_table()); + RegisterStaticTable(storage->mutable_spe_record_table()); + RegisterStaticTable(storage->mutable_inputmethod_clients_table()); RegisterStaticTable(storage->mutable_inputmethod_manager_service_table()); RegisterStaticTable(storage->mutable_inputmethod_service_table()); diff --git a/src/trace_processor/types/trace_processor_context.h b/src/trace_processor/types/trace_processor_context.h index 3e7ba55f0b..9069ef68c2 100644 --- a/src/trace_processor/types/trace_processor_context.h +++ b/src/trace_processor/types/trace_processor_context.h @@ -63,6 +63,7 @@ class ProtoTraceParser; class SchedEventTracker; class SliceTracker; class SliceTranslationTable; +class SpeRecordParser; class StackProfileTracker; class TraceFileTracker; class TraceReaderRegistry; @@ -169,6 +170,7 @@ class TraceProcessorContext { std::unique_ptr json_trace_parser; std::unique_ptr fuchsia_record_parser; std::unique_ptr perf_record_parser; + std::unique_ptr spe_record_parser; std::unique_ptr instruments_row_parser; std::unique_ptr android_log_event_parser; diff --git a/src/trace_processor/util/trace_blob_view_reader.cc b/src/trace_processor/util/trace_blob_view_reader.cc index 4ba5b13f11..78a1e1f1ce 100644 --- a/src/trace_processor/util/trace_blob_view_reader.cc +++ b/src/trace_processor/util/trace_blob_view_reader.cc @@ -62,6 +62,11 @@ bool TraceBlobViewReader::PopFrontUntil(const size_t target_offset) { std::optional TraceBlobViewReader::SliceOff( size_t offset, size_t length) const { + // If the length is zero, then a zero-sized blob view is always appropriate.
+ if (PERFETTO_UNLIKELY(length == 0)) { + return TraceBlobView(); + } + PERFETTO_DCHECK(offset >= start_offset()); // Fast path: the slice fits entirely inside the first TBV, we can just slice @@ -75,11 +80,6 @@ std::optional TraceBlobViewReader::SliceOff( length); } - // If the length is zero, then a zero-sized blob view is always approrpriate. - if (PERFETTO_UNLIKELY(length == 0)) { - return TraceBlobView(); - } - // If we don't have any TBVs or the end of the slice does not fit, then we // cannot possibly return a full slice. if (PERFETTO_UNLIKELY(data_.empty() || offset + length > end_offset_)) { diff --git a/src/trace_processor/util/trace_blob_view_reader.h b/src/trace_processor/util/trace_blob_view_reader.h index c39ffaba7c..69e5aa35f0 100644 --- a/src/trace_processor/util/trace_blob_view_reader.h +++ b/src/trace_processor/util/trace_blob_view_reader.h @@ -18,9 +18,13 @@ #define SRC_TRACE_PROCESSOR_UTIL_TRACE_BLOB_VIEW_READER_H_ #include +#include +#include #include +#include "perfetto/base/logging.h" #include "perfetto/ext/base/circular_queue.h" +#include "perfetto/public/compiler.h" #include "perfetto/trace_processor/trace_blob_view.h" namespace perfetto::trace_processor::util { @@ -31,7 +35,74 @@ namespace perfetto::trace_processor::util { // 2) Stitching together the cross-chunk spanning pieces. // 3) Dropping data when it is no longer necessary to be buffered. class TraceBlobViewReader { + private: + struct Entry { + // File offset of the first byte in `data`. 
+ size_t start_offset; + TraceBlobView data; + size_t end_offset() const { return start_offset + data.size(); } + }; + public: + class Iterator { + public: + Iterator(const Iterator&) = default; + Iterator(Iterator&&) = default; + Iterator& operator=(const Iterator&) = default; + Iterator& operator=(Iterator&&) = default; + + ~Iterator() = default; + + uint8_t operator*() const { + PERFETTO_DCHECK(file_offset_ < iter_->end_offset()); + return iter_->data.data()[file_offset_ - iter_->start_offset]; + } + + explicit operator bool() const { return file_offset_ != end_offset_; } + + size_t file_offset() const { return file_offset_; } + + bool MaybeAdvance(size_t delta) { + if (delta == 0) { + return true; + } + if (delta > end_offset_ - file_offset_) { + return false; + } + file_offset_ += delta; + if (PERFETTO_LIKELY(file_offset_ < iter_->end_offset())) { + return true; + } + while (file_offset_ > iter_->end_offset()) { + ++iter_; + } + if (file_offset_ == iter_->end_offset()) { + ++iter_; + } + + return true; + } + + private: + friend TraceBlobViewReader; + Iterator(base::CircularQueue::Iterator iter, + size_t file_offset, + size_t end_offset) + : iter_(std::move(iter)), + file_offset_(file_offset), + end_offset_(end_offset) {} + base::CircularQueue::Iterator iter_; + size_t file_offset_; + size_t end_offset_; + }; + + Iterator begin() const { + return Iterator(data_.begin(), start_offset(), end_offset()); + } + Iterator end() const { + return Iterator(data_.end(), end_offset(), end_offset()); + } + // Adds a `TraceBlobView` at the back. void PushBack(TraceBlobView); @@ -58,7 +129,6 @@ class TraceBlobViewReader { // // NOTE: If `offset` < 'file_offset()' this method will CHECK fail. std::optional SliceOff(size_t offset, size_t length) const; - // Returns the offset to the start of the available data. size_t start_offset() const { return data_.empty() ? 
end_offset_ : data_.front().start_offset; @@ -73,13 +143,6 @@ class TraceBlobViewReader { bool empty() const { return data_.empty(); } private: - struct Entry { - // File offset of the first byte in `data`. - size_t start_offset; - TraceBlobView data; - }; - using Iterator = base::CircularQueue::Iterator; - // CircularQueue has no const_iterator, so mutable is needed to access it from // const methods. mutable base::CircularQueue data_; diff --git a/test/data/simpleperf/spe.trace.zip.sha256 b/test/data/simpleperf/spe.trace.zip.sha256 new file mode 100644 index 0000000000..62d7c6d5e8 --- /dev/null +++ b/test/data/simpleperf/spe.trace.zip.sha256 @@ -0,0 +1 @@ +199e74a411f20e4670c9330e891b371f638bd8745a82dc22bb5f83c0ca8ba5ac \ No newline at end of file diff --git a/test/trace_processor/diff_tests/parser/simpleperf/tests.py b/test/trace_processor/diff_tests/parser/simpleperf/tests.py index 4d21fd5657..18ef4e5088 100644 --- a/test/trace_processor/diff_tests/parser/simpleperf/tests.py +++ b/test/trace_processor/diff_tests/parser/simpleperf/tests.py @@ -263,4 +263,63 @@ def test_etm_dummy_parsing(self): "perf_aux_lost",0 "perf_aux_missing",0 "perf_auxtrace_missing",0 - ''')) \ No newline at end of file + ''')) + + def test_spe_operation(self): + return DiffTestBlueprint( + trace=DataPath('simpleperf/spe.trace.zip'), + query=''' + INCLUDE PERFETTO MODULE linux.perf.spe; + SELECT + operation, + count(*) AS cnt + FROM linux_perf_spe_record + GROUP BY operation + ORDER BY operation + ''', + out=Csv(''' + "operation","cnt" + "BRANCH",68038 + "LOAD",54 + "STORE",47 + ''')) + + def test_spe_pc(self): + return DiffTestBlueprint( + trace=DataPath('simpleperf/spe.trace.zip'), + query=''' + INCLUDE PERFETTO MODULE linux.perf.spe; + SELECT + printf('0x%08x', rel_pc + m.start - exact_offset) AS pc, + exception_level, + COUNT(*) AS cnt + FROM linux_perf_spe_record r, stack_profile_frame f + ON r.instruction_frame_id = f.id, + stack_profile_mapping m + ON f.mapping = m.id + GROUP BY pc, 
exception_level + HAVING cnt > 1 + ORDER BY pc, exception_level + ''', + out=Csv(''' + "pc","exception_level","cnt" + "0x5cfc344464","EL0",2157 + "0x5cfc344528","EL0",2166 + "0x5cfc3445c4","EL0",2154 + "0x5cfc3446c8","EL0",2108 + "0x5cfc3447a8","EL0",2209 + "0x5cfc344854","EL0",2178 + "0x5cfc34492c","EL0",2246 + "0x5cfc344c14","EL0",4461 + "0x5cfc344cd0","EL0",4416 + "0x5cfc344d7c","EL0",4399 + "0x5cfc344df4","EL0",2 + "0x5cfc344e90","EL0",4427 + "0x5cfc3450e8","EL0",8756 + "0x5cfc345194","EL0",8858 + "0x5cfc345240","EL0",8776 + "0x5cfc345354","EL0",8659 + "0xffffd409990628","EL1",14 + "0xffffd40999062c","EL1",15 + "0xffffd40fb0f124","EL1",2 + '''))