diff --git a/api/docs/release.dox b/api/docs/release.dox index ea1b24f2461..d9199a3082b 100644 --- a/api/docs/release.dox +++ b/api/docs/release.dox @@ -141,6 +141,9 @@ Further non-compatibility-affecting changes include: -replay_file, -cpu_schedule_file). - Added additional timestamps to drmemtrace traces: at the end of each buffer, and before and after each system call. + - Added type_is_read() API that returns true if a trace type reads from memory. + - Added instr_num_memory_read_access() and instr_num_memory_write_access() that return + the number of memory read and write accesses of an instruction respectively. **************************************************
diff --git a/clients/drcachesim/common/trace_entry.h b/clients/drcachesim/common/trace_entry.h index 12a9cc8ba0b..674913bd277 100644 --- a/clients/drcachesim/common/trace_entry.h +++ b/clients/drcachesim/common/trace_entry.h @@ -644,6 +644,17 @@ type_is_data(const trace_type_t type) type == TRACE_TYPE_DATA_FLUSH_END; } +/** + * Returns whether the type represents a memory read access. + */ +static inline bool +type_is_read(const trace_type_t type) +{ + return type_is_prefetch(type) || type == TRACE_TYPE_READ || + type == TRACE_TYPE_INSTR_FLUSH || type == TRACE_TYPE_INSTR_FLUSH_END || + type == TRACE_TYPE_DATA_FLUSH || type == TRACE_TYPE_DATA_FLUSH_END; +} + static inline bool marker_type_is_function_marker(const trace_marker_type_t mark) { diff --git a/clients/drcachesim/tests/invariant_checker_test.cpp b/clients/drcachesim/tests/invariant_checker_test.cpp index 19e9ee019e0..78a115138a4 100644 --- a/clients/drcachesim/tests/invariant_checker_test.cpp +++ b/clients/drcachesim/tests/invariant_checker_test.cpp @@ -277,22 +277,27 @@ check_sane_control_flow() // Negative test: branches with encodings which do not go to their targets. { - std::vector memrefs = { - gen_marker(TID, TRACE_MARKER_TYPE_FILETYPE, OFFLINE_FILE_TYPE_ENCODINGS), -# if defined(X86_64) || defined(X86_32) - // 0x74 is "je" with the 2nd byte the offset. - gen_branch_encoded(TID, 0x71019dbc, { 0x74, 0x32 }), - gen_instr_encoded(0x71019ded, { 0x01 }, TID), -# elif defined(ARM_64) - // 71019dbc: 540001a1 b.ne 71019df0 - // <__executable_start+0x19df0> - gen_branch_encoded(TID, 0x71019dbc, 0x540001a1), - gen_instr_encoded(0x71019ded, 0x01, TID), -# else - // TODO i#5871: Add AArch32 (and RISC-V) encodings. -# endif - }; + instr_t *move1 = XINST_CREATE_move(GLOBAL_DCONTEXT, opnd_create_reg(REG1), + opnd_create_reg(REG2)); + instr_t *move2 = XINST_CREATE_move(GLOBAL_DCONTEXT, opnd_create_reg(REG1), + opnd_create_reg(REG2)); + instr_t *cond_jmp = + XINST_CREATE_jump_cond(GLOBAL_DCONTEXT, DR_PRED_EQ, opnd_create_instr(move1)); + instrlist_t *ilist = instrlist_create(GLOBAL_DCONTEXT); + instrlist_append(ilist, cond_jmp); + instrlist_append(ilist, move1); + instrlist_append(ilist, move2); + + std::vector memref_instr_vec = { + { gen_marker(1, TRACE_MARKER_TYPE_FILETYPE, OFFLINE_FILE_TYPE_ENCODINGS), + nullptr }, + { gen_branch(1), cond_jmp }, + { gen_instr(1), move2 } + }; + static constexpr addr_t BASE_ADDR = 0xeba4ad4; + auto memrefs = add_encodings_to_memrefs(ilist, memref_instr_vec, BASE_ADDR); + instrlist_clear_and_destroy(GLOBAL_DCONTEXT, ilist); if (!run_checker(memrefs, true, { "Branch does not go to the correct target", TID, /*ref_ordinal=*/3, /*last_timestamp=*/0, @@ -303,21 +308,26 @@ check_sane_control_flow() } // Positive test: branches with encodings which go to their targets. { - std::vector memrefs = { - gen_marker(TID, TRACE_MARKER_TYPE_FILETYPE, OFFLINE_FILE_TYPE_ENCODINGS), -# if defined(X86_64) || defined(X86_32) - // 0x74 is "je" with the 2nd byte the offset. - gen_branch_encoded(TID, 0x71019dbc, { 0x74, 0x32 }), -# elif defined(ARM_64) - // 71019dbc: 540001a1 b.ne 71019df0 - // <__executable_start+0x19df0> - gen_branch_encoded(TID, 0x71019dbc, 0x540001a1), -# else - // TODO i#5871: Add AArch32 (and RISC-V) encodings. -# endif - gen_instr(TID, 0x71019df0), - }; + instr_t *move1 = XINST_CREATE_move(GLOBAL_DCONTEXT, opnd_create_reg(REG1), + opnd_create_reg(REG2)); + instr_t *move2 = XINST_CREATE_move(GLOBAL_DCONTEXT, opnd_create_reg(REG1), + opnd_create_reg(REG2)); + instr_t *cond_jmp = + XINST_CREATE_jump_cond(GLOBAL_DCONTEXT, DR_PRED_EQ, opnd_create_instr(move1)); + + instrlist_t *ilist = instrlist_create(GLOBAL_DCONTEXT); + instrlist_append(ilist, cond_jmp); + instrlist_append(ilist, move1); + instrlist_append(ilist, move2); + std::vector memref_instr_vec = { + { gen_marker(1, TRACE_MARKER_TYPE_FILETYPE, OFFLINE_FILE_TYPE_ENCODINGS), + nullptr }, + { gen_branch(1), cond_jmp }, + { gen_instr(1), move1 } + }; + static constexpr addr_t BASE_ADDR = 0xeba4ad4; + auto memrefs = add_encodings_to_memrefs(ilist, memref_instr_vec, BASE_ADDR); if (!run_checker(memrefs, false)) { return false; } @@ -1955,6 +1965,242 @@ check_timestamps_increase_monotonically(void) return true; } +bool +check_read_write_records_match_operands() +{ + // Only the number of memory read and write records is checked against the + // operands. Address and size are not used. + std::cerr << "Testing number of memory read/write records matching operands\n"; + constexpr memref_tid_t TID = 1; + + // Correct: number of read records matches the operand. + { + instr_t *load = XINST_CREATE_load(GLOBAL_DCONTEXT, opnd_create_reg(REG1), + OPND_CREATE_MEMPTR(REG1, /*disp=*/0)); + instrlist_t *ilist = instrlist_create(GLOBAL_DCONTEXT); + instrlist_append(ilist, load); + + std::vector memref_instr_vec = { + { gen_marker(TID, TRACE_MARKER_TYPE_FILETYPE, OFFLINE_FILE_TYPE_ENCODINGS), + nullptr }, + { gen_instr(TID), load }, + { gen_data(TID, /*load=*/true, /*addr=*/0, /*size=*/0), nullptr } + }; + static constexpr addr_t BASE_ADDR = 0xeba4ad4; + auto memrefs = add_encodings_to_memrefs(ilist, memref_instr_vec, BASE_ADDR); + instrlist_clear_and_destroy(GLOBAL_DCONTEXT, ilist); + if (!run_checker(memrefs, false)) { + return false; + } + } + // Incorrect: too many read records. + { + instr_t *load = XINST_CREATE_load(GLOBAL_DCONTEXT, opnd_create_reg(REG1), + OPND_CREATE_MEMPTR(REG1, /*disp=*/0)); + instrlist_t *ilist = instrlist_create(GLOBAL_DCONTEXT); + instrlist_append(ilist, load); + + std::vector memref_instr_vec = { + { gen_marker(TID, TRACE_MARKER_TYPE_FILETYPE, OFFLINE_FILE_TYPE_ENCODINGS), + nullptr }, + { gen_instr(TID), load }, + { gen_data(TID, /*load=*/true, /*addr=*/0, /*size=*/0), nullptr }, + { gen_data(TID, /*load=*/true, /*addr=*/0, /*size=*/0), nullptr } + }; + static constexpr addr_t BASE_ADDR = 0xeba4ad4; + auto memrefs = add_encodings_to_memrefs(ilist, memref_instr_vec, BASE_ADDR); + instrlist_clear_and_destroy(GLOBAL_DCONTEXT, ilist); + if (!run_checker(memrefs, true, + { "Too many read records", TID, /*ref_ordinal=*/4, + /*last_timestamp=*/0, /*instrs_since_last_timestamp=*/1 }, + "Failed to catch too many read records")) + return false; + } + // Incorrect: missing read records. + { + instr_t *nop = XINST_CREATE_nop(GLOBAL_DCONTEXT); + instr_t *load = XINST_CREATE_load(GLOBAL_DCONTEXT, opnd_create_reg(REG1), + OPND_CREATE_MEMPTR(REG1, /*disp=*/0)); + instrlist_t *ilist = instrlist_create(GLOBAL_DCONTEXT); + instrlist_append(ilist, load); + instrlist_append(ilist, nop); + + std::vector memref_instr_vec = { + { gen_marker(TID, TRACE_MARKER_TYPE_FILETYPE, OFFLINE_FILE_TYPE_ENCODINGS), + nullptr }, + { gen_instr(TID), load }, + { gen_instr(TID), nop }, + }; + static constexpr addr_t BASE_ADDR = 0xeba4ad4; + auto memrefs = add_encodings_to_memrefs(ilist, memref_instr_vec, BASE_ADDR); + instrlist_clear_and_destroy(GLOBAL_DCONTEXT, ilist); + if (!run_checker(memrefs, true, + { "Missing read records", TID, /*ref_ordinal=*/3, + /*last_timestamp=*/0, /*instrs_since_last_timestamp=*/2 }, + "Failed to catch missing read records")) + return false; + } + // Correct: number of write records matches the operand. + { + instr_t *store = XINST_CREATE_store( + GLOBAL_DCONTEXT, OPND_CREATE_MEMPTR(REG2, /*disp=*/0), opnd_create_reg(REG1)); + instrlist_t *ilist = instrlist_create(GLOBAL_DCONTEXT); + instrlist_append(ilist, store); + + std::vector memref_instr_vec = { + { gen_marker(TID, TRACE_MARKER_TYPE_FILETYPE, OFFLINE_FILE_TYPE_ENCODINGS), + nullptr }, + { gen_instr(TID), store }, + { gen_data(TID, /*load=*/false, /*addr=*/0, /*size=*/0), nullptr } + }; + + static constexpr addr_t BASE_ADDR = 0xeba4ad4; + auto memrefs = add_encodings_to_memrefs(ilist, memref_instr_vec, BASE_ADDR); + instrlist_clear_and_destroy(GLOBAL_DCONTEXT, ilist); + if (!run_checker(memrefs, false)) { + return false; + } + } + // Incorrect: too many write records. + { + instr_t *store = XINST_CREATE_store( + GLOBAL_DCONTEXT, OPND_CREATE_MEMPTR(REG2, /*disp=*/0), opnd_create_reg(REG1)); + instrlist_t *ilist = instrlist_create(GLOBAL_DCONTEXT); + instrlist_append(ilist, store); + + std::vector memref_instr_vec = { + { gen_marker(TID, TRACE_MARKER_TYPE_FILETYPE, OFFLINE_FILE_TYPE_ENCODINGS), + nullptr }, + { gen_instr(TID), store }, + { gen_data(TID, /*load=*/false, /*addr=*/0, /*size=*/0), nullptr }, + { gen_data(TID, /*load=*/false, /*addr=*/0, /*size=*/0), nullptr } + }; + + static constexpr addr_t BASE_ADDR = 0xeba4ad4; + auto memrefs = add_encodings_to_memrefs(ilist, memref_instr_vec, BASE_ADDR); + instrlist_clear_and_destroy(GLOBAL_DCONTEXT, ilist); + if (!run_checker(memrefs, true, + { "Too many write records", TID, /*ref_ordinal=*/4, + /*last_timestamp=*/0, + /*instrs_since_last_timestamp=*/1 }, + "Failed to catch too many write records")) + return false; + } + // Incorrect: missing write records. + { + instr_t *nop = XINST_CREATE_nop(GLOBAL_DCONTEXT); + instr_t *store = XINST_CREATE_store( + GLOBAL_DCONTEXT, OPND_CREATE_MEMPTR(REG2, /*disp=*/0), opnd_create_reg(REG1)); + instrlist_t *ilist = instrlist_create(GLOBAL_DCONTEXT); + instrlist_append(ilist, store); + instrlist_append(ilist, nop); + + std::vector memref_instr_vec = { + { gen_marker(TID, TRACE_MARKER_TYPE_FILETYPE, OFFLINE_FILE_TYPE_ENCODINGS), + nullptr }, + { gen_instr(TID), store }, + { gen_instr(TID), nop }, + }; + + static constexpr addr_t BASE_ADDR = 0xeba4ad4; + auto memrefs = add_encodings_to_memrefs(ilist, memref_instr_vec, BASE_ADDR); + instrlist_clear_and_destroy(GLOBAL_DCONTEXT, ilist); + if (!run_checker(memrefs, true, + { "Missing write records", TID, /*ref_ordinal=*/3, + /*last_timestamp=*/0, + /*instrs_since_last_timestamp=*/2 }, + "Fail to catch missing write records")) + return false; + } +#if defined(X86_64) || defined(X86_32) + // Correct: number of read and write records matches the operand. + { + instr_t *movs = INSTR_CREATE_movs_1(GLOBAL_DCONTEXT); + instrlist_t *ilist = instrlist_create(GLOBAL_DCONTEXT); + instrlist_append(ilist, movs); + + std::vector memref_instr_vec = { + { gen_marker(TID, TRACE_MARKER_TYPE_FILETYPE, OFFLINE_FILE_TYPE_ENCODINGS), + nullptr }, + { gen_instr(TID), movs }, + { gen_data(TID, /*load=*/true, /*addr=*/0, /*size=*/0), nullptr }, + { gen_data(TID, /*load=*/false, /*addr=*/0, /*size=*/0), nullptr } + }; + + static constexpr addr_t BASE_ADDR = 0xeba4ad4; + auto memrefs = add_encodings_to_memrefs(ilist, memref_instr_vec, BASE_ADDR); + instrlist_clear_and_destroy(GLOBAL_DCONTEXT, ilist); + if (!run_checker(memrefs, false)) { + return false; + } + } + // Correct: handle cache flush operand correctly. + { + instr_t *clflush = INSTR_CREATE_clflush( + GLOBAL_DCONTEXT, OPND_CREATE_MEM_clflush(REG1, REG_NULL, 0, 0)); + instrlist_t *ilist = instrlist_create(GLOBAL_DCONTEXT); + instrlist_append(ilist, clflush); + static constexpr addr_t BASE_ADDR = 0xeba4ad4; + std::vector memref_setup = { + { gen_marker(TID, TRACE_MARKER_TYPE_FILETYPE, OFFLINE_FILE_TYPE_ENCODINGS), + nullptr }, + { gen_instr(TID), clflush }, + { gen_addr(TID, /*type=*/TRACE_TYPE_DATA_FLUSH, /*addr=*/0, /*size=*/0), + nullptr }, + }; + auto memrefs = add_encodings_to_memrefs(ilist, memref_setup, BASE_ADDR); + instrlist_clear_and_destroy(GLOBAL_DCONTEXT, ilist); + if (!run_checker(memrefs, false)) { + return false; + } + } + // Correct: ignore predicated operands which may not have memory access. + { + instr_t *nop = XINST_CREATE_nop(GLOBAL_DCONTEXT); + instr_t *rep_movs = INSTR_CREATE_rep_movs_1(GLOBAL_DCONTEXT); + instrlist_t *ilist = instrlist_create(GLOBAL_DCONTEXT); + instrlist_append(ilist, rep_movs); + instrlist_append(ilist, nop); + static constexpr addr_t BASE_ADDR = 0xeba4ad4; + std::vector memref_setup = { + { gen_marker(TID, TRACE_MARKER_TYPE_FILETYPE, OFFLINE_FILE_TYPE_ENCODINGS), + nullptr }, + { gen_instr(TID), rep_movs }, + { gen_instr(TID), nop }, + }; + auto memrefs = add_encodings_to_memrefs(ilist, memref_setup, BASE_ADDR); + instrlist_clear_and_destroy(GLOBAL_DCONTEXT, ilist); + if (!run_checker(memrefs, false)) { + return false; + } + } + // Correct: ignore operands with opcode which do not have real memory + // access. + { + instr_t *lea = + INSTR_CREATE_lea(GLOBAL_DCONTEXT, opnd_create_reg(REG1), + opnd_create_base_disp(REG1, REG_NULL, 0, 1, OPSZ_lea)); + instr_t *nop = XINST_CREATE_nop(GLOBAL_DCONTEXT); + instrlist_t *ilist = instrlist_create(GLOBAL_DCONTEXT); + instrlist_append(ilist, lea); + instrlist_append(ilist, nop); + static constexpr addr_t BASE_ADDR = 0xeba4ad4; + std::vector memref_setup = { + { gen_marker(TID, TRACE_MARKER_TYPE_FILETYPE, OFFLINE_FILE_TYPE_ENCODINGS), + nullptr }, + { gen_instr(TID), lea }, + { gen_instr(TID), nop }, + }; + auto memrefs = add_encodings_to_memrefs(ilist, memref_setup, BASE_ADDR); + instrlist_clear_and_destroy(GLOBAL_DCONTEXT, ilist); + if (!run_checker(memrefs, false)) { + return false; + } + } +#endif + return true; +} + int test_main(int argc, const char *argv[]) { @@ -1963,7 +2209,8 @@ test_main(int argc, const char *argv[]) check_duplicate_syscall_with_same_pc() && check_syscalls() && check_rseq_side_exit_discontinuity() && check_schedule_file() && check_branch_decoration() && check_filter_endpoint() && - check_timestamps_increase_monotonically()) { + check_timestamps_increase_monotonically() && + check_read_write_records_match_operands()) { std::cerr << "invariant_checker_test passed\n"; return 0; } diff --git a/clients/drcachesim/tests/memref_gen.h b/clients/drcachesim/tests/memref_gen.h index c444ba2b5e3..b752994ec03 100644 --- a/clients/drcachesim/tests/memref_gen.h +++ b/clients/drcachesim/tests/memref_gen.h @@ -61,6 +61,17 @@ struct memref_with_IR_t { // for instrs created as such. }; +inline memref_t +gen_addr(memref_tid_t tid, trace_type_t type, addr_t addr, size_t size = 1) +{ + memref_t memref = {}; + memref.instr.type = type; + memref.instr.tid = tid; + memref.instr.addr = addr; + memref.instr.size = size; + return memref; +} + inline memref_t gen_data(memref_tid_t tid, bool load, addr_t addr, size_t size) { diff --git a/clients/drcachesim/tools/invariant_checker.cpp b/clients/drcachesim/tools/invariant_checker.cpp index fee2b1dc7b4..f420d58b66f 100644 --- a/clients/drcachesim/tools/invariant_checker.cpp +++ b/clients/drcachesim/tools/invariant_checker.cpp @@ -131,6 +131,14 @@ invariant_checker_t::parallel_shard_init(int shard_index, void *worker_data) bool invariant_checker_t::parallel_shard_exit(void *shard_data) { + per_shard_t *shard = reinterpret_cast(shard_data); + if (!TESTANY(OFFLINE_FILE_TYPE_FILTERED | OFFLINE_FILE_TYPE_DFILTERED, + shard->file_type_)) { + report_if_false(shard, shard->expected_read_records_ == 0, + "Missing read records"); + report_if_false(shard, shard->expected_write_records_ == 0, + "Missing write records"); + } return true; } @@ -520,6 +528,23 @@ invariant_checker_t::parallel_shard_memref(void *shard_data, const memref_t &mem if (TESTANY(OFFLINE_FILE_TYPE_SYSCALL_NUMBERS, shard->file_type_) && instr_is_syscall(cur_instr_decoded->data)) shard->expect_syscall_marker_ = true; + if (cur_instr_decoded != nullptr && cur_instr_decoded->data != nullptr) { + instr_t *instr = cur_instr_decoded->data; + if (!instr_is_predicated(instr) && + !TESTANY(OFFLINE_FILE_TYPE_FILTERED | OFFLINE_FILE_TYPE_DFILTERED, + shard->file_type_)) { + // Verify the number of read/write records matches the last + // operand. Skip D-filtered traces which don't have every load or + // store records. + report_if_false(shard, shard->expected_read_records_ == 0, + "Missing read records"); + report_if_false(shard, shard->expected_write_records_ == 0, + "Missing write records"); + + shard->expected_read_records_ = instr_num_memory_read_access(instr); + shard->expected_write_records_ = instr_num_memory_write_access(instr); + } + } } if (knob_verbose_ >= 3) { std::cerr << "::" << memref.data.pid << ":" << memref.data.tid << ":: " @@ -844,6 +869,30 @@ invariant_checker_t::parallel_shard_memref(void *shard_data, const memref_t &mem shard->prev_entry_ = memref; if (type_is_instr_branch(shard->prev_entry_.instr.type)) shard->last_branch_ = shard->prev_entry_; + + if (type_is_data(memref.data.type) && shard->prev_instr_decoded_ != nullptr) { + // If the instruction is predicated, the check is skipped since we do + // not have the data to determine how many memory accesses to expect. + if (!instr_is_predicated(shard->prev_instr_decoded_->data) && + !TESTANY(OFFLINE_FILE_TYPE_FILTERED | OFFLINE_FILE_TYPE_DFILTERED, + shard->file_type_)) { + if (type_is_read(memref.data.type)) { + // Skip D-filtered traces which don't have every load or store records. + report_if_false(shard, shard->expected_read_records_ > 0, + "Too many read records"); + if (shard->expected_read_records_ > 0) { + shard->expected_read_records_--; + } + } else { + // Skip D-filtered traces which don't have every load or store records. + report_if_false(shard, shard->expected_write_records_ > 0, + "Too many write records"); + if (shard->expected_write_records_ > 0) { + shard->expected_write_records_--; + } + } + } + } return true; } diff --git a/clients/drcachesim/tools/invariant_checker.h b/clients/drcachesim/tools/invariant_checker.h index b8a52d1c17f..3f605e03d6e 100644 --- a/clients/drcachesim/tools/invariant_checker.h +++ b/clients/drcachesim/tools/invariant_checker.h @@ -198,6 +198,9 @@ class invariant_checker_t : public analysis_tool_t { bool saw_filter_endpoint_marker_ = false; // Used to check markers after each system call. bool expect_syscall_marker_ = false; + // Counters for expected read and write records. + int expected_read_records_ = 0; + int expected_write_records_ = 0; }; // We provide this for subclasses to run these invariants with custom diff --git a/core/ir/instr_api.h b/core/ir/instr_api.h index 8edb23ba1e4..c94d8a69792 100644 --- a/core/ir/instr_api.h +++ b/core/ir/instr_api.h @@ -1656,6 +1656,20 @@ DR_API uint instr_memory_reference_size(instr_t *instr); +DR_API +/** + * Returns the number of memory read accesses of the instruction. + */ +uint +instr_num_memory_read_access(instr_t *instr); + +DR_API +/** + * Returns the number of memory write accesses of the instruction. + */ +uint +instr_num_memory_write_access(instr_t *instr); + DR_API /** * \return a pointer to user-controlled data fields in a label instruction. diff --git a/core/ir/instr_shared.c b/core/ir/instr_shared.c index a002665a056..fcbbf5e6513 100644 --- a/core/ir/instr_shared.c +++ b/core/ir/instr_shared.c @@ -2660,6 +2660,46 @@ decode_memory_reference_size(void *drcontext, app_pc pc, uint *size_in_bytes) return next_pc; } +/* Returns the number of memory read accesses of the instruction. + */ +uint +instr_num_memory_read_access(instr_t *instr) +{ + int i; + opnd_t curop; + int count = 0; + const int opc = instr_get_opcode(instr); + + if (opc_is_not_a_real_memory_load(opc)) + return 0; + + for (i = 0; i < instr_num_srcs(instr); i++) { + curop = instr_get_src(instr, i); + if (opnd_is_memory_reference(curop)) { + ++count; + } + } + return count; +} + +/* Returns the number of memory write accesses of the instruction. + */ +uint +instr_num_memory_write_access(instr_t *instr) +{ + int i; + opnd_t curop; + int count = 0; + + for (i = 0; i < instr_num_dsts(instr); i++) { + curop = instr_get_dst(instr, i); + if (opnd_is_memory_reference(curop)) { + ++count; + } + } + return count; +} + DR_API dr_instr_label_data_t * instr_get_label_data_area(instr_t *instr)