perf intel-pt: Fix premature IPC
[ Upstream commit 20aa397 ]

The code assumed a change in cycle count means accurate IPC. That is not
correct, for example when sampling both branches and instructions, or at
a FUP packet (which is not CYC-eligible) address. Fix by using an explicit
flag to indicate when IPC can be sampled.

Fixes: 5b1dc0f ("perf intel-pt: Add support for samples to contain IPC ratio")
Signed-off-by: Adrian Hunter <[email protected]>
Reviewed-by: Andi Kleen <[email protected]>
Cc: Jiri Olsa <[email protected]>
Cc: [email protected]
Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Arnaldo Carvalho de Melo <[email protected]>
Signed-off-by: Sasha Levin <[email protected]>
ahunter6 authored and gregkh committed Mar 4, 2021
1 parent bbf7207 commit f906a02
Showing 3 changed files with 17 additions and 11 deletions.
tools/perf/util/intel-pt-decoder/intel-pt-decoder.c (11 changes: 10 additions & 1 deletion)
@@ -2814,9 +2814,18 @@ const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder)
 		}
 		if (intel_pt_sample_time(decoder->pkt_state)) {
 			intel_pt_update_sample_time(decoder);
-			if (decoder->sample_cyc)
+			if (decoder->sample_cyc) {
 				decoder->sample_tot_cyc_cnt = decoder->tot_cyc_cnt;
+				decoder->state.flags |= INTEL_PT_SAMPLE_IPC;
+				decoder->sample_cyc = false;
+			}
 		}
+		/*
+		 * When using only TSC/MTC to compute cycles, IPC can be
+		 * sampled as soon as the cycle count changes.
+		 */
+		if (!decoder->have_cyc)
+			decoder->state.flags |= INTEL_PT_SAMPLE_IPC;
 	}
 
 	decoder->state.timestamp = decoder->sample_timestamp;
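
For orientation, a minimal standalone sketch of the decoder-side rule the hunk above introduces; it is not the kernel code, and the names (toy_decoder, toy_mark_ipc, SAMPLE_IPC_FLAG) are illustrative stand-ins. The assumed semantics follow the hunk: the IPC flag is raised when a CYC packet has contributed to the cycle count since the last sample, or unconditionally when cycles are derived from TSC/MTC alone.

/* Sketch only: simplified stand-ins for the decoder state, assuming the
 * semantics shown in the hunk above; not the kernel implementation.
 */
#include <stdbool.h>
#include <stdio.h>

#define SAMPLE_IPC_FLAG (1 << 4) /* stand-in for INTEL_PT_SAMPLE_IPC */

struct toy_decoder {
	bool have_cyc;		/* CYC packets are present in the trace */
	bool sample_cyc;	/* a CYC packet contributed since the last sample */
	unsigned int flags;
};

/* Decide whether the next sample may carry an IPC ratio. */
static void toy_mark_ipc(struct toy_decoder *d)
{
	if (d->sample_cyc) {
		d->flags |= SAMPLE_IPC_FLAG;
		d->sample_cyc = false;	/* one CYC update, one IPC sample */
	}
	/* With TSC/MTC-only cycle estimation, any cycle-count change is usable. */
	if (!d->have_cyc)
		d->flags |= SAMPLE_IPC_FLAG;
}

int main(void)
{
	struct toy_decoder cases[] = {
		{ .have_cyc = true,  .sample_cyc = true  },	/* CYC seen: IPC ok */
		{ .have_cyc = true,  .sample_cyc = false },	/* no CYC yet: skip */
		{ .have_cyc = false, .sample_cyc = false },	/* TSC/MTC only: IPC ok */
	};

	for (unsigned int i = 0; i < 3; i++) {
		toy_mark_ipc(&cases[i]);
		printf("case %u: IPC %s\n", i,
		       cases[i].flags & SAMPLE_IPC_FLAG ? "sampled" : "skipped");
	}
	return 0;
}
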
tools/perf/util/intel-pt-decoder/intel-pt-decoder.h (1 change: 1 addition & 0 deletions)
@@ -17,6 +17,7 @@
 #define INTEL_PT_ABORT_TX	(1 << 1)
 #define INTEL_PT_ASYNC		(1 << 2)
 #define INTEL_PT_FUP_IP		(1 << 3)
+#define INTEL_PT_SAMPLE_IPC	(1 << 4)
 
 enum intel_pt_sample_type {
 	INTEL_PT_BRANCH		= 1 << 0,
tools/perf/util/intel-pt.c (16 changes: 6 additions & 10 deletions)
@@ -1381,7 +1381,8 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq)
 		sample.branch_stack = (struct branch_stack *)&dummy_bs;
 	}
 
-	sample.cyc_cnt = ptq->ipc_cyc_cnt - ptq->last_br_cyc_cnt;
+	if (ptq->state->flags & INTEL_PT_SAMPLE_IPC)
+		sample.cyc_cnt = ptq->ipc_cyc_cnt - ptq->last_br_cyc_cnt;
 	if (sample.cyc_cnt) {
 		sample.insn_cnt = ptq->ipc_insn_cnt - ptq->last_br_insn_cnt;
 		ptq->last_br_insn_cnt = ptq->ipc_insn_cnt;
@@ -1431,7 +1432,8 @@ static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq)
 	else
 		sample.period = ptq->state->tot_insn_cnt - ptq->last_insn_cnt;
 
-	sample.cyc_cnt = ptq->ipc_cyc_cnt - ptq->last_in_cyc_cnt;
+	if (ptq->state->flags & INTEL_PT_SAMPLE_IPC)
+		sample.cyc_cnt = ptq->ipc_cyc_cnt - ptq->last_in_cyc_cnt;
 	if (sample.cyc_cnt) {
 		sample.insn_cnt = ptq->ipc_insn_cnt - ptq->last_in_insn_cnt;
 		ptq->last_in_insn_cnt = ptq->ipc_insn_cnt;
@@ -1966,14 +1968,8 @@ static int intel_pt_sample(struct intel_pt_queue *ptq)
 
 	ptq->have_sample = false;
 
-	if (ptq->state->tot_cyc_cnt > ptq->ipc_cyc_cnt) {
-		/*
-		 * Cycle count and instruction count only go together to create
-		 * a valid IPC ratio when the cycle count changes.
-		 */
-		ptq->ipc_insn_cnt = ptq->state->tot_insn_cnt;
-		ptq->ipc_cyc_cnt = ptq->state->tot_cyc_cnt;
-	}
+	ptq->ipc_insn_cnt = ptq->state->tot_insn_cnt;
+	ptq->ipc_cyc_cnt = ptq->state->tot_cyc_cnt;
 
 	/*
 	 * Do PEBS first to allow for the possibility that the PEBS timestamp
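
To round out the picture, a matching consumer-side sketch, again with illustrative names rather than the real tools/perf code, of how the flag gates the cycle and instruction deltas in the synthesized samples above: with the flag clear the cycle delta stays zero, so no IPC values are attached and the saved counters do not advance.

/* Sketch only: toy version of the gated delta computation in the hunks
 * above; struct, field and function names are illustrative.
 */
#include <stdio.h>

#define SAMPLE_IPC_FLAG (1 << 4)

struct toy_queue {
	unsigned long long ipc_insn_cnt, ipc_cyc_cnt;	/* running totals */
	unsigned long long last_insn_cnt, last_cyc_cnt;	/* totals at last IPC sample */
};

struct toy_sample {
	unsigned long long insn_cnt, cyc_cnt;
};

/* Attach an (instruction, cycle) delta to the sample only when allowed. */
static void toy_fill_ipc(struct toy_queue *q, struct toy_sample *s,
			 unsigned int flags)
{
	s->insn_cnt = 0;
	s->cyc_cnt = 0;
	if (flags & SAMPLE_IPC_FLAG)
		s->cyc_cnt = q->ipc_cyc_cnt - q->last_cyc_cnt;
	if (s->cyc_cnt) {
		s->insn_cnt = q->ipc_insn_cnt - q->last_insn_cnt;
		q->last_insn_cnt = q->ipc_insn_cnt;
		q->last_cyc_cnt = q->ipc_cyc_cnt;
	}
}

int main(void)
{
	struct toy_queue q = { .ipc_insn_cnt = 400, .ipc_cyc_cnt = 100 };
	struct toy_sample s;

	toy_fill_ipc(&q, &s, 0);		/* flag clear: no IPC attached */
	printf("no flag:   insn=%llu cyc=%llu\n", s.insn_cnt, s.cyc_cnt);

	toy_fill_ipc(&q, &s, SAMPLE_IPC_FLAG);	/* flag set: IPC = 400/100 */
	printf("with flag: insn=%llu cyc=%llu\n", s.insn_cnt, s.cyc_cnt);
	return 0;
}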
