Skip to content

Commit

Permalink
Minor binary search optimization for field lookup slow path.
Browse files Browse the repository at this point in the history
On a Cortex-A55 this resulted in a 28.30% reduction in CPU and wall time for the binary search path.

Loop body before:
```
.LBB0_2:
        add     w8, w12, #1
        cmp     w8, w11
        b.gt    .LBB0_6 // Predictable branch, ends the loop
.LBB0_3:
        add     w12, w8, w11
        add     w12, w12, w12, lsr #31
        asr     w12, w12, #1
        smaddl  x0, w12, w10, x9
        ldr     w13, [x0]
        cmp     w13, w1
        b.lo    .LBB0_2 // Unpredictable branch here! Will be hit 50/50 in prod
        b.ls    .LBB0_7 // Predictable branch - ends the loop
        sub     w11, w12, #1
        cmp     w8, w11
        b.le    .LBB0_3 // Predictable branch - continues the loop
```

Loop body after:
```
.LBB7_1:
        cmp     w9, w11
        b.hi    .LBB7_4 // Predictable branch - ends the loop
        add     w12, w9, w11
        lsr     w12, w12, #1
        umaddl  x0, w12, w8, x10
        sub     w14, w12, #1
        ldr     w13, [x0]
        cmp     w13, w1
        csel    w11, w14, w11, hs
        csinc   w9, w9, w12, hs
        b.ne    .LBB7_1 // Predictable branch - continues the loop
```

PiperOrigin-RevId: 700864625
  • Loading branch information
protobuf-github-bot authored and copybara-github committed Dec 4, 2024
1 parent 6a0ff9d commit 83bfe15
Showing 1 changed file with 21 additions and 13 deletions.
34 changes: 21 additions & 13 deletions upb/mini_table/message.c
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@

#include "upb/mini_table/message.h"

#include <inttypes.h>
#include <stddef.h>
#include <stdint.h>

Expand All @@ -27,21 +26,30 @@ const upb_MiniTableField* upb_MiniTable_FindFieldByNumber(
}

// Slow case: binary search
int lo = m->UPB_PRIVATE(dense_below);
int hi = m->UPB_PRIVATE(field_count) - 1;
while (lo <= hi) {
int mid = (lo + hi) / 2;
uint32_t num = m->UPB_PRIVATE(fields)[mid].UPB_PRIVATE(number);
if (num < number) {
lo = mid + 1;
continue;
uint32_t lo = m->UPB_PRIVATE(dense_below);
int32_t hi = m->UPB_PRIVATE(field_count) - 1;
const upb_MiniTableField* base = m->UPB_PRIVATE(fields);
while (hi >= (int32_t)lo) {
uint32_t mid = (hi + lo) / 2;
uint32_t num = base[mid].UPB_ONLYBITS(number);
// Structuring the comparisons this way allows the compiler, on ARM machines,
// to fuse all of these branches into one comparison followed by two
// conditional selects (CSEL/CSINC) that set the lo/hi values, followed by a
// BNE to continue or terminate the loop. Since binary search branches are
// generally unpredictable (50/50 in each direction), this is a good deal. We
// use a signed type for hi, as the `mid - 1` decrement may underflow if mid
// is 0.
int32_t hi_mid = mid - 1;
uint32_t lo_mid = mid + 1;
if (num == number) {
return &base[mid];
}
if (num > number) {
hi = mid - 1;
continue;
if (num < number) {
lo = lo_mid;
} else {
hi = hi_mid;
}
return &m->UPB_PRIVATE(fields)[mid];
}

return NULL;
}

Expand Down

0 comments on commit 83bfe15

Please sign in to comment.