-
Notifications
You must be signed in to change notification settings - Fork 37
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #65 from segmentio/keyset-arm64
keyset: port to arm64
- Loading branch information
Showing
4 changed files
with
155 additions
and
3 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
//go:build !purego | ||
// +build !purego | ||
|
||
package keyset | ||
|
||
// Lookup searches for a key in a set of keys, returning its index if | ||
// found. If the key cannot be found, the number of keys is returned. | ||
// | ||
// The function has no Go body; it is implemented in the accompanying | ||
// assembly source. That implementation treats keyset as consecutive 16-byte | ||
// entries (so the number of keys is len(keyset)/16) and compares each entry | ||
// against key padded with zero bytes to 16 bytes. A key longer than 16 | ||
// bytes is always reported as not found. | ||
func Lookup(keyset []byte, key []byte) int |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,143 @@ | ||
//go:build !purego | ||
// +build !purego | ||
|
||
#include "textflag.h" | ||
|
||
// func Lookup(keyset []byte, key []byte) int
//
// Lookup scans keyset, a sequence of 16-byte entries, for the first entry that
// equals key padded with zero bytes to 16 bytes. It returns the index of that
// entry, or len(keyset)/16 (the number of whole entries) when nothing matches
// or when key is longer than 16 bytes.
//
// Only whole 16-byte entries are examined; trailing bytes of a keyset whose
// length is not a multiple of 16 are ignored rather than read past. An empty
// key (including a nil slice) is handled without dereferencing its pointer.
//
// Register roles:
//   R0   cursor into keyset, post-incremented by 16 on every entry load
//   R1   len(keyset) in bytes
//   R2   key data pointer
//   R3   len(key)
//   R4   cap(key)
//   R5   pointer just past the last whole entry
//   R6   pointer to the first entry
//   R7   all-ones 64-bit constant used to test for a full match
//   R8-R12 scratch
//   V0   raw bytes loaded from key
//   V1   all zeroes (second TBL table register, source of padding)
//   V2   TBL index vector
//   V3   key padded to 16 bytes
//   V4   current entry, V5 per-byte comparison result
TEXT ·Lookup(SB), NOSPLIT, $0-56
	MOVD keyset+0(FP), R0
	MOVD keyset_len+8(FP), R1
	MOVD key+24(FP), R2
	MOVD key_len+32(FP), R3
	MOVD key_cap+40(FP), R4

	// Entries are 16 bytes wide, so an input key longer than 16 bytes can
	// never match; jump straight to not found.
	CMP $16, R3
	BHI notfound

	// We'll be moving the keyset pointer (R0) forward as we compare keys, so
	// make a copy of the starting point (R6). The end pointer (R5) is derived
	// from the number of whole entries (len>>4) rather than the raw byte
	// length, so that the equality test in the loop is always reached and a
	// trailing partial entry is never loaded.
	MOVD R0, R6
	LSR $4, R1, R11
	ADD R11<<4, R0, R5

	// Prepare a 64-bit mask of all ones.
	MOVD $-1, R7

	// Prepare a vector of all zeroes.
	VMOV ZR, V1.B16

	// An empty key pads to sixteen zero bytes. Its data pointer may be nil,
	// so build the padded key directly instead of loading through R2.
	CBZ R3, emptykey

	// Check that it's safe to load 16 bytes of input. If cap(input)<16, jump
	// to a check that determines whether a tail load is necessary (to avoid a
	// page fault).
	CMP $16, R4
	BLO safeload

load:
	// Load the input key (V0) and pad with zero bytes (V1). To blend the two
	// vectors, we load a mask for the particular key length and then use TBL
	// to select bytes from either V0 or V1.
	VLD1 (R2), [V0.B16]
	MOVD $blend_masks<>(SB), R10
	ADD R3<<4, R10, R10
	VLD1 (R10), [V2.B16]
	VTBL V2.B16, [V0.B16, V1.B16], V3.B16

loop:
	// Stop once the cursor reaches the end of the last whole entry.
	CMP R0, R5
	BEQ notfound

	// Load the next entry (advancing R0 by 16) and compare it byte-wise with
	// the padded key. Each equal byte yields 0xFF in V5, so the entry matches
	// only if both 64-bit halves are all ones.
	VLD1.P 16(R0), [V4.B16]
	VCMEQ V3.B16, V4.B16, V5.B16
	VMOV V5.D[0], R8
	VMOV V5.D[1], R9
	AND R8, R9, R9

	// Keep scanning unless every byte compared equal.
	CMP R9, R7
	BNE loop

found:
	// If the key was found, take the position in the keyset and convert it
	// to an index. The keyset pointer (R0) will be 1 key past the match, so
	// subtract the starting pointer (R6), divide by 16 to convert from byte
	// length to an index, and then subtract one.
	SUB R6, R0, R0
	ADD R0>>4, ZR, R0
	SUB $1, R0, R0
	MOVD R0, ret+48(FP)
	RET

notfound:
	// Return the number of keys in the keyset, which is the byte length (R1)
	// divided by 16.
	ADD R1>>4, ZR, R1
	MOVD R1, ret+48(FP)
	RET

emptykey:
	// len(key) == 0: the padded key is all zeroes.
	VMOV ZR, V3.B16
	JMP loop

safeload:
	// cap(key) < 16 here. A 16-byte load starting at R2 stays inside the
	// current 4 KiB page when the page offset is at most 4080; in that case
	// the plain load cannot fault, so jump back.
	AND $4095, R2, R12
	CMP $4080, R12
	BLS load

	// If it does cross a page boundary, we must assume that loading 16 bytes
	// will cause a fault. Instead, we load the 16 bytes up to and including the
	// key and then shuffle the key forward in the register. We can shuffle and
	// pad with zeroes at the same time to avoid having to also blend (as load
	// does). R12 = 16 - len(key) is both the distance the load is moved back
	// and the offset of the matching index vector inside shuffle_masks.
	MOVD $16, R12
	SUB R3, R12, R12
	SUB R12, R2, R2
	VLD1 (R2), [V0.B16]
	MOVD $shuffle_masks<>(SB), R10
	ADD R12, R10, R10
	VLD1 (R10), [V2.B16]
	VTBL V2.B16, [V0.B16, V1.B16], V3.B16
	JMP loop
|
||
// blend_masks holds 17 TBL index vectors of 16 bytes each (272 bytes in | ||
// total), one for every key length from 0 to 16. Lookup reads the vector at | ||
// byte offset len(key)*16. In the vector for length n, the first n bytes | ||
// hold their own position (0x00, 0x01, ...), selecting the corresponding | ||
// byte of the loaded key, and the remaining bytes hold 0x10, which selects | ||
// the first byte of the second table register; Lookup keeps that register | ||
// all zero, so those positions become zero padding. | ||
DATA blend_masks<>+0(SB)/8, $0x1010101010101010 | ||
DATA blend_masks<>+8(SB)/8, $0x1010101010101010 | ||
DATA blend_masks<>+16(SB)/8, $0x1010101010101000 | ||
DATA blend_masks<>+24(SB)/8, $0x1010101010101010 | ||
DATA blend_masks<>+32(SB)/8, $0x1010101010100100 | ||
DATA blend_masks<>+40(SB)/8, $0x1010101010101010 | ||
DATA blend_masks<>+48(SB)/8, $0x1010101010020100 | ||
DATA blend_masks<>+56(SB)/8, $0x1010101010101010 | ||
DATA blend_masks<>+64(SB)/8, $0x1010101003020100 | ||
DATA blend_masks<>+72(SB)/8, $0x1010101010101010 | ||
DATA blend_masks<>+80(SB)/8, $0x1010100403020100 | ||
DATA blend_masks<>+88(SB)/8, $0x1010101010101010 | ||
DATA blend_masks<>+96(SB)/8, $0x1010050403020100 | ||
DATA blend_masks<>+104(SB)/8, $0x1010101010101010 | ||
DATA blend_masks<>+112(SB)/8, $0x1006050403020100 | ||
DATA blend_masks<>+120(SB)/8, $0x1010101010101010 | ||
DATA blend_masks<>+128(SB)/8, $0x0706050403020100 | ||
DATA blend_masks<>+136(SB)/8, $0x1010101010101010 | ||
DATA blend_masks<>+144(SB)/8, $0x0706050403020100 | ||
DATA blend_masks<>+152(SB)/8, $0x1010101010101008 | ||
DATA blend_masks<>+160(SB)/8, $0x0706050403020100 | ||
DATA blend_masks<>+168(SB)/8, $0x1010101010100908 | ||
DATA blend_masks<>+176(SB)/8, $0x0706050403020100 | ||
DATA blend_masks<>+184(SB)/8, $0x10101010100A0908 | ||
DATA blend_masks<>+192(SB)/8, $0x0706050403020100 | ||
DATA blend_masks<>+200(SB)/8, $0x101010100B0A0908 | ||
DATA blend_masks<>+208(SB)/8, $0x0706050403020100 | ||
DATA blend_masks<>+216(SB)/8, $0x1010100C0B0A0908 | ||
DATA blend_masks<>+224(SB)/8, $0x0706050403020100 | ||
DATA blend_masks<>+232(SB)/8, $0x10100D0C0B0A0908 | ||
DATA blend_masks<>+240(SB)/8, $0x0706050403020100 | ||
DATA blend_masks<>+248(SB)/8, $0x100E0D0C0B0A0908 | ||
DATA blend_masks<>+256(SB)/8, $0x0706050403020100 | ||
DATA blend_masks<>+264(SB)/8, $0x0F0E0D0C0B0A0908 | ||
GLOBL blend_masks<>(SB), RODATA|NOPTR, $272 | ||
|
||
// shuffle_masks holds 32 bytes: the indexes 0x00 through 0x0F followed by | ||
// sixteen 0x10 bytes. Lookup's safeload path reads a 16-byte TBL index | ||
// vector starting at byte offset 16-len(key). Applied to a register whose | ||
// last len(key) bytes are the key, that vector moves the key to the front | ||
// and fills the rest with zeroes, because index 0x10 selects a byte of the | ||
// second table register, which Lookup keeps all zero. | ||
DATA shuffle_masks<>+0(SB)/8, $0x0706050403020100 | ||
DATA shuffle_masks<>+8(SB)/8, $0x0F0E0D0C0B0A0908 | ||
DATA shuffle_masks<>+16(SB)/8, $0x1010101010101010 | ||
DATA shuffle_masks<>+24(SB)/8, $0x1010101010101010 | ||
GLOBL shuffle_masks<>(SB), RODATA|NOPTR, $32 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters