ppc64: Use optimized memcmp

Instead of relying on the common C memcmp() function, use the optimized one
taken from the Linux kernel.

Signed-off-by: Laurent Dufour <[email protected]>
Signed-off-by: Pavel Emelyanov <[email protected]>
Laurent Dufour authored and xemul committed May 14, 2015
Commit d28984e (1 parent: 16ad194)
Showing 4 changed files with 241 additions and 1 deletion.
arch/ppc64/Makefile: 1 addition, 1 deletion
@@ -6,7 +6,7 @@ SYS-ASM := syscalls.S
syscalls-asm-y += $(SYS-ASM:.S=).o
crtools-obj-y += crtools.o
crtools-obj-y += cpu.o
crtools-asm-y += memcpy_power7.o
crtools-asm-y += memcpy_power7.o memcmp_64.o

SYS-DEF := syscall-ppc64.def
SYS-ASM-COMMON := syscall-common-ppc64.S
arch/ppc64/include/asm/string.h: 3 additions, 0 deletions
@@ -4,6 +4,7 @@
#include "compiler.h"

#define HAS_BUILTIN_MEMCPY
#define HAS_BUILTIN_MEMCMP

#include "asm-generic/string.h"

@@ -15,4 +16,6 @@ static inline void *builtin_memcpy(void *to, const void *from, unsigned long n)
return to;
}

extern int builtin_memcmp(const void *cs, const void *ct, size_t count);

#endif /* __CR_ASM_STRING_H__ */
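
For context, a minimal sketch of the mechanism this header change relies on: defining HAS_BUILTIN_MEMCMP before including asm-generic/string.h presumably compiles out the generic byte-by-byte fallback, so the extern builtin_memcmp declared above resolves to the assembly routine added below. The guard and the byte loop here are assumptions for illustration, not the actual contents of asm-generic/string.h.

#include <stddef.h>

/*
 * Hypothetical sketch of the fallback guard in asm-generic/string.h
 * (assumed for illustration; the real header may differ).  An arch that
 * defines HAS_BUILTIN_MEMCMP, as ppc64 now does, suppresses this generic
 * byte loop and supplies its own builtin_memcmp instead.
 */
#ifndef HAS_BUILTIN_MEMCMP
static inline int builtin_memcmp(const void *cs, const void *ct, size_t count)
{
	const unsigned char *s1 = cs, *s2 = ct;
	int res = 0;

	for (; count > 0; s1++, s2++, count--) {
		res = *s1 - *s2;
		if (res)
			break;
	}

	return res;
}
#endif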
arch/ppc64/memcmp_64.S: 236 additions, 0 deletions (new file)
@@ -0,0 +1,236 @@
/*
* Author: Anton Blanchard <[email protected]>
* Copyright 2015 IBM Corporation.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
* --
* Copied from the Linux file arch/powerpc/lib/memcmp_64.S
*/
#include "asm/linkage.h"

#define off8 r6
#define off16 r7
#define off24 r8

#define rA r9
#define rB r10
#define rC r11
#define rD r27
#define rE r28
#define rF r29
#define rG r30
#define rH r31

#ifdef __LITTLE_ENDIAN__
#define LD ldbrx
#else
#define LD ldx
#endif

ENTRY(builtin_memcmp)
cmpdi cr1,r5,0

/* Use the short loop if both strings are not 8B aligned */
or r6,r3,r4
andi. r6,r6,7

/* Use the short loop if length is less than 32B */
cmpdi cr6,r5,31

beq cr1,.Lzero
bne .Lshort
bgt cr6,.Llong

.Lshort:
mtctr r5

1: lbz rA,0(r3)
lbz rB,0(r4)
subf. rC,rB,rA
bne .Lnon_zero
bdz .Lzero

lbz rA,1(r3)
lbz rB,1(r4)
subf. rC,rB,rA
bne .Lnon_zero
bdz .Lzero

lbz rA,2(r3)
lbz rB,2(r4)
subf. rC,rB,rA
bne .Lnon_zero
bdz .Lzero

lbz rA,3(r3)
lbz rB,3(r4)
subf. rC,rB,rA
bne .Lnon_zero

addi r3,r3,4
addi r4,r4,4

bdnz 1b

.Lzero:
li r3,0
blr

.Lnon_zero:
mr r3,rC
blr

.Llong:
li off8,8
li off16,16
li off24,24

std r31,-8(r1)
std r30,-16(r1)
std r29,-24(r1)
std r28,-32(r1)
std r27,-40(r1)

srdi r0,r5,5
mtctr r0
andi. r5,r5,31

LD rA,0,r3
LD rB,0,r4

LD rC,off8,r3
LD rD,off8,r4

LD rE,off16,r3
LD rF,off16,r4

LD rG,off24,r3
LD rH,off24,r4
cmpld cr0,rA,rB

addi r3,r3,32
addi r4,r4,32

bdz .Lfirst32

LD rA,0,r3
LD rB,0,r4
cmpld cr1,rC,rD

LD rC,off8,r3
LD rD,off8,r4
cmpld cr6,rE,rF

LD rE,off16,r3
LD rF,off16,r4
cmpld cr7,rG,rH
bne cr0,.LcmpAB

LD rG,off24,r3
LD rH,off24,r4
cmpld cr0,rA,rB
bne cr1,.LcmpCD

addi r3,r3,32
addi r4,r4,32

bdz .Lsecond32

.balign 16

1: LD rA,0,r3
LD rB,0,r4
cmpld cr1,rC,rD
bne cr6,.LcmpEF

LD rC,off8,r3
LD rD,off8,r4
cmpld cr6,rE,rF
bne cr7,.LcmpGH

LD rE,off16,r3
LD rF,off16,r4
cmpld cr7,rG,rH
bne cr0,.LcmpAB

LD rG,off24,r3
LD rH,off24,r4
cmpld cr0,rA,rB
bne cr1,.LcmpCD

addi r3,r3,32
addi r4,r4,32

bdnz 1b

.Lsecond32:
cmpld cr1,rC,rD
bne cr6,.LcmpEF

cmpld cr6,rE,rF
bne cr7,.LcmpGH

cmpld cr7,rG,rH
bne cr0,.LcmpAB

bne cr1,.LcmpCD
bne cr6,.LcmpEF
bne cr7,.LcmpGH

.Ltail:
ld r31,-8(r1)
ld r30,-16(r1)
ld r29,-24(r1)
ld r28,-32(r1)
ld r27,-40(r1)

cmpdi r5,0
beq .Lzero
b .Lshort

.Lfirst32:
cmpld cr1,rC,rD
cmpld cr6,rE,rF
cmpld cr7,rG,rH

bne cr0,.LcmpAB
bne cr1,.LcmpCD
bne cr6,.LcmpEF
bne cr7,.LcmpGH

b .Ltail

.LcmpAB:
li r3,1
bgt cr0,.Lout
li r3,-1
b .Lout

.LcmpCD:
li r3,1
bgt cr1,.Lout
li r3,-1
b .Lout

.LcmpEF:
li r3,1
bgt cr6,.Lout
li r3,-1
b .Lout

.LcmpGH:
li r3,1
bgt cr7,.Lout
li r3,-1

.Lout:
ld r31,-8(r1)
ld r30,-16(r1)
ld r29,-24(r1)
ld r28,-32(r1)
ld r27,-40(r1)
blr
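
To make the long path above easier to follow: when both buffers are 8-byte aligned and the length exceeds 31 bytes, the routine compares 32 bytes per loop iteration with four pairs of doubleword loads, and on little-endian it uses ldbrx so that an unsigned 64-bit compare orders the words the same way a byte-wise memcmp would; the exit stubs return only the sign (1, -1 or 0), which is all memcmp guarantees. The C below is an illustrative model of that word-at-a-time idea (no unrolling, no alignment check); it is not part of this commit, and memcmp_words_model is a made-up name.

#include <stddef.h>
#include <stdint.h>
#include <string.h>

/* Simplified model: 8 bytes at a time, byte-reversed on little-endian so an
 * unsigned doubleword compare matches byte-wise ordering (the job ldbrx does
 * in the assembly); the tail falls back to a byte loop like .Lshort. */
static int memcmp_words_model(const void *cs, const void *ct, size_t count)
{
	const unsigned char *s = cs, *t = ct;

	while (count >= 8) {
		uint64_t a, b;

		memcpy(&a, s, 8);
		memcpy(&b, t, 8);
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
		a = __builtin_bswap64(a);
		b = __builtin_bswap64(b);
#endif
		if (a != b)
			return a > b ? 1 : -1;	/* sign only, like the .Lcmp* exits */

		s += 8;
		t += 8;
		count -= 8;
	}

	while (count--) {
		int d = *s++ - *t++;

		if (d)
			return d;
	}

	return 0;
}

int main(void)
{
	const char x[] = "checkpoint/restore", y[] = "checkpoint-restore";

	/* Agreement is only on the sign, exactly as with the assembly routine. */
	return !((memcmp_words_model(x, y, sizeof(x)) > 0) == (memcmp(x, y, sizeof(x)) > 0));
}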
pie/Makefile: 1 addition, 0 deletions
@@ -13,6 +13,7 @@ endif
ifeq ($(SRCARCH), ppc64)
asm-e += $(ARCH_DIR)/vdso-trampoline.o
asm-e += $(ARCH_DIR)/memcpy_power7.o
asm-e += $(ARCH_DIR)/memcmp_64.o
endif
endif
