Skip to content

Commit

Permalink
Merge pull request #18819 from Akira1Saitoh/StringIndexOfCharJDK17
Browse files Browse the repository at this point in the history
Accelerate String.indexOf(char) in JDK17+
  • Loading branch information
knn-k authored Feb 4, 2024
2 parents 8dc58cc + 107ff02 commit 335e720
Show file tree
Hide file tree
Showing 9 changed files with 122 additions and 60 deletions.
30 changes: 18 additions & 12 deletions runtime/compiler/aarch64/codegen/J9TreeEvaluator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6292,13 +6292,14 @@ static TR::Register* inlineIntrinsicIndexOf(TR::Node* node, TR::CodeGenerator* c
*
*/

/*
* The first child (the receiver) is not evaluated because it is not used.
*/
TR::Node *arrayNode = node->getSecondChild();
TR::Node *charNode = node->getThirdChild();
TR::Node *offsetNode = node->getChild(3);
TR::Node *lengthNode = node->getChild(4);
// This evaluator function handles several indexOf() intrinsics, some of which are static calls without a
// receiver, hence the need for the static call check.
const bool isStaticCall = node->getSymbolReference()->getSymbol()->castToMethodSymbol()->isStatic();
const uint8_t firstCallArgIdx = isStaticCall ? 0 : 1;
TR::Node *arrayNode = node->getChild(firstCallArgIdx);
TR::Node *charNode = node->getChild(firstCallArgIdx + 1);
TR::Node *offsetNode = node->getChild(firstCallArgIdx + 2);
TR::Node *lengthNode = node->getChild(firstCallArgIdx + 3);
TR::Register *arrayReg = cg->evaluate(arrayNode);
TR::Register *charReg = cg->evaluate(charNode);
const bool isOffsetConstZero = offsetNode->isConstZeroValue();
Expand Down Expand Up @@ -6479,11 +6480,14 @@ static TR::Register* inlineIntrinsicIndexOf(TR::Node* node, TR::CodeGenerator* c
node->setRegister(resultReg);
cg->stopUsingRegister(zeroReg);
srm->stopUsingRegisters();
cg->recursivelyDecReferenceCount(node->getFirstChild());
cg->decReferenceCount(arrayNode);
cg->decReferenceCount(charNode);
cg->decReferenceCount(offsetNode);
cg->decReferenceCount(lengthNode);
if (!isStaticCall)
{
cg->recursivelyDecReferenceCount(node->getFirstChild());
}
for (int32_t i = firstCallArgIdx; i < node->getNumChildren(); i++)
{
cg->decReferenceCount(node->getChild(i));
}

return resultReg;
}
Expand All @@ -6503,6 +6507,7 @@ J9::ARM64::CodeGenerator::inlineDirectCall(TR::Node *node, TR::Register *&result
{
switch (methodSymbol->getMandatoryRecognizedMethod())
{
case TR::java_lang_StringLatin1_indexOfChar:
case TR::com_ibm_jit_JITHelpers_intrinsicIndexOfLatin1:
if (cg->getSupportsInlineStringIndexOf())
{
Expand All @@ -6511,6 +6516,7 @@ J9::ARM64::CodeGenerator::inlineDirectCall(TR::Node *node, TR::Register *&result
}
break;

case TR::java_lang_StringUTF16_indexOfCharUnsafe:
case TR::com_ibm_jit_JITHelpers_intrinsicIndexOfUTF16:
if (cg->getSupportsInlineStringIndexOf())
{
Expand Down
2 changes: 2 additions & 0 deletions runtime/compiler/codegen/J9RecognizedMethodsEnum.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,7 @@
java_lang_String_startsWith,

java_lang_StringLatin1_indexOf,
java_lang_StringLatin1_indexOfChar,

java_lang_StringUTF16_charAt,
java_lang_StringUTF16_checkIndex,
Expand All @@ -230,6 +231,7 @@
java_lang_StringUTF16_compareValues,
java_lang_StringUTF16_getChar,
java_lang_StringUTF16_indexOf,
java_lang_StringUTF16_indexOfCharUnsafe,
java_lang_StringUTF16_length,
java_lang_StringUTF16_newBytesFor,
java_lang_StringUTF16_putChar,
Expand Down
2 changes: 2 additions & 0 deletions runtime/compiler/env/j9method.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3194,6 +3194,7 @@ void TR_ResolvedJ9Method::construct()
static X StringLatin1Methods[] =
{
{ x(TR::java_lang_StringLatin1_indexOf, "indexOf", "([BI[BII)I")},
{ x(TR::java_lang_StringLatin1_indexOfChar, "indexOfChar", "([BIII)I")},
{ x(TR::java_lang_StringLatin1_inflate, "inflate", "([BI[CII)V")},
{ TR::unknownMethod }
};
Expand All @@ -3207,6 +3208,7 @@ void TR_ResolvedJ9Method::construct()
{ x(TR::java_lang_StringUTF16_compareValues, "compareValues", "([B[BII)I")},
{ x(TR::java_lang_StringUTF16_getChar, "getChar", "([BI)C")},
{ x(TR::java_lang_StringUTF16_indexOf, "indexOf", "([BI[BII)I")},
{ x(TR::java_lang_StringUTF16_indexOfCharUnsafe, "indexOfCharUnsafe", "([BIII)I")},
{ x(TR::java_lang_StringUTF16_length, "length", "([B)I")},
{ x(TR::java_lang_StringUTF16_newBytesFor, "newBytesFor", "(I)[B")},
{ x(TR::java_lang_StringUTF16_putChar, "putChar", "([BII)V")},
Expand Down
2 changes: 2 additions & 0 deletions runtime/compiler/optimizer/InlinerTempForJ9.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4805,7 +4805,9 @@ TR_J9InlinerPolicy::supressInliningRecognizedInitialCallee(TR_CallSite* callsite
}
break;
case TR::java_lang_StringLatin1_indexOf:
case TR::java_lang_StringLatin1_indexOfChar:
case TR::java_lang_StringUTF16_indexOf:
case TR::java_lang_StringUTF16_indexOfCharUnsafe:
case TR::com_ibm_jit_JITHelpers_intrinsicIndexOfStringLatin1:
case TR::com_ibm_jit_JITHelpers_intrinsicIndexOfStringUTF16:
case TR::com_ibm_jit_JITHelpers_intrinsicIndexOfLatin1:
Expand Down
19 changes: 18 additions & 1 deletion runtime/compiler/optimizer/J9ValuePropagation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1854,7 +1854,24 @@ J9::ValuePropagation::constrainRecognizedMethod(TR::Node *node)
return;
break;
}

case TR::java_lang_StringLatin1_indexOfChar:
case TR::java_lang_StringUTF16_indexOfCharUnsafe:
{
TR::Node *sourceStringNode = node->getFirstChild();
TR::Node *targetCharNode = node->getSecondChild();
TR::Node *startNode = node->getChild(2);
TR::Node *lengthNode = node->getChild(3);
bool is16Bit = rm == TR::java_lang_StringUTF16_indexOfCharUnsafe;
if (transformIndexOfKnownString(
node,
sourceStringNode,
targetCharNode,
startNode,
lengthNode,
is16Bit))
return;
break;
}
default:
break;
}
Expand Down
72 changes: 44 additions & 28 deletions runtime/compiler/p/codegen/J9TreeEvaluator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10823,11 +10823,14 @@ static TR::Register *inlineIntrinsicIndexOf_P10(TR::Node *node, TR::CodeGenerato
auto vectorCompareOp = isLatin1 ? TR::InstOpCode::vcmpequb_r : TR::InstOpCode::vcmpequh_r;
TR::InstOpCode::Mnemonic scalarLoadOp = isLatin1 ? TR::InstOpCode::lbzx : TR::InstOpCode::lhzx;


TR::Register *array = cg->evaluate(node->getChild(1));
TR::Register *ch = cg->evaluate(node->getChild(2));
TR::Register *offset = cg->evaluate(node->getChild(3));
TR::Register *length = cg->evaluate(node->getChild(4));
// This evaluator function handles several indexOf() intrinsics, some of which are static calls without a
// receiver, hence the need for the static call check.
const bool isStaticCall = node->getSymbolReference()->getSymbol()->castToMethodSymbol()->isStatic();
const uint8_t firstCallArgIdx = isStaticCall ? 0 : 1;
TR::Register *array = cg->evaluate(node->getChild(firstCallArgIdx));
TR::Register *ch = cg->evaluate(node->getChild(firstCallArgIdx+1));
TR::Register *offset = cg->evaluate(node->getChild(firstCallArgIdx+2));
TR::Register *length = cg->gprClobberEvaluate(node->getChild(firstCallArgIdx+3));

TR::LabelSymbol *startLabel = generateLabelSymbol(cg);
TR::LabelSymbol *mainLoop = generateLabelSymbol(cg);
Expand Down Expand Up @@ -10980,11 +10983,14 @@ static TR::Register *inlineIntrinsicIndexOf_P10(TR::Node *node, TR::CodeGenerato

node->setRegister(result);

cg->decReferenceCount(node->getChild(0));
cg->decReferenceCount(node->getChild(1));
cg->decReferenceCount(node->getChild(2));
cg->decReferenceCount(node->getChild(3));
cg->decReferenceCount(node->getChild(4));
if (!isStaticCall)
{
cg->recursivelyDecReferenceCount(node->getChild(0));
}
for (int32_t i = firstCallArgIdx; i < node->getNumChildren(); i++)
{
cg->decReferenceCount(node->getChild(i));
}

return result;
}
Expand All @@ -10998,10 +11004,14 @@ static TR::Register *inlineIntrinsicIndexOf(TR::Node *node, TR::CodeGenerator *c
auto vectorCompareOp = isLatin1 ? TR::InstOpCode::vcmpequb_r : TR::InstOpCode::vcmpequh_r;
auto scalarLoadOp = isLatin1 ? TR::InstOpCode::lbzx : TR::InstOpCode::lhzx;

TR::Register *array = cg->evaluate(node->getChild(1));
TR::Register *ch = cg->evaluate(node->getChild(2));
TR::Register *offset = cg->evaluate(node->getChild(3));
TR::Register *length = cg->evaluate(node->getChild(4));
// This evaluator function handles several indexOf() intrinsics, some of which are static calls without a
// receiver, hence the need for the static call check.
const bool isStaticCall = node->getSymbolReference()->getSymbol()->castToMethodSymbol()->isStatic();
const uint8_t firstCallArgIdx = isStaticCall ? 0 : 1;
TR::Register *array = cg->evaluate(node->getChild(firstCallArgIdx));
TR::Register *ch = cg->evaluate(node->getChild(firstCallArgIdx+1));
TR::Register *offset = cg->evaluate(node->getChild(firstCallArgIdx+2));
TR::Register *length = cg->gprClobberEvaluate(node->getChild(firstCallArgIdx+3));

TR::Register *cr0 = cg->allocateRegister(TR_CCR);
TR::Register *cr6 = cg->allocateRegister(TR_CCR);
Expand Down Expand Up @@ -11048,13 +11058,13 @@ static TR::Register *inlineIntrinsicIndexOf(TR::Node *node, TR::CodeGenerator *c
generateTrg1Src2Instruction(cg, TR::InstOpCode::add, node, endAddress, endAddress, endAddress);
}

if (node->getChild(3)->getReferenceCount() == 1)
if (node->getChild(firstCallArgIdx+2)->getReferenceCount() == 1)
srm->donateScratchRegister(offset);
if (node->getChild(4)->getReferenceCount() == 1)
if (node->getChild(firstCallArgIdx+3)->getReferenceCount() == 1)
srm->donateScratchRegister(length);

generateTrg1Src1ImmInstruction(cg, TR::InstOpCode::addi, node, arrAddress, array, TR::Compiler->om.contiguousArrayHeaderSizeInBytes());
if (node->getChild(1)->getReferenceCount() == 1)
if (node->getChild(firstCallArgIdx)->getReferenceCount() == 1)
srm->donateScratchRegister(array);

// Handle the first character using a simple scalar compare. Otherwise, first character matches
Expand Down Expand Up @@ -11090,7 +11100,7 @@ static TR::Register *inlineIntrinsicIndexOf(TR::Node *node, TR::CodeGenerator *c
// Splat the value to be compared against and its bitwise complement into two vector registers
// for later use
generateTrg1Src1Instruction(cg, TR::InstOpCode::mtvsrwz, node, targetVector, ch);
if (node->getChild(2)->getReferenceCount() == 1)
if (node->getChild(firstCallArgIdx+1)->getReferenceCount() == 1)
srm->donateScratchRegister(ch);
if (isLatin1)
generateTrg1Src1ImmInstruction(cg, TR::InstOpCode::vspltb, node, targetVector, targetVector, 7);
Expand Down Expand Up @@ -11301,16 +11311,16 @@ static TR::Register *inlineIntrinsicIndexOf(TR::Node *node, TR::CodeGenerator *c

TR::RegisterDependencyConditions *deps = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(0, 15 + srm->numAvailableRegisters(), cg->trMemory());

if (node->getChild(1)->getReferenceCount() != 1)
if (node->getChild(firstCallArgIdx)->getReferenceCount() != 1)
{
deps->addPostCondition(array, TR::RealRegister::NoReg);
deps->getPostConditions()->getRegisterDependency(deps->getAddCursorForPost() - 1)->setExcludeGPR0();
}
if (node->getChild(2)->getReferenceCount() != 1)
if (node->getChild(firstCallArgIdx+1)->getReferenceCount() != 1)
deps->addPostCondition(ch, TR::RealRegister::NoReg);
if (node->getChild(3)->getReferenceCount() != 1)
if (node->getChild(firstCallArgIdx+2)->getReferenceCount() != 1)
deps->addPostCondition(offset, TR::RealRegister::NoReg);
if (node->getChild(4)->getReferenceCount() != 1)
if (node->getChild(firstCallArgIdx+3)->getReferenceCount() != 1)
deps->addPostCondition(length, TR::RealRegister::NoReg);

deps->addPostCondition(cr0, TR::RealRegister::cr0);
Expand Down Expand Up @@ -11338,11 +11348,14 @@ static TR::Register *inlineIntrinsicIndexOf(TR::Node *node, TR::CodeGenerator *c

node->setRegister(result);

cg->decReferenceCount(node->getChild(0));
cg->decReferenceCount(node->getChild(1));
cg->decReferenceCount(node->getChild(2));
cg->decReferenceCount(node->getChild(3));
cg->decReferenceCount(node->getChild(4));
if (!isStaticCall)
{
cg->recursivelyDecReferenceCount(node->getChild(0));
}
for (int32_t i = firstCallArgIdx; i < node->getNumChildren(); i++)
{
cg->decReferenceCount(node->getChild(i));
}

return result;
}
Expand Down Expand Up @@ -11772,11 +11785,14 @@ J9::Power::CodeGenerator::inlineDirectCall(TR::Node *node, TR::Register *&result
}
break;

case TR::java_lang_StringLatin1_indexOfChar:
case TR::java_lang_StringUTF16_indexOfCharUnsafe:
case TR::com_ibm_jit_JITHelpers_intrinsicIndexOfLatin1:
case TR::com_ibm_jit_JITHelpers_intrinsicIndexOfUTF16:
if (cg->getSupportsInlineStringIndexOf())
{
bool isLatin1 = methodSymbol->getRecognizedMethod() == TR::com_ibm_jit_JITHelpers_intrinsicIndexOfLatin1;
bool isLatin1 = (methodSymbol->getRecognizedMethod() == TR::com_ibm_jit_JITHelpers_intrinsicIndexOfLatin1) ||
(methodSymbol->getRecognizedMethod() == TR::java_lang_StringLatin1_indexOfChar);
if (comp->target().cpu.isAtLeast(OMR_PROCESSOR_PPC_P10))
resultReg = inlineIntrinsicIndexOf_P10(node, cg, isLatin1);
else
Expand Down
27 changes: 18 additions & 9 deletions runtime/compiler/x/codegen/J9TreeEvaluator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9497,10 +9497,14 @@ static TR::Register* inlineIntrinsicIndexOf(TR::Node* node, TR::CodeGenerator* c
shift = 1;
}

auto array = cg->evaluate(node->getChild(1));
auto ch = cg->evaluate(node->getChild(2));
auto offset = cg->evaluate(node->getChild(3));
auto length = cg->evaluate(node->getChild(4));
// This evaluator function handles several indexOf() intrinsics, some of which are static calls without a
// receiver, hence the need for the static call check.
const bool isStaticCall = node->getSymbolReference()->getSymbol()->castToMethodSymbol()->isStatic();
const uint8_t firstCallArgIdx = isStaticCall ? 0 : 1;
auto array = cg->evaluate(node->getChild(firstCallArgIdx));
auto ch = cg->evaluate(node->getChild(firstCallArgIdx+1));
auto offset = cg->evaluate(node->getChild(firstCallArgIdx+2));
auto length = cg->evaluate(node->getChild(firstCallArgIdx+3));

auto ECX = cg->allocateRegister();
auto result = cg->allocateRegister();
Expand Down Expand Up @@ -9584,11 +9588,14 @@ static TR::Register* inlineIntrinsicIndexOf(TR::Node* node, TR::CodeGenerator* c


node->setRegister(result);
cg->recursivelyDecReferenceCount(node->getChild(0));
cg->decReferenceCount(node->getChild(1));
cg->decReferenceCount(node->getChild(2));
cg->decReferenceCount(node->getChild(3));
cg->decReferenceCount(node->getChild(4));
if (!isStaticCall)
{
cg->recursivelyDecReferenceCount(node->getChild(0));
}
for (int32_t i = firstCallArgIdx; i < node->getNumChildren(); i++)
{
cg->decReferenceCount(node->getChild(i));
}
return result;
}

Expand Down Expand Up @@ -11638,11 +11645,13 @@ J9::X86::TreeEvaluator::directCallEvaluator(TR::Node *node, TR::CodeGenerator *c

switch (symbol->getMandatoryRecognizedMethod())
{
case TR::java_lang_StringLatin1_indexOfChar:
case TR::com_ibm_jit_JITHelpers_intrinsicIndexOfLatin1:
if (cg->getSupportsInlineStringIndexOf())
return inlineIntrinsicIndexOf(node, cg, true);
break;

case TR::java_lang_StringUTF16_indexOfCharUnsafe:
case TR::com_ibm_jit_JITHelpers_intrinsicIndexOfUTF16:
if (cg->getSupportsInlineStringIndexOf())
return inlineIntrinsicIndexOf(node, cg, false);
Expand Down
2 changes: 2 additions & 0 deletions runtime/compiler/z/codegen/J9CodeGenerator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4029,9 +4029,11 @@ J9::Z::CodeGenerator::inlineDirectCall(
{
switch (methodSymbol->getRecognizedMethod())
{
case TR::java_lang_StringLatin1_indexOfChar:
case TR::com_ibm_jit_JITHelpers_intrinsicIndexOfLatin1:
resultReg = TR::TreeEvaluator::inlineIntrinsicIndexOf(node, cg, true);
return true;
case TR::java_lang_StringUTF16_indexOfCharUnsafe:
case TR::com_ibm_jit_JITHelpers_intrinsicIndexOfUTF16:
resultReg = TR::TreeEvaluator::inlineIntrinsicIndexOf(node, cg, false);
return true;
Expand Down
26 changes: 16 additions & 10 deletions runtime/compiler/z/codegen/J9TreeEvaluator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1681,11 +1681,14 @@ TR::Register *
J9::Z::TreeEvaluator::inlineIntrinsicIndexOf(TR::Node * node, TR::CodeGenerator * cg, bool isLatin1)
{
cg->generateDebugCounter("z13/simd/indexOf", 1, TR::DebugCounter::Free);

TR::Register* array = cg->evaluate(node->getChild(1));
TR::Register* ch = cg->evaluate(node->getChild(2));
TR::Register* offset = cg->evaluate(node->getChild(3));
TR::Register* length = cg->gprClobberEvaluate(node->getChild(4));
// This evaluator function handles several indexOf() intrinsics, some of which are static calls without a
// receiver, hence the need for the static call check.
const bool isStaticCall = node->getSymbolReference()->getSymbol()->castToMethodSymbol()->isStatic();
const uint8_t firstCallArgIdx = isStaticCall ? 0 : 1;
TR::Register* array = cg->evaluate(node->getChild(firstCallArgIdx));
TR::Register* ch = cg->evaluate(node->getChild(firstCallArgIdx+1));
TR::Register* offset = cg->evaluate(node->getChild(firstCallArgIdx+2));
TR::Register* length = cg->gprClobberEvaluate(node->getChild(firstCallArgIdx+3));


const int32_t sizeOfVector = cg->machine()->getVRFSize();
Expand Down Expand Up @@ -1818,11 +1821,14 @@ J9::Z::TreeEvaluator::inlineIntrinsicIndexOf(TR::Node * node, TR::CodeGenerator

node->setRegister(indexRegister);

cg->recursivelyDecReferenceCount(node->getChild(0));
cg->decReferenceCount(node->getChild(1));
cg->decReferenceCount(node->getChild(2));
cg->decReferenceCount(node->getChild(3));
cg->decReferenceCount(node->getChild(4));
if (!isStaticCall)
{
cg->recursivelyDecReferenceCount(node->getChild(0));
}
for (int32_t i = firstCallArgIdx; i < node->getNumChildren(); i++)
{
cg->decReferenceCount(node->getChild(i));
}

return indexRegister;
}
Expand Down

0 comments on commit 335e720

Please sign in to comment.