From e7c26c2088c7839f4dd3ae31e1e36b18778660ba Mon Sep 17 00:00:00 2001 From: Ben Thomas Date: Thu, 3 Dec 2020 17:58:39 +0000 Subject: [PATCH 1/7] Disable prefetching for aloadi nodes on 64-bit Previously, the code in the insertPrefetchIfNecessary method was expecting that it would only see aloadi nodes on 32-bit. Because of this, the code would not operate correctly and would cause segfaults during compilation. This was previously being handled by simply not calling insertPrefetchIfNecessary under these conditions, however keeping this behaviour requires that OMR know about implementation details of OpenJ9's prefetching. While this code could theoretically be made to work under these conditions, the code as written simply cannot handle this case. Instead, a check has been added to prevent insertPrefetchIfNecessary from operating on aloadi nodes on 64-bit to replicate the old behaviour. Signed-off-by: Ben Thomas --- runtime/compiler/p/codegen/J9CodeGenerator.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/runtime/compiler/p/codegen/J9CodeGenerator.cpp b/runtime/compiler/p/codegen/J9CodeGenerator.cpp index 4447a4435a6..59ef75114a0 100644 --- a/runtime/compiler/p/codegen/J9CodeGenerator.cpp +++ b/runtime/compiler/p/codegen/J9CodeGenerator.cpp @@ -403,7 +403,7 @@ J9::Power::CodeGenerator::insertPrefetchIfNecessary(TR::Node *node, TR::Register static bool disableStringObjPrefetch = (feGetEnv("TR_DisableStringObjPrefetch") != NULL); bool optDisabled = false; - if (node->getOpCodeValue() == TR::aloadi || + if ((node->getOpCodeValue() == TR::aloadi && !comp->target().is64Bit()) || (comp->target().is64Bit() && comp->useCompressedPointers() && node->getOpCodeValue() == TR::l2a && @@ -631,7 +631,7 @@ J9::Power::CodeGenerator::insertPrefetchIfNecessary(TR::Node *node, TR::Register } } - if (node->getOpCodeValue() == TR::aloadi || + if ((node->getOpCodeValue() == TR::aloadi && !comp->target().is64Bit()) || (comp->target().is64Bit() && comp->useCompressedPointers() && (node->getOpCodeValue() == TR::iloadi || node->getOpCodeValue() == TR::irdbari) && From a98159db0defb4a657b07e6c20745dfb2f0dfed4 Mon Sep 17 00:00:00 2001 From: Ben Thomas Date: Thu, 12 Nov 2020 15:10:56 +0000 Subject: [PATCH 2/7] Add OMRLoadStoreHandler.cpp to builds Signed-off-by: Ben Thomas --- runtime/compiler/build/files/target/p.mk | 1 + 1 file changed, 1 insertion(+) diff --git a/runtime/compiler/build/files/target/p.mk b/runtime/compiler/build/files/target/p.mk index 7a14f9245e6..fbb0d6f3dfe 100644 --- a/runtime/compiler/build/files/target/p.mk +++ b/runtime/compiler/build/files/target/p.mk @@ -28,6 +28,7 @@ JIT_PRODUCT_BACKEND_SOURCES+=\ omr/compiler/p/codegen/OMRInstOpCode.cpp \ omr/compiler/p/codegen/OMRInstruction.cpp \ omr/compiler/p/codegen/OMRLinkage.cpp \ + omr/compiler/p/codegen/OMRLoadStoreHandler.cpp \ omr/compiler/p/codegen/OMRMachine.cpp \ omr/compiler/p/codegen/OMRMemoryReference.cpp \ omr/compiler/p/codegen/OMRPeephole.cpp \ From 7322e410b44a7776d10e4838c070f689a3af2a95 Mon Sep 17 00:00:00 2001 From: Ben Thomas Date: Fri, 27 Nov 2020 17:24:49 +0000 Subject: [PATCH 3/7] Update write barriers to use the new LoadStoreHandler API Previously, the code for performing write barriers was using the old MemoryReference-based API for performing its stores. This API is being deprecated in OMR, so all code dealing with write barriers has been updated to use the new LoadStoreHandler API. Signed-off-by: Ben Thomas --- .../compiler/p/codegen/J9TreeEvaluator.cpp | 168 +++--------------- 1 file changed, 24 insertions(+), 144 deletions(-) diff --git a/runtime/compiler/p/codegen/J9TreeEvaluator.cpp b/runtime/compiler/p/codegen/J9TreeEvaluator.cpp index 6da4e82532e..0775e950ca5 100644 --- a/runtime/compiler/p/codegen/J9TreeEvaluator.cpp +++ b/runtime/compiler/p/codegen/J9TreeEvaluator.cpp @@ -65,6 +65,7 @@ #include "p/codegen/GenerateInstructions.hpp" #include "p/codegen/InterfaceCastSnippet.hpp" #include "p/codegen/J9PPCSnippet.hpp" +#include "p/codegen/LoadStoreHandler.hpp" //#include "p/codegen/PPCAheadOfTimeCompile.hpp" #include "p/codegen/PPCEvaluator.hpp" #include "p/codegen/PPCInstruction.hpp" @@ -439,7 +440,6 @@ TR::Register *outlinedHelperWrtbarEvaluator(TR::Node *node, TR::CodeGenerator *c { TR::Register *srcObjectReg = cg->gprClobberEvaluate(node->getFirstChild()); TR::Register *dstObjectReg = cg->gprClobberEvaluate(node->getSecondChild()); - TR::MemoryReference *memRef; TR::Compilation* comp = cg->comp(); TR::Symbol *storeSym = node->getSymbolReference()->getSymbol(); @@ -454,24 +454,14 @@ TR::Register *outlinedHelperWrtbarEvaluator(TR::Node *node, TR::CodeGenerator *c } else { - memRef = TR::MemoryReference::createWithRootLoadOrStore(cg, node, TR::Compiler->om.sizeofReferenceAddress()); - if (needSync) - generateInstruction(cg, TR::InstOpCode::lwsync, node); - generateMemSrc1Instruction(cg,TR::InstOpCode::Op_st, node, memRef, srcObjectReg); - if (needSync) - { - // ordered and lazySet operations will not generate a post-write sync - // and will not mark unresolved snippets with in sync sequence to avoid - // PicBuilder overwriting the instruction that normally would have been sync - TR::TreeEvaluator::postSyncConditions(node, cg, srcObjectReg, memRef, TR::InstOpCode::sync, lazyVolatile); - } + TR::LoadStoreHandler::generateStoreNodeSequence(cg, srcObjectReg, node, TR::InstOpCode::Op_st, TR::Compiler->om.sizeofReferenceAddress()); + if (!node->getFirstChild()->isNull()) VMoutlinedHelperWrtbarEvaluator(node, srcObjectReg, dstObjectReg, node->getFirstChild()->isNonNull(), cg); } cg->decReferenceCount(node->getFirstChild()); cg->decReferenceCount(node->getSecondChild()); - memRef->decNodeReferenceCounts(cg); if (srcObjectReg != node->getFirstChild()->getRegister()) cg->stopUsingRegister(srcObjectReg); @@ -503,7 +493,7 @@ static int32_t getOffsetOfJ9ObjectFlags() } static void VMnonNullSrcWrtBarCardCheckEvaluator(TR::Node *node, TR::Register *srcReg, TR::Register *dstReg, TR::Register *condReg, TR::Register *temp1Reg, TR::Register *temp2Reg, - TR::Register *temp3Reg, TR::Register *temp4Reg, TR::LabelSymbol *doneLabel, TR::RegisterDependencyConditions *deps, TR::MemoryReference *tempMR, bool dstAddrComputed, + TR::Register *temp3Reg, TR::Register *temp4Reg, TR::LabelSymbol *doneLabel, TR::RegisterDependencyConditions *deps, bool isCompressedRef, TR::CodeGenerator *cg, bool flagsInTemp1 = false) { // non-heap objects cannot be marked @@ -657,14 +647,6 @@ static void VMnonNullSrcWrtBarCardCheckEvaluator(TR::Node *node, TR::Register *s } else if (comp->getOptions()->realTimeGC()) { - if (!dstAddrComputed) - { - if (tempMR->getIndexRegister() != NULL) - generateTrg1Src2Instruction(cg, TR::InstOpCode::add, node, temp3Reg, tempMR->getBaseRegister(), tempMR->getIndexRegister()); - else - generateTrg1MemInstruction(cg, TR::InstOpCode::addi2, node, temp3Reg, tempMR); - } - if (!comp->getOption(TR_DisableInlineWriteBarriersRT)) { // check if barrier is enabled: if disabled then branch around the rest @@ -795,7 +777,7 @@ static void VMCardCheckEvaluator(TR::Node *node, TR::Register *dstReg, TR::Regis } -static void VMwrtbarEvaluator(TR::Node *node, TR::Register *srcReg, TR::Register *dstReg, TR::Register *dstAddrReg, TR::MemoryReference *tempMR, +static void VMwrtbarEvaluator(TR::Node *node, TR::Register *srcReg, TR::Register *dstReg, TR::Register *dstAddrReg, TR::RegisterDependencyConditions *conditions, bool srcNonNull, bool needDeps, bool isCompressedRef, TR::CodeGenerator *cg, TR::Register *flagsReg = NULL) { TR::Compilation *comp = cg->comp(); @@ -844,19 +826,6 @@ static void VMwrtbarEvaluator(TR::Node *node, TR::Register *srcReg, TR::Register conditions->getPostConditions()->getRegisterDependency(3)->setExcludeGPR0(); //3=temp2Reg TR::addDependency(conditions, dstAddrReg, TR::RealRegister::gr4, TR_GPR, cg); temp3Reg = dstAddrReg; - - if (node->getSymbolReference()->isUnresolved()) - { - if (tempMR->getBaseRegister() != NULL) - { - TR::addDependency(conditions, tempMR->getBaseRegister(), TR::RealRegister::NoReg, TR_GPR, cg); - conditions->getPreConditions()->getRegisterDependency(3)->setExcludeGPR0(); - conditions->getPostConditions()->getRegisterDependency(3)->setExcludeGPR0(); - } - - if (tempMR->getIndexRegister() != NULL) - TR::addDependency(conditions, tempMR->getIndexRegister(), TR::RealRegister::NoReg, TR_GPR, cg); - } } else TR::addDependency(conditions, temp2Reg, TR::RealRegister::NoReg, TR_GPR, cg); @@ -890,7 +859,7 @@ static void VMwrtbarEvaluator(TR::Node *node, TR::Register *srcReg, TR::Register generateConditionalBranchInstruction(cg, TR::InstOpCode::beq, node, label, cr0); } - VMnonNullSrcWrtBarCardCheckEvaluator(node, srcReg, dstReg, cr0, temp1Reg, temp2Reg, temp3Reg, temp4Reg, label, conditions, tempMR, false, isCompressedRef, cg, + VMnonNullSrcWrtBarCardCheckEvaluator(node, srcReg, dstReg, cr0, temp1Reg, temp2Reg, temp3Reg, temp4Reg, label, conditions, isCompressedRef, cg, flagsReg != NULL); generateDepLabelInstruction(cg, TR::InstOpCode::label, node, label, conditions); if (doCrdMrk) @@ -1345,20 +1314,11 @@ TR::Register *J9::Power::TreeEvaluator::awrtbarEvaluator(TR::Node *node, TR::Cod } } - TR::MemoryReference *tempMR = NULL; TR::Register *destinationRegister = cg->evaluate(node->getSecondChild()); TR::Register *flagsReg = NULL; TR::Node *firstChild = node->getFirstChild(); TR::Register *sourceRegister; bool killSource = false; - bool needSync = (node->getSymbolReference()->getSymbol()->isSyncVolatile() && comp->target().isSMP()); - bool lazyVolatile = false; - - if (node->getSymbolReference()->getSymbol()->isShadow() && node->getSymbolReference()->getSymbol()->isOrdered() && comp->target().isSMP()) - { - needSync = true; - lazyVolatile = true; - } if (firstChild->getReferenceCount() > 1 && firstChild->getRegister() != NULL) { @@ -1386,45 +1346,18 @@ TR::Register *J9::Power::TreeEvaluator::awrtbarEvaluator(TR::Node *node, TR::Cod // RealTimeGC write barriers occur BEFORE the store if (comp->getOptions()->realTimeGC()) { - tempMR = TR::MemoryReference::createWithRootLoadOrStore(cg, node, TR::Compiler->om.sizeofReferenceAddress()); TR::Register *destinationAddressRegister = cg->allocateRegister(); - VMwrtbarEvaluator(node, sourceRegister, destinationRegister, destinationAddressRegister, tempMR, NULL, firstChild->isNonNull(), true, false, cg); - - TR::MemoryReference *dstMR = TR::MemoryReference::createWithDisplacement(cg, destinationAddressRegister, 0, TR::Compiler->om.sizeofReferenceAddress()); - if (needSync) - generateInstruction(cg, TR::InstOpCode::lwsync, node); - - generateMemSrc1Instruction(cg,TR::InstOpCode::Op_st, node, dstMR, sourceRegister); - - if (needSync) - { - // ordered and lazySet operations will not generate a post-write sync - // and will not mark unresolved snippets with in sync sequence to avoid - // PicBuilder overwriting the instruction that normally would have been sync - postSyncConditions(node, cg, sourceRegister, dstMR, TR::InstOpCode::sync, lazyVolatile); - } + TR::LoadStoreHandler::generateComputeAddressSequence(cg, destinationAddressRegister, node); + VMwrtbarEvaluator(node, sourceRegister, destinationRegister, destinationAddressRegister, NULL, firstChild->isNonNull(), true, false, cg); + TR::LoadStoreHandler::generateStoreAddressSequence(cg, sourceRegister, node, destinationAddressRegister, TR::InstOpCode::Op_st, TR::Compiler->om.sizeofReferenceAddress()); cg->stopUsingRegister(destinationAddressRegister); } else { - tempMR = TR::MemoryReference::createWithRootLoadOrStore(cg, node, TR::Compiler->om.sizeofReferenceAddress()); - - if (needSync) - generateInstruction(cg, TR::InstOpCode::lwsync, node); - - generateMemSrc1Instruction(cg,TR::InstOpCode::Op_st, node, tempMR, sourceRegister); - - if (needSync) - { - // ordered and lazySet operations will not generate a post-write sync - // and will not mark unresolved snippets with in sync sequence to avoid - // PicBuilder overwriting the instruction that normally would have been sync - postSyncConditions(node, cg, sourceRegister, tempMR, TR::InstOpCode::sync, lazyVolatile); - } - - VMwrtbarEvaluator(node, sourceRegister, destinationRegister, NULL, NULL, NULL, firstChild->isNonNull(), true, false, cg, flagsReg); + TR::LoadStoreHandler::generateStoreNodeSequence(cg, sourceRegister, node, TR::InstOpCode::Op_st, TR::Compiler->om.sizeofReferenceAddress()); + VMwrtbarEvaluator(node, sourceRegister, destinationRegister, NULL, NULL, firstChild->isNonNull(), true, false, cg, flagsReg); } if (killSource) @@ -1432,7 +1365,6 @@ TR::Register *J9::Power::TreeEvaluator::awrtbarEvaluator(TR::Node *node, TR::Cod cg->decReferenceCount(node->getFirstChild()); cg->decReferenceCount(node->getSecondChild()); - tempMR->decNodeReferenceCounts(cg); return NULL; } @@ -1455,8 +1387,6 @@ TR::Register *J9::Power::TreeEvaluator::awrtbariEvaluator(TR::Node *node, TR::Co bool usingCompressedPointers = false; bool bumpedRefCount = false; - bool needSync = (node->getSymbolReference()->getSymbol()->isSyncVolatile() && comp->target().isSMP()); - bool lazyVolatile = false; if (comp->getOption(TR_EnableFieldWatch) && !node->getSymbolReference()->getSymbol()->isArrayShadowSymbol()) { @@ -1466,12 +1396,6 @@ TR::Register *J9::Power::TreeEvaluator::awrtbariEvaluator(TR::Node *node, TR::Co TR::TreeEvaluator::rdWrtbarHelperForFieldWatch(node, cg, sideEffectRegister, valueReg); } - if (node->getSymbolReference()->getSymbol()->isShadow() && node->getSymbolReference()->getSymbol()->isOrdered() && comp->target().isSMP()) - { - needSync = true; - lazyVolatile = true; - } - if (comp->useCompressedPointers() && (node->getSymbolReference()->getSymbol()->getDataType() == TR::Address) && (node->getSecondChild()->getDataType() != TR::Address)) { usingCompressedPointers = true; @@ -1485,8 +1409,6 @@ TR::Register *J9::Power::TreeEvaluator::awrtbariEvaluator(TR::Node *node, TR::Co if (usingCompressedPointers) sizeofMR = TR::Compiler->om.sizeofReferenceField(); - TR::MemoryReference *tempMR = NULL; - TR::Register *compressedReg; if (secondChild->getReferenceCount() > 1 && secondChild->getRegister() != NULL) { @@ -1519,63 +1441,24 @@ TR::Register *J9::Power::TreeEvaluator::awrtbariEvaluator(TR::Node *node, TR::Co generateTrg1MemInstruction(cg, TR::InstOpCode::lwz, node, flagsReg, TR::MemoryReference::createWithDisplacement(cg, destinationRegister, getOffsetOfJ9ObjectFlags(), 4)); } + TR::InstOpCode::Mnemonic storeOp = usingCompressedPointers ? TR::InstOpCode::stw : TR::InstOpCode::Op_st; + TR::Register *storeRegister = usingCompressedPointers ? compressedReg : sourceRegister; + // RealTimeGC write barriers occur BEFORE the store if (comp->getOptions()->realTimeGC()) { - tempMR = TR::MemoryReference::createWithRootLoadOrStore(cg, node, sizeofMR); - TR::Register *destinationAddressRegister = cg->allocateRegister(); - VMwrtbarEvaluator(node, sourceRegister, destinationRegister, destinationAddressRegister, tempMR, NULL, secondChild->isNonNull(), true, usingCompressedPointers, cg); - - TR::MemoryReference *dstMR = TR::MemoryReference::createWithDisplacement(cg, destinationAddressRegister, 0, TR::Compiler->om.sizeofReferenceAddress()); - - if (needSync) - generateInstruction(cg, TR::InstOpCode::lwsync, node); - if (usingCompressedPointers) - generateMemSrc1Instruction(cg, TR::InstOpCode::stw, node, dstMR, compressedReg); - else - generateMemSrc1Instruction(cg,TR::InstOpCode::Op_st, node, dstMR, sourceRegister); - - if (needSync) - { - // ordered and lazySet operations will not generate a post-write sync - // and will not mark unresolved snippets with in sync sequence to avoid - // PicBuilder overwriting the instruction that normally would have been sync - if (usingCompressedPointers) - postSyncConditions(node, cg, compressedReg, dstMR, TR::InstOpCode::sync, lazyVolatile); - else - postSyncConditions(node, cg, sourceRegister, dstMR, TR::InstOpCode::sync, lazyVolatile); - } + TR::LoadStoreHandler::generateComputeAddressSequence(cg, destinationAddressRegister, node); + VMwrtbarEvaluator(node, storeRegister, destinationRegister, destinationAddressRegister, NULL, secondChild->isNonNull(), true, usingCompressedPointers, cg); + TR::LoadStoreHandler::generateStoreAddressSequence(cg, sourceRegister, node, destinationAddressRegister, storeOp, sizeofMR); cg->stopUsingRegister(destinationAddressRegister); } else { - tempMR = TR::MemoryReference::createWithRootLoadOrStore(cg, node, sizeofMR); - - if (needSync) - generateInstruction(cg, TR::InstOpCode::lwsync, node); - - if (usingCompressedPointers) - generateMemSrc1Instruction(cg, TR::InstOpCode::stw, node, tempMR, compressedReg); - else - generateMemSrc1Instruction(cg,TR::InstOpCode::Op_st, node, tempMR, sourceRegister); - - if (needSync) - { - // ordered and lazySet operations will not generate a post-write sync - // and will not mark unresolved snippets with in sync sequence to avoid - // PicBuilder overwriting the instruction that normally would have been sync - if (usingCompressedPointers) - postSyncConditions(node, cg, compressedReg, tempMR, TR::InstOpCode::sync, lazyVolatile); - else - postSyncConditions(node, cg, sourceRegister, tempMR, TR::InstOpCode::sync, lazyVolatile); - } - - cg->insertPrefetchIfNecessary(node, sourceRegister); - - VMwrtbarEvaluator(node, sourceRegister, destinationRegister, NULL, NULL, NULL, secondChild->isNonNull(), true, usingCompressedPointers, cg, flagsReg); + TR::LoadStoreHandler::generateStoreNodeSequence(cg, storeRegister, node, storeOp, sizeofMR); + VMwrtbarEvaluator(node, sourceRegister, destinationRegister, NULL, NULL, secondChild->isNonNull(), true, usingCompressedPointers, cg, flagsReg); } if (killSource) @@ -1583,7 +1466,6 @@ TR::Register *J9::Power::TreeEvaluator::awrtbariEvaluator(TR::Node *node, TR::Co cg->decReferenceCount(node->getSecondChild()); cg->decReferenceCount(node->getChild(2)); - tempMR->decNodeReferenceCounts(cg); if (comp->useCompressedPointers()) node->setStoreAlreadyEvaluated(true); @@ -8466,7 +8348,7 @@ static TR::Register *VMinlineCompareAndSwapObject(TR::Node *node, TR::CodeGenera } VMnonNullSrcWrtBarCardCheckEvaluator(node, comp->useCompressedPointers() ? wrtbarSrcReg : newVReg, objReg, cndReg, temp1Reg, temp2Reg, temp3Reg, temp4Reg, doneLabel, - conditions, NULL, false, false, cg); + conditions, false, cg); cg->stopUsingRegister(temp1Reg); cg->stopUsingRegister(temp2Reg); @@ -8517,18 +8399,16 @@ static TR::Register *VMinlineCompareAndSwapObject(TR::Node *node, TR::CodeGenera } TR::Register *dstAddrReg = NULL; - bool dstAddrComputed = false; if (comp->getOptions()->realTimeGC()) { dstAddrReg = cg->allocateRegister(); TR::addDependency(conditions, dstAddrReg, TR::RealRegister::gr4, TR_GPR, cg); generateTrg1Src2Instruction(cg, TR::InstOpCode::add, node, dstAddrReg, objReg, offsetReg); - dstAddrComputed = true; } VMnonNullSrcWrtBarCardCheckEvaluator(node, comp->useCompressedPointers() ? translatedNode->getRegister() : newVReg, objReg, cndReg, temp1Reg, temp2Reg, dstAddrReg, NULL, - wrtBarEndLabel, conditions, NULL, dstAddrComputed, false, cg); + wrtBarEndLabel, conditions, false, cg); if (comp->getOptions()->realTimeGC()) cg->stopUsingRegister(dstAddrReg); @@ -9989,13 +9869,13 @@ static TR::Register *inlineConcurrentLinkedQueueTMOffer(TR::Node *node, TR::Code conditions->getPostConditions()->getRegisterDependency(5)->setExcludeGPR0(); //5=temp2Reg } - VMnonNullSrcWrtBarCardCheckEvaluator(node, nReg, pReg, cndReg, qReg, retryCountReg, temp3Reg, temp4Reg, wrtbar1Donelabel, conditions, NULL, false, false, cg, false); + VMnonNullSrcWrtBarCardCheckEvaluator(node, nReg, pReg, cndReg, qReg, retryCountReg, temp3Reg, temp4Reg, wrtbar1Donelabel, conditions, false, cg, false); // ALG: *** wrtbar1Donelabel generateLabelInstruction(cg, TR::InstOpCode::label, node, wrtbar1Donelabel); generateTrg1Src1Instruction(cg, TR::InstOpCode::mr, node, pReg, objReg); - VMnonNullSrcWrtBarCardCheckEvaluator(node, nReg, pReg, cndReg, qReg, retryCountReg, temp3Reg, temp4Reg, returnLabel, conditions, NULL, false, false, cg, false); + VMnonNullSrcWrtBarCardCheckEvaluator(node, nReg, pReg, cndReg, qReg, retryCountReg, temp3Reg, temp4Reg, returnLabel, conditions, false, cg, false); } else if (doCrdMrk) @@ -10166,7 +10046,7 @@ static TR::Register *inlineConcurrentLinkedQueueTMPoll(TR::Node *node, TR::CodeG conditions->getPostConditions()->getRegisterDependency(4)->setExcludeGPR0(); //4=temp2Reg } - VMnonNullSrcWrtBarCardCheckEvaluator(node, qReg, objReg, cndReg, nullReg, pReg, temp3Reg, temp4Reg, returnLabel, conditions, NULL, false, false, cg, false); + VMnonNullSrcWrtBarCardCheckEvaluator(node, qReg, objReg, cndReg, nullReg, pReg, temp3Reg, temp4Reg, returnLabel, conditions, false, cg, false); } else if (doCrdMrk) { From 46fc477f605ddc1158a12335082609d905f07b53 Mon Sep 17 00:00:00 2001 From: Ben Thomas Date: Fri, 27 Nov 2020 17:27:33 +0000 Subject: [PATCH 4/7] Update read barriers to use the new LoadStoreHandler API Previously, the code for performing read barriers was using the old MemoryReference-based API for performing its loads. This API is being deprecated in OMR, so all code dealing with read barriers has been updated to use the new LoadStoreHandler API. Unfortunately, this code cannot be updated in its current form to use LoadStoreHandler as intended since it unavoidably performs the load during ICF. Thus, it currently uses generateComputeAddressSequence to compute the address but does not use generateLoadAddressSequence to perform the load. This is safe since OpenJ9 is the most derived project that could possibly overload LoadStoreHandler and emitting unnecessary sync instructions (while bad for performance) is not incorrect. This code should be revisited later to make it more idiomatic, but that is beyond the scope of this particular change. Signed-off-by: Ben Thomas --- .../compiler/p/codegen/J9TreeEvaluator.cpp | 55 ++----------------- 1 file changed, 6 insertions(+), 49 deletions(-) diff --git a/runtime/compiler/p/codegen/J9TreeEvaluator.cpp b/runtime/compiler/p/codegen/J9TreeEvaluator.cpp index 0775e950ca5..0513910d3ed 100644 --- a/runtime/compiler/p/codegen/J9TreeEvaluator.cpp +++ b/runtime/compiler/p/codegen/J9TreeEvaluator.cpp @@ -1479,7 +1479,6 @@ TR::Register *iGenerateSoftwareReadBarrier(TR::Node *node, TR::CodeGenerator *cg TR_ASSERT_FATAL(false, "Concurrent Scavenger not supported."); #else TR::Compilation *comp = cg->comp(); - TR::MemoryReference *tempMR = NULL; TR::Register *objReg = cg->allocateRegister(); TR::Register *locationReg = cg->allocateRegister(); @@ -1510,25 +1509,8 @@ TR::Register *iGenerateSoftwareReadBarrier(TR::Node *node, TR::CodeGenerator *cg } node->setRegister(objReg); - bool needSync = (node->getSymbolReference()->getSymbol()->isSyncVolatile() && comp->target().isSMP()); - // If the reference is volatile or potentially volatile, we keep a fixed instruction - // layout in order to patch if it turns out that the reference isn't really volatile. - tempMR = TR::MemoryReference::createWithRootLoadOrStore(cg, node, 4); - if (tempMR->getIndexRegister() != NULL) - generateTrg1Src2Instruction(cg, TR::InstOpCode::add, node, locationReg, tempMR->getBaseRegister(), tempMR->getIndexRegister()); - else - generateTrg1MemInstruction(cg, TR::InstOpCode::addi2, node, locationReg, tempMR); - - /* - * Loading the effective address does not actually need a sync instruction. - * However, this point sometimes gets patched so a nop is needed here until all the patching code is updated. - */ - //TODO: Patching code needs to be updated to know not to expect a sync instruction here. - if (needSync) - { - TR::TreeEvaluator::postSyncConditions(node, cg, locationReg, tempMR, TR::InstOpCode::nop); - } + TR::LoadStoreHandler::generateComputeAddressSequence(cg, locationReg, node); generateTrg1MemInstruction(cg, TR::InstOpCode::lwz, node, objReg, TR::MemoryReference::createWithDisplacement(cg, locationReg, 0, 4)); @@ -1553,16 +1535,14 @@ TR::Register *iGenerateSoftwareReadBarrier(TR::Node *node, TR::CodeGenerator *cg generateDepLabelInstruction(cg, TR::InstOpCode::label, node, endLabel, deps); - //TODO: Patching code needs to be updated to be able to patch out these new sync instructions. - if (needSync) + // TODO: Allow this to be patched or skipped at runtime for unresolved symrefs + if (node->getSymbol()->isSyncVolatile() && comp->target().isSMP()) { generateInstruction(cg, comp->target().cpu.isAtLeast(OMR_PROCESSOR_PPC_P7) ? TR::InstOpCode::lwsync : TR::InstOpCode::isync, node); } cg->insertPrefetchIfNecessary(node, objReg); - tempMR->decNodeReferenceCounts(cg); - cg->stopUsingRegister(evacuateReg); cg->stopUsingRegister(locationReg); cg->stopUsingRegister(r3Reg); @@ -1581,7 +1561,6 @@ TR::Register *aGenerateSoftwareReadBarrier(TR::Node *node, TR::CodeGenerator *cg TR_ASSERT_FATAL(false, "Concurrent Scavenger not supported."); #else TR::Compilation *comp = cg->comp(); - TR::MemoryReference *tempMR = NULL; TR::Register *tempReg; TR::Register *locationReg = cg->allocateRegister(); @@ -1619,29 +1598,9 @@ TR::Register *aGenerateSoftwareReadBarrier(TR::Node *node, TR::CodeGenerator *cg deps->addPostCondition(metaReg, TR::RealRegister::NoReg); deps->addPostCondition(condReg, TR::RealRegister::NoReg); - // If the reference is volatile or potentially volatile, we keep a fixed instruction - // layout in order to patch if it turns out that the reference isn't really volatile. - - bool needSync = (node->getSymbolReference()->getSymbol()->isSyncVolatile() && comp->target().isSMP()); - - tempMR = TR::MemoryReference::createWithRootLoadOrStore(cg, node, TR::Compiler->om.sizeofReferenceAddress()); - node->setRegister(tempReg); - if (tempMR->getIndexRegister() != NULL) - generateTrg1Src2Instruction(cg, TR::InstOpCode::add, node, locationReg, tempMR->getBaseRegister(), tempMR->getIndexRegister()); - else - generateTrg1MemInstruction(cg, TR::InstOpCode::addi2, node, locationReg, tempMR); - - /* - * Loading the effective address does not actually need a sync instruction. - * However, this point sometimes gets patched so a nop is needed here until all the patching code is updated. - */ - //TODO: Patching code needs to be updated to know not to expect a sync instruction here. - if (needSync) - { - TR::TreeEvaluator::postSyncConditions(node, cg, locationReg, tempMR, TR::InstOpCode::nop); - } + TR::LoadStoreHandler::generateComputeAddressSequence(cg, locationReg, node); generateTrg1MemInstruction(cg, TR::InstOpCode::Op_load, node, tempReg, TR::MemoryReference::createWithDisplacement(cg, locationReg, 0, TR::Compiler->om.sizeofReferenceAddress())); @@ -1673,14 +1632,12 @@ TR::Register *aGenerateSoftwareReadBarrier(TR::Node *node, TR::CodeGenerator *cg generateDepLabelInstruction(cg, TR::InstOpCode::label, node, endLabel, deps); - //TODO: Patching code needs to be updated to be able to patch out these new sync instructions. - if (needSync) + // TODO: Allow this to be patched or skipped at runtime for unresolved symrefs + if (node->getSymbol()->isSyncVolatile() && comp->target().isSMP()) { generateInstruction(cg, comp->target().cpu.isAtLeast(OMR_PROCESSOR_PPC_P7) ? TR::InstOpCode::lwsync : TR::InstOpCode::isync, node); } - tempMR->decNodeReferenceCounts(cg); - cg->stopUsingRegister(evacuateReg); cg->stopUsingRegister(locationReg); cg->stopUsingRegister(r3Reg); From bf696f2d44715fd5a8582672ef80c93e7ed92be7 Mon Sep 17 00:00:00 2001 From: Ben Thomas Date: Fri, 27 Nov 2020 17:35:44 +0000 Subject: [PATCH 5/7] Update simple read monitors to use the new LoadStoreHandler API Previously, the code for emitting simple read monitors was using the old MemoryReference-based API for performing the load. This API is being deprecated in OMR, so all code dealing with read barriers has been updated to use the new LoadStoreHandler API. Signed-off-by: Ben Thomas --- .../compiler/p/codegen/J9TreeEvaluator.cpp | 33 +++++++++---------- 1 file changed, 15 insertions(+), 18 deletions(-) diff --git a/runtime/compiler/p/codegen/J9TreeEvaluator.cpp b/runtime/compiler/p/codegen/J9TreeEvaluator.cpp index 0513910d3ed..b66d9ab38fc 100644 --- a/runtime/compiler/p/codegen/J9TreeEvaluator.cpp +++ b/runtime/compiler/p/codegen/J9TreeEvaluator.cpp @@ -6928,11 +6928,7 @@ static bool simpleReadMonitor(TR::Node *node, TR::CodeGenerator *cg, TR::Node *o return false; if (nextTopNode->getOpCodeValue() == TR::treetop || nextTopNode->getOpCodeValue() == TR::iRegStore) nextTopNode = nextTopNode->getFirstChild(); - if (!nextTopNode->getOpCode().isMemoryReference()) - return false; - if (nextTopNode->getSymbolReference()->isUnresolved()) - return false; - if (nextTopNode->getSymbolReference()->getSymbol()->isSyncVolatile() && comp->target().isSMP()) + if (!TR::LoadStoreHandler::isSimpleLoad(cg, nextTopNode)) return false; // possible TODO: expand the set of load types we can handle if (nextTopNode->getOpCodeValue() != TR::aloadi && nextTopNode->getOpCodeValue() != TR::iloadi) @@ -7069,7 +7065,7 @@ static bool simpleReadMonitor(TR::Node *node, TR::CodeGenerator *cg, TR::Node *o // the following code is derived from the iaload and iiload evaluators TR::Register *loadResultReg; - TR::MemoryReference *tempMR = NULL; + OMR::Power::NodeMemoryReference tempMR; if (nextTopNode->getOpCodeValue() == TR::aloadi) loadResultReg = cg->allocateCollectedReferenceRegister(); else @@ -7085,26 +7081,27 @@ static bool simpleReadMonitor(TR::Node *node, TR::CodeGenerator *cg, TR::Node *o { if (TR::Compiler->om.compressObjectReferences() && nextTopNode->getSymbol()->isClassObject()) { - tempMR = TR::MemoryReference::createWithRootLoadOrStore(cg, nextTopNode, 4); + tempMR = TR::LoadStoreHandler::generateSimpleLoadMemoryReference(cg, nextTopNode, 4); loadOpCode = TR::InstOpCode::lwz; } else { - tempMR = TR::MemoryReference::createWithRootLoadOrStore(cg, nextTopNode, 8); + tempMR = TR::LoadStoreHandler::generateSimpleLoadMemoryReference(cg, nextTopNode, 8); loadOpCode = TR::InstOpCode::ld; } } else { - tempMR = TR::MemoryReference::createWithRootLoadOrStore(cg, nextTopNode, 4); + tempMR = TR::LoadStoreHandler::generateSimpleLoadMemoryReference(cg, nextTopNode, 4); loadOpCode = TR::InstOpCode::lwz; } + TR_ASSERT_FATAL_WITH_NODE(nextTopNode, !tempMR.getMemoryReference()->getIndexRegister(), "Simple read monitors do not currently support indexed loads"); // end of code derived from the iaload and iiload evaluators TR::addDependency(conditions, loadResultReg, TR::RealRegister::NoReg, TR_GPR, cg); - if (tempMR->getBaseRegister() != objReg) + if (tempMR.getMemoryReference()->getBaseRegister() != objReg) { - TR::addDependency(conditions, tempMR->getBaseRegister(), TR::RealRegister::NoReg, TR_GPR, cg); + TR::addDependency(conditions, tempMR.getMemoryReference()->getBaseRegister(), TR::RealRegister::NoReg, TR_GPR, cg); conditions->getPreConditions()->getRegisterDependency(4)->setExcludeGPR0(); conditions->getPostConditions()->getRegisterDependency(4)->setExcludeGPR0(); } @@ -7114,7 +7111,7 @@ static bool simpleReadMonitor(TR::Node *node, TR::CodeGenerator *cg, TR::Node *o if (objectClassReg) baseReg = objectClassReg; - bool altsequence = (comp->target().cpu.is(OMR_PROCESSOR_PPC_GP) || comp->target().cpu.is(OMR_PROCESSOR_PPC_GR)) && tempMR->getBaseRegister() == objReg; + bool altsequence = (comp->target().cpu.is(OMR_PROCESSOR_PPC_GP) || comp->target().cpu.is(OMR_PROCESSOR_PPC_GR)) && tempMR.getMemoryReference()->getBaseRegister() == objReg; TR::LabelSymbol *loopLabel, *restartLabel, *recurCheckLabel, *monExitCallLabel; loopLabel = generateLabelSymbol(cg); @@ -7153,8 +7150,8 @@ static bool simpleReadMonitor(TR::Node *node, TR::CodeGenerator *cg, TR::Node *o opCode = TR::InstOpCode::stwcx_r; generateMemSrc1Instruction(cg, opCode, node, TR::MemoryReference::createWithIndexReg(cg, baseReg, offsetReg, lockSize), metaReg); generateConditionalBranchInstruction(cg, TR::InstOpCode::bne, PPCOpProp_BranchUnlikely, node, loopLabel, cndReg); - generateTrg1Src2Instruction(cg, TR::InstOpCode::OR, node, tempMR->getBaseRegister(), tempMR->getBaseRegister(), monitorReg); - generateTrg1MemInstruction(cg, loadOpCode, node, loadResultReg, tempMR); + generateTrg1Src2Instruction(cg, TR::InstOpCode::OR, node, tempMR.getMemoryReference()->getBaseRegister(), tempMR.getMemoryReference()->getBaseRegister(), monitorReg); + generateTrg1MemInstruction(cg, loadOpCode, node, loadResultReg, tempMR.getMemoryReference()); if (needlwsync) generateInstruction(cg, TR::InstOpCode::lwsync, node); else @@ -7168,8 +7165,8 @@ static bool simpleReadMonitor(TR::Node *node, TR::CodeGenerator *cg, TR::Node *o } else { - generateTrg1Src2Instruction(cg, TR::InstOpCode::OR, node, tempMR->getBaseRegister(), tempMR->getBaseRegister(), monitorReg); - generateTrg1MemInstruction(cg, loadOpCode, node, loadResultReg, tempMR); + generateTrg1Src2Instruction(cg, TR::InstOpCode::OR, node, tempMR.getMemoryReference()->getBaseRegister(), tempMR.getMemoryReference()->getBaseRegister(), monitorReg); + generateTrg1MemInstruction(cg, loadOpCode, node, loadResultReg, tempMR.getMemoryReference()); if (needlwsync) generateInstruction(cg, TR::InstOpCode::lwsync, node); else @@ -7197,12 +7194,12 @@ static bool simpleReadMonitor(TR::Node *node, TR::CodeGenerator *cg, TR::Node *o cg->decReferenceCount(objNode); TR::Snippet *snippet = new (cg->trHeapMemory()) TR::PPCReadMonitorSnippet(cg, node, secondNextTopNode, recurCheckLabel, monExitCallLabel, restartLabel, loadOpCode, - tempMR->getOffset(), objectClassReg); + tempMR.getMemoryReference()->getOffset(), objectClassReg); cg->addSnippet(snippet); // the load and monexit trees need reference count adjustments and // must not be evaluated - tempMR->decNodeReferenceCounts(cg); + tempMR.decReferenceCounts(cg); cg->decReferenceCount(nextTopNode); if (secondNextTopNode == secondNextTreeTop->getNode()) cg->recursivelyDecReferenceCount(secondNextTopNode->getFirstChild()); From b93e826a2c87fcf670be5f0abe8bb439e9d2ba59 Mon Sep 17 00:00:00 2001 From: Ben Thomas Date: Tue, 19 Jan 2021 10:53:32 -0500 Subject: [PATCH 6/7] Update copyright dates to 2021 Signed-off-by: Ben Thomas --- runtime/compiler/build/files/target/p.mk | 2 +- runtime/compiler/p/codegen/J9CodeGenerator.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/runtime/compiler/build/files/target/p.mk b/runtime/compiler/build/files/target/p.mk index fbb0d6f3dfe..1139ceb3484 100644 --- a/runtime/compiler/build/files/target/p.mk +++ b/runtime/compiler/build/files/target/p.mk @@ -1,4 +1,4 @@ -# Copyright (c) 2000, 2020 IBM Corp. and others +# Copyright (c) 2000, 2021 IBM Corp. and others # # This program and the accompanying materials are made available under # the terms of the Eclipse Public License 2.0 which accompanies this diff --git a/runtime/compiler/p/codegen/J9CodeGenerator.cpp b/runtime/compiler/p/codegen/J9CodeGenerator.cpp index 59ef75114a0..5bc8a0c9780 100644 --- a/runtime/compiler/p/codegen/J9CodeGenerator.cpp +++ b/runtime/compiler/p/codegen/J9CodeGenerator.cpp @@ -1,5 +1,5 @@ /******************************************************************************* - * Copyright (c) 2000, 2020 IBM Corp. and others + * Copyright (c) 2000, 2021 IBM Corp. and others * * This program and the accompanying materials are made available under * the terms of the Eclipse Public License 2.0 which accompanies this From f8d77f91e715b71d9940157a813c53ddcbcd97d5 Mon Sep 17 00:00:00 2001 From: Ben Thomas Date: Thu, 11 Feb 2021 00:04:24 -0500 Subject: [PATCH 7/7] Remove assert at Julian's request Signed-off-by: Ben Thomas --- runtime/compiler/p/codegen/J9TreeEvaluator.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/runtime/compiler/p/codegen/J9TreeEvaluator.cpp b/runtime/compiler/p/codegen/J9TreeEvaluator.cpp index b66d9ab38fc..08bfd01e6cc 100644 --- a/runtime/compiler/p/codegen/J9TreeEvaluator.cpp +++ b/runtime/compiler/p/codegen/J9TreeEvaluator.cpp @@ -7095,7 +7095,6 @@ static bool simpleReadMonitor(TR::Node *node, TR::CodeGenerator *cg, TR::Node *o tempMR = TR::LoadStoreHandler::generateSimpleLoadMemoryReference(cg, nextTopNode, 4); loadOpCode = TR::InstOpCode::lwz; } - TR_ASSERT_FATAL_WITH_NODE(nextTopNode, !tempMR.getMemoryReference()->getIndexRegister(), "Simple read monitors do not currently support indexed loads"); // end of code derived from the iaload and iiload evaluators TR::addDependency(conditions, loadResultReg, TR::RealRegister::NoReg, TR_GPR, cg);