Skip to content

Commit

Permalink
Merge pull request #541 from zakkak/2023-07-21-23.0-CPU-sync
Browse files Browse the repository at this point in the history
[23.0] Synchronization with upstream 23.0 branch for July CPU release
  • Loading branch information
zakkak authored Jul 21, 2023
2 parents f285c95 + 74f4d06 commit 35d6811
Show file tree
Hide file tree
Showing 28 changed files with 401 additions and 217 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -238,16 +238,17 @@ public void emitCode(CompilationResultBuilder crb, AArch64MacroAssembler masm) {
// Setup the counter
masm.neon.moveVI(ASIMDSize.FullReg, ElementSize.Word, v4, 0);
masm.neon.moveVI(ASIMDSize.FullReg, ElementSize.Word, v5, 1);
masm.neon.insXX(ElementSize.Word, v4, 3, v5, 3);
// v4 contains { 0, 0, 0, 1 }
masm.neon.insXX(ElementSize.Word, v4, 2, v5, 2);
// v4 contains { 0, 1 }

// Load the counter into v0
// 128-bit big-endian increment
masm.fldr(128, v0, AArch64Address.createBaseRegisterOnlyAddress(128, counter));
masm.neon.rev32VV(ASIMDSize.FullReg, ElementSize.Byte, v16, v0);
masm.neon.addVVV(ASIMDSize.FullReg, ElementSize.Word, v16, v16, v4);
masm.neon.rev32VV(ASIMDSize.FullReg, ElementSize.Byte, v16, v16);
// Save the incremented counter back
masm.neon.rev64VV(ASIMDSize.FullReg, ElementSize.Byte, v16, v0);
beAdd128x64(masm, v16, v16, v4, v5);
masm.neon.rev64VV(ASIMDSize.FullReg, ElementSize.Byte, v16, v16);
masm.fstr(128, v16, AArch64Address.createBaseRegisterOnlyAddress(128, counter));
// Previous counter value is in v0
// v4 contains { 0, 1 }

// We have fewer than bulk_width blocks of data left. Encrypt
// them one by one until there is less than a full block
Expand Down Expand Up @@ -277,9 +278,9 @@ public void emitCode(CompilationResultBuilder crb, AArch64MacroAssembler masm) {

// Increment the counter, store it back
masm.neon.orrVVV(ASIMDSize.FullReg, v0, v16, v16);
masm.neon.rev32VV(ASIMDSize.FullReg, ElementSize.Byte, v16, v16);
masm.neon.addVVV(ASIMDSize.FullReg, ElementSize.Word, v16, v16, v4);
masm.neon.rev32VV(ASIMDSize.FullReg, ElementSize.Byte, v16, v16);
masm.neon.rev64VV(ASIMDSize.FullReg, ElementSize.Byte, v16, v16);
beAdd128x64(masm, v16, v16, v4, v5);
masm.neon.rev64VV(ASIMDSize.FullReg, ElementSize.Byte, v16, v16);
// Save the incremented counter back
masm.fstr(128, v16, AArch64Address.createBaseRegisterOnlyAddress(128, counter));

Expand Down Expand Up @@ -311,6 +312,22 @@ public void emitCode(CompilationResultBuilder crb, AArch64MacroAssembler masm) {
masm.mov(32, result, savedLen);
}

// Big-endian 128-bit + 64-bit -> 128-bit addition, emitted as four NEON instructions.
// Inputs: in, a 128-bit value. in is preserved (unless it is the same register as result).
// The least-significant 64-bit word is in the upper dword of each vector.
// inc (the 64-bit increment) is preserved. Its lower dword must be zero.
// tmp is a scratch vector register; its previous contents are overwritten.
// Output: result. The callers visible in this file pass the same register for result and in.
// NOTE(review): result presumably must not be the same register as inc or tmp, since
// inc is read and tmp is written after result has been produced by the first add -- confirm.
private static void beAdd128x64(AArch64MacroAssembler masm, Register result, Register in, Register inc, Register tmp) {
// Add inc to the least-significant dword of input.
// The add is done per 64-bit lane; because the lower dword of inc is zero,
// the most-significant dword of the input is copied through unchanged.
masm.neon.addVVV(ASIMDSize.FullReg, ElementSize.DoubleWord, result, in, inc);
// Check for result overflowing: the 64-bit add wrapped around exactly when inc is
// unsigned-higher than the sum. In that case the compare leaves all ones (-1) in
// that lane of tmp, otherwise zero.
masm.neon.cmhiVVV(ASIMDSize.FullReg, ElementSize.DoubleWord, tmp, inc, result);
// Swap LSD of comparison result to MSD and MSD == 0 (must be!) to LSD.
// The MSD lane of the comparison is always zero because inc is zero there, and
// zero is never unsigned-higher than anything.
masm.neon.extVVV(ASIMDSize.FullReg, tmp, tmp, tmp, 0x08);
// Subtract -1 from MSD if there was an overflow, i.e. propagate the carry by adding 1
// to the most-significant dword. The least-significant dword has 0 subtracted from it.
masm.neon.subVVV(ASIMDSize.FullReg, ElementSize.DoubleWord, result, result, tmp);
}

private static void emitCTRLargeBlock(AArch64MacroAssembler masm, int bulkWidth, Register in, Register out, Register counter,
Register usedPtr, Register len, Register used, Register offset, Register keylen) {
GraalError.guarantee(bulkWidth == 4 || bulkWidth == 8, "bulk_width must be 4 or 8");
Expand All @@ -337,20 +354,20 @@ private static void emitCTRLargeBlock(AArch64MacroAssembler masm, int bulkWidth,
// v0 contains the first counter
masm.fldr(128, v0, AArch64Address.createBaseRegisterOnlyAddress(128, counter));
// v16 contains byte-reversed counter
masm.neon.rev32VV(ASIMDSize.FullReg, ElementSize.Byte, v16, v0);
masm.neon.rev64VV(ASIMDSize.FullReg, ElementSize.Byte, v16, v0);

// AES/CTR loop
masm.bind(labelCTRLoop);

// Setup the counters
masm.neon.moveVI(ASIMDSize.FullReg, ElementSize.Word, v8, 0);
masm.neon.moveVI(ASIMDSize.FullReg, ElementSize.Word, v9, 1);
masm.neon.insXX(ElementSize.Word, v8, 3, v9, 3);
// v8 contains { 0, 0, 0, 1 }
masm.neon.insXX(ElementSize.Word, v8, 2, v9, 2);
// v8 contains { 0, 1 }

for (int i = 0; i < bulkWidth; i++) {
masm.neon.rev32VV(ASIMDSize.FullReg, ElementSize.Byte, asFloatRegister(v0, i), v16);
masm.neon.addVVV(ASIMDSize.FullReg, ElementSize.Word, v16, v16, v8);
masm.neon.rev64VV(ASIMDSize.FullReg, ElementSize.Byte, asFloatRegister(v0, i), v16);
beAdd128x64(masm, v16, v16, v8, v9);
}

masm.neon.ld1MultipleVVVV(ASIMDSize.FullReg, ElementSize.Byte, v8, v9, v10, v11,
Expand Down Expand Up @@ -381,7 +398,7 @@ private static void emitCTRLargeBlock(AArch64MacroAssembler masm, int bulkWidth,
masm.cbnz(32, len, labelCTRLoop);

// Save the counter back where it goes
masm.neon.rev32VV(ASIMDSize.FullReg, ElementSize.Byte, v16, v16);
masm.neon.rev64VV(ASIMDSize.FullReg, ElementSize.Byte, v16, v16);
masm.fstr(128, v16, AArch64Address.createBaseRegisterOnlyAddress(128, counter));

masm.neon.ld1MultipleVVVV(ASIMDSize.FullReg, ElementSize.Byte, v8, v9, v10, v11,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,6 @@
import org.graalvm.compiler.core.common.calc.Condition;
import org.graalvm.compiler.core.common.memory.BarrierType;
import org.graalvm.compiler.core.common.memory.MemoryOrderMode;
import org.graalvm.compiler.nodes.ComputeObjectAddressNode;
import org.graalvm.compiler.nodes.ConstantNode;
import org.graalvm.compiler.nodes.NamedLocationIdentity;
import org.graalvm.compiler.nodes.NodeView;
Expand Down Expand Up @@ -90,7 +89,6 @@
import jdk.vm.ci.code.Architecture;
import jdk.vm.ci.code.CodeUtil;
import jdk.vm.ci.meta.JavaKind;
import jdk.vm.ci.meta.MetaAccessProvider;
import jdk.vm.ci.meta.ResolvedJavaMethod;

public class AArch64GraphBuilderPlugins implements TargetGraphBuilderPlugins {
Expand Down Expand Up @@ -507,55 +505,39 @@ private static void registerStringCodingPlugins(InvocationPlugins plugins, Repla
@Override
public boolean apply(GraphBuilderContext b, ResolvedJavaMethod targetMethod, Receiver receiver, ValueNode sa, ValueNode sp,
ValueNode da, ValueNode dp, ValueNode len) {
MetaAccessProvider metaAccess = b.getMetaAccess();
int byteArrayBaseOffset = metaAccess.getArrayBaseOffset(JavaKind.Byte);

ValueNode srcOffset = AddNode.create(ConstantNode.forInt(byteArrayBaseOffset), new LeftShiftNode(sp, ConstantNode.forInt(2)), NodeView.DEFAULT);
ValueNode dstOffset = AddNode.create(ConstantNode.forInt(byteArrayBaseOffset), dp, NodeView.DEFAULT);

ComputeObjectAddressNode src = b.add(new ComputeObjectAddressNode(sa, srcOffset));
ComputeObjectAddressNode dst = b.add(new ComputeObjectAddressNode(da, dstOffset));

b.addPush(JavaKind.Int, new EncodeArrayNode(src, dst, len, ISO_8859_1, JavaKind.Byte));
return true;
try (InvocationPluginHelper helper = new InvocationPluginHelper(b, targetMethod)) {
int charElementShift = CodeUtil.log2(b.getMetaAccess().getArrayIndexScale(JavaKind.Char));
ValueNode src = helper.arrayElementPointer(sa, JavaKind.Byte, LeftShiftNode.create(sp, ConstantNode.forInt(charElementShift), NodeView.DEFAULT));
ValueNode dst = helper.arrayElementPointer(da, JavaKind.Byte, dp);
b.addPush(JavaKind.Int, new EncodeArrayNode(src, dst, len, ISO_8859_1, JavaKind.Byte));
return true;
}
}
});
r.register(new InvocationPlugin("implEncodeAsciiArray", char[].class, int.class, byte[].class, int.class, int.class) {
@Override
public boolean apply(GraphBuilderContext b, ResolvedJavaMethod targetMethod, Receiver receiver, ValueNode sa, ValueNode sp,
ValueNode da, ValueNode dp, ValueNode len) {
MetaAccessProvider metaAccess = b.getMetaAccess();
int charArrayBaseOffset = metaAccess.getArrayBaseOffset(JavaKind.Char);
int byteArrayBaseOffset = metaAccess.getArrayBaseOffset(JavaKind.Byte);

int charElementShift = CodeUtil.log2(metaAccess.getArrayIndexScale(JavaKind.Char));

ValueNode srcOffset = AddNode.create(ConstantNode.forInt(charArrayBaseOffset), new LeftShiftNode(sp, ConstantNode.forInt(charElementShift)), NodeView.DEFAULT);
ValueNode dstOffset = AddNode.create(ConstantNode.forInt(byteArrayBaseOffset), dp, NodeView.DEFAULT);

ComputeObjectAddressNode src = b.add(new ComputeObjectAddressNode(sa, srcOffset));
ComputeObjectAddressNode dst = b.add(new ComputeObjectAddressNode(da, dstOffset));

b.addPush(JavaKind.Int, new EncodeArrayNode(src, dst, len, ASCII, JavaKind.Char));
return true;
try (InvocationPluginHelper helper = new InvocationPluginHelper(b, targetMethod)) {
ValueNode src = helper.arrayElementPointer(sa, JavaKind.Char, sp);
ValueNode dst = helper.arrayElementPointer(da, JavaKind.Byte, dp);
b.addPush(JavaKind.Int, new EncodeArrayNode(src, dst, len, ASCII, JavaKind.Char));
return true;
}
}
});
r.register(new InvocationPlugin("hasNegatives", byte[].class, int.class, int.class) {
@Override
public boolean apply(GraphBuilderContext b, ResolvedJavaMethod targetMethod, Receiver receiver, ValueNode ba, ValueNode off, ValueNode len) {
try (InvocationPluginHelper helper = new InvocationPluginHelper(b, targetMethod)) {
MetaAccessProvider metaAccess = b.getMetaAccess();
int byteArrayBaseOffset = metaAccess.getArrayBaseOffset(JavaKind.Byte);
helper.intrinsicRangeCheck(off, Condition.LT, ConstantNode.forInt(0));
helper.intrinsicRangeCheck(len, Condition.LT, ConstantNode.forInt(0));

ValueNode arrayLength = b.add(new ArrayLengthNode(ba));
ValueNode limit = b.add(AddNode.create(off, len, NodeView.DEFAULT));
helper.intrinsicRangeCheck(arrayLength, Condition.LT, limit);

ValueNode arrayOffset = AddNode.create(ConstantNode.forInt(byteArrayBaseOffset), off, NodeView.DEFAULT);
ComputeObjectAddressNode array = b.add(new ComputeObjectAddressNode(ba, arrayOffset));

ValueNode array = helper.arrayElementPointer(ba, JavaKind.Byte, off);
b.addPush(JavaKind.Boolean, new HasNegativesNode(array, len));
return true;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,6 @@
import org.graalvm.compiler.core.common.calc.Condition;
import org.graalvm.compiler.core.common.memory.BarrierType;
import org.graalvm.compiler.core.common.memory.MemoryOrderMode;
import org.graalvm.compiler.nodes.ComputeObjectAddressNode;
import org.graalvm.compiler.nodes.ConstantNode;
import org.graalvm.compiler.nodes.NamedLocationIdentity;
import org.graalvm.compiler.nodes.NodeView;
Expand Down Expand Up @@ -101,7 +100,6 @@
import jdk.vm.ci.code.Architecture;
import jdk.vm.ci.code.CodeUtil;
import jdk.vm.ci.meta.JavaKind;
import jdk.vm.ci.meta.MetaAccessProvider;
import jdk.vm.ci.meta.ResolvedJavaMethod;

public class AMD64GraphBuilderPlugins implements TargetGraphBuilderPlugins {
Expand Down Expand Up @@ -597,35 +595,27 @@ private static void registerStringCodingPlugins(InvocationPlugins plugins, Repla
@Override
public boolean apply(GraphBuilderContext b, ResolvedJavaMethod targetMethod, Receiver receiver, ValueNode sa, ValueNode sp,
ValueNode da, ValueNode dp, ValueNode len) {
MetaAccessProvider metaAccess = b.getMetaAccess();
int byteArrayBaseOffset = metaAccess.getArrayBaseOffset(JavaKind.Byte);

ValueNode srcOffset = AddNode.create(ConstantNode.forInt(byteArrayBaseOffset), new LeftShiftNode(sp, ConstantNode.forInt(2)), NodeView.DEFAULT);
ValueNode dstOffset = AddNode.create(ConstantNode.forInt(byteArrayBaseOffset), dp, NodeView.DEFAULT);

ComputeObjectAddressNode src = b.add(new ComputeObjectAddressNode(sa, srcOffset));
ComputeObjectAddressNode dst = b.add(new ComputeObjectAddressNode(da, dstOffset));

b.addPush(JavaKind.Int, new EncodeArrayNode(src, dst, len, ISO_8859_1, JavaKind.Byte));
return true;
try (InvocationPluginHelper helper = new InvocationPluginHelper(b, targetMethod)) {
int charElementShift = CodeUtil.log2(b.getMetaAccess().getArrayIndexScale(JavaKind.Char));
ValueNode src = helper.arrayElementPointer(sa, JavaKind.Byte, LeftShiftNode.create(sp, ConstantNode.forInt(charElementShift), NodeView.DEFAULT));
ValueNode dst = helper.arrayElementPointer(da, JavaKind.Byte, dp);
b.addPush(JavaKind.Int, new EncodeArrayNode(src, dst, len, ISO_8859_1, JavaKind.Byte));
return true;
}
}
});
r.register(new InvocationPlugin("hasNegatives", byte[].class, int.class, int.class) {
@Override
public boolean apply(GraphBuilderContext b, ResolvedJavaMethod targetMethod, Receiver receiver, ValueNode ba, ValueNode off, ValueNode len) {
try (InvocationPluginHelper helper = new InvocationPluginHelper(b, targetMethod)) {
MetaAccessProvider metaAccess = b.getMetaAccess();
int byteArrayBaseOffset = metaAccess.getArrayBaseOffset(JavaKind.Byte);
helper.intrinsicRangeCheck(off, Condition.LT, ConstantNode.forInt(0));
helper.intrinsicRangeCheck(len, Condition.LT, ConstantNode.forInt(0));

ValueNode arrayLength = b.add(new ArrayLengthNode(ba));
ValueNode limit = b.add(AddNode.create(off, len, NodeView.DEFAULT));
helper.intrinsicRangeCheck(arrayLength, Condition.LT, limit);

ValueNode arrayOffset = AddNode.create(ConstantNode.forInt(byteArrayBaseOffset), off, NodeView.DEFAULT);
ComputeObjectAddressNode array = b.add(new ComputeObjectAddressNode(ba, arrayOffset));

ValueNode array = helper.arrayElementPointer(ba, JavaKind.Byte, off);
b.addPush(JavaKind.Boolean, new HasNegativesNode(array, len));
return true;
}
Expand All @@ -635,20 +625,12 @@ public boolean apply(GraphBuilderContext b, ResolvedJavaMethod targetMethod, Rec
@Override
public boolean apply(GraphBuilderContext b, ResolvedJavaMethod targetMethod, Receiver receiver, ValueNode sa, ValueNode sp,
ValueNode da, ValueNode dp, ValueNode len) {
MetaAccessProvider metaAccess = b.getMetaAccess();
int charArrayBaseOffset = metaAccess.getArrayBaseOffset(JavaKind.Char);
int byteArrayBaseOffset = metaAccess.getArrayBaseOffset(JavaKind.Byte);

int charElementShift = CodeUtil.log2(metaAccess.getArrayIndexScale(JavaKind.Char));

ValueNode srcOffset = AddNode.create(ConstantNode.forInt(charArrayBaseOffset), new LeftShiftNode(sp, ConstantNode.forInt(charElementShift)), NodeView.DEFAULT);
ValueNode dstOffset = AddNode.create(ConstantNode.forInt(byteArrayBaseOffset), dp, NodeView.DEFAULT);

ComputeObjectAddressNode src = b.add(new ComputeObjectAddressNode(sa, srcOffset));
ComputeObjectAddressNode dst = b.add(new ComputeObjectAddressNode(da, dstOffset));

b.addPush(JavaKind.Int, new EncodeArrayNode(src, dst, len, ASCII, JavaKind.Char));
return true;
try (InvocationPluginHelper helper = new InvocationPluginHelper(b, targetMethod)) {
ValueNode src = helper.arrayElementPointer(sa, JavaKind.Char, sp);
ValueNode dst = helper.arrayElementPointer(da, JavaKind.Byte, dp);
b.addPush(JavaKind.Int, new EncodeArrayNode(src, dst, len, ASCII, JavaKind.Char));
return true;
}
}
});

Expand Down
Loading

0 comments on commit 35d6811

Please sign in to comment.