Skip to content

Commit

Permalink
S390 [simd]: Implement vector load and zero
Browse files Browse the repository at this point in the history
This CL takes advantage of the z15 `load byte reverse element`
instruction to optimize Simd Load and Zero opcodes.

On the simulator we only run `load element` as reversing is
not required.

Change-Id: I868bda865249cdc525f804c8ddf4d45df5977a86
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3132965
Reviewed-by: Junliang Yan <[email protected]>
Commit-Queue: Milad Fa <[email protected]>
Cr-Commit-Position: refs/heads/main@{#76610}
  • Loading branch information
miladfarca authored and V8 LUCI CQ committed Aug 31, 2021
1 parent 2e5e2f1 commit 9cc4140
Show file tree
Hide file tree
Showing 6 changed files with 59 additions and 0 deletions.
32 changes: 32 additions & 0 deletions src/codegen/s390/macro-assembler-s390.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3910,6 +3910,7 @@ void TurboAssembler::StoreV128LE(Simd128Register src, const MemOperand& mem,
}
}

// Vector LE Load and Transform instructions.
void TurboAssembler::LoadAndSplat8x16LE(Simd128Register dst,
const MemOperand& mem) {
vlrep(dst, mem, Condition(0));
Expand Down Expand Up @@ -3960,6 +3961,26 @@ LOAD_EXTEND_LIST(LOAD_EXTEND)
#undef LOAD_EXTEND
#undef LOAD_EXTEND

// Loads a 32-bit little-endian value from |mem| into word element 3 of |dst|
// and leaves every other bit of |dst| zero.
//
// When VECTOR_ENHANCE_FACILITY_2 is available and the displacement of |mem|
// fits in an unsigned 12-bit field, a single byte-reversing element load is
// emitted. Otherwise the value is loaded through r1 (which is clobbered) and
// then inserted into the vector.
void TurboAssembler::LoadV32ZeroLE(Simd128Register dst, const MemOperand& mem) {
  // dst ^ dst == 0: clear the whole register before inserting the element.
  vx(dst, dst, dst, Condition(0), Condition(0), Condition(0));
  // vlebrf only encodes an unsigned 12-bit displacement, so operands with a
  // negative or larger offset must not take the fast path.
  if (CpuFeatures::IsSupported(VECTOR_ENHANCE_FACILITY_2) &&
      is_uint12(mem.offset())) {
    vlebrf(dst, mem, Condition(3));
    return;
  }
  // Generic path: little-endian load into a GPR, then move it into element 3.
  LoadU32LE(r1, mem);
  vlvg(dst, r1, MemOperand(r0, 3), Condition(2));
}

// Loads a 64-bit little-endian value from |mem| into doubleword element 1 of
// |dst| and leaves every other bit of |dst| zero.
//
// When VECTOR_ENHANCE_FACILITY_2 is available and the displacement of |mem|
// fits in an unsigned 12-bit field, a single byte-reversing element load is
// emitted. Otherwise the value is loaded through r1 (which is clobbered) and
// then inserted into the vector.
void TurboAssembler::LoadV64ZeroLE(Simd128Register dst, const MemOperand& mem) {
  // dst ^ dst == 0: clear the whole register before inserting the element.
  vx(dst, dst, dst, Condition(0), Condition(0), Condition(0));
  // vlebrg only encodes an unsigned 12-bit displacement, so operands with a
  // negative or larger offset must not take the fast path.
  if (CpuFeatures::IsSupported(VECTOR_ENHANCE_FACILITY_2) &&
      is_uint12(mem.offset())) {
    vlebrg(dst, mem, Condition(1));
    return;
  }
  // Generic path: little-endian load into a GPR, then move it into element 1.
  LoadU64LE(r1, mem);
  vlvg(dst, r1, MemOperand(r0, 1), Condition(3));
}

#else
void TurboAssembler::LoadU64LE(Register dst, const MemOperand& mem,
Register scratch) {
Expand Down Expand Up @@ -4032,6 +4053,7 @@ void TurboAssembler::StoreV128LE(Simd128Register src, const MemOperand& mem,
StoreV128(src, mem, scratch1);
}

// Vector LE Load and Transform instructions.
#define LOAD_SPLAT_LIST(V) \
V(64x2, 3) \
V(32x4, 2) \
Expand Down Expand Up @@ -4066,6 +4088,16 @@ LOAD_EXTEND_LIST(LOAD_EXTEND)
#undef LOAD_EXTEND
#undef LOAD_EXTEND

// Zeroes |dst| and then loads one 32-bit element from |mem| into word
// element 3. This variant uses a plain element load (no byte reversal).
void TurboAssembler::LoadV32ZeroLE(Simd128Register dst, const MemOperand& mem) {
  const Condition no_modifier = Condition(0);
  const Condition word_index = Condition(3);

  // XOR the register with itself so every element starts out as zero.
  vx(dst, dst, dst, no_modifier, no_modifier, no_modifier);
  // Only the selected element is overwritten; the rest stays zero.
  vlef(dst, mem, word_index);
}

// Zeroes |dst| and then loads one 64-bit element from |mem| into doubleword
// element 1. This variant uses a plain element load (no byte reversal).
void TurboAssembler::LoadV64ZeroLE(Simd128Register dst, const MemOperand& mem) {
  const Condition no_modifier = Condition(0);
  const Condition doubleword_index = Condition(1);

  // XOR the register with itself so every element starts out as zero.
  vx(dst, dst, dst, no_modifier, no_modifier, no_modifier);
  // Only the selected element is overwritten; the rest stays zero.
  vleg(dst, mem, doubleword_index);
}

#endif

// Load And Test (Reg <- Reg)
Expand Down
3 changes: 3 additions & 0 deletions src/codegen/s390/macro-assembler-s390.h
Original file line number Diff line number Diff line change
Expand Up @@ -392,6 +392,7 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
Register scratch1);
void LoadF64LE(DoubleRegister dst, const MemOperand& opnd, Register scratch);
void LoadF32LE(DoubleRegister dst, const MemOperand& opnd, Register scratch);
// Vector LE Load and Transform instructions.
void LoadAndSplat64x2LE(Simd128Register dst, const MemOperand& mem);
void LoadAndSplat32x4LE(Simd128Register dst, const MemOperand& mem);
void LoadAndSplat16x8LE(Simd128Register dst, const MemOperand& mem);
Expand All @@ -402,6 +403,8 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
void LoadAndExtend16x4SLE(Simd128Register dst, const MemOperand& mem);
void LoadAndExtend32x2ULE(Simd128Register dst, const MemOperand& mem);
void LoadAndExtend32x2SLE(Simd128Register dst, const MemOperand& mem);
// Load a single 32-bit / 64-bit little-endian value from |mem| into one
// element of |dst|; all other bits of |dst| are cleared first.
void LoadV32ZeroLE(Simd128Register dst, const MemOperand& mem);
void LoadV64ZeroLE(Simd128Register dst, const MemOperand& mem);

// Load And Test
void LoadAndTest32(Register dst, Register src);
Expand Down
14 changes: 14 additions & 0 deletions src/compiler/backend/s390/code-generator-s390.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3439,6 +3439,20 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
}
#undef LOAD_EXTEND
// Shared body for the SIMD "load and zero" opcodes: decodes the instruction's
// memory operand and Simd128 output register, then emits LoadV<type>ZeroLE.
// The macro declares locals (mode, operand, dst), so each use must live in its
// own brace-delimited case scope.
#define LOAD_AND_ZERO(type) \
AddressingMode mode = kMode_None; \
MemOperand operand = i.MemoryOperand(&mode); \
Simd128Register dst = i.OutputSimd128Register(); \
__ LoadV##type##ZeroLE(dst, operand);
// 32-bit load into an otherwise zeroed vector.
case kS390_S128Load32Zero: {
LOAD_AND_ZERO(32);
break;
}
// 64-bit load into an otherwise zeroed vector.
case kS390_S128Load64Zero: {
LOAD_AND_ZERO(64);
break;
}
#undef LOAD_AND_ZERO
case kS390_StoreCompressTagged: {
CHECK(!instr->HasOutput());
size_t index = 0;
Expand Down
2 changes: 2 additions & 0 deletions src/compiler/backend/s390/instruction-codes-s390.h
Original file line number Diff line number Diff line change
Expand Up @@ -382,6 +382,8 @@ namespace compiler {
V(S390_S128Load16x4U) \
V(S390_S128Load32x2S) \
V(S390_S128Load32x2U) \
V(S390_S128Load32Zero) \
V(S390_S128Load64Zero) \
V(S390_StoreSimd128) \
V(S390_LoadSimd128) \
V(S390_StoreCompressTagged) \
Expand Down
2 changes: 2 additions & 0 deletions src/compiler/backend/s390/instruction-scheduler-s390.cc
Original file line number Diff line number Diff line change
Expand Up @@ -369,6 +369,8 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kS390_S128Load16x4U:
case kS390_S128Load32x2S:
case kS390_S128Load32x2U:
case kS390_S128Load32Zero:
case kS390_S128Load64Zero:
return kIsLoadOperation;

case kS390_StoreWord8:
Expand Down
6 changes: 6 additions & 0 deletions src/compiler/backend/s390/instruction-selector-s390.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2825,6 +2825,12 @@ void InstructionSelector::VisitLoadTransform(Node* node) {
case LoadTransformation::kS128Load32x2U:
opcode = kS390_S128Load32x2U;
break;
case LoadTransformation::kS128Load32Zero:
opcode = kS390_S128Load32Zero;
break;
case LoadTransformation::kS128Load64Zero:
opcode = kS390_S128Load64Zero;
break;
default:
UNREACHABLE();
}
Expand Down

0 comments on commit 9cc4140

Please sign in to comment.