Skip to content
This repository was archived by the owner on Jan 26, 2022. It is now read-only.

Commit

Permalink
Rename shuffle to swizzle
Browse files Browse the repository at this point in the history
This is in preparation for replacing shuffleMix with a fully-general
two-input shuffle, to be named 'shuffle'.
  • Loading branch information
sunfishcode committed Oct 6, 2014
1 parent c5ad24c commit ceb4f9d
Show file tree
Hide file tree
Showing 7 changed files with 104 additions and 102 deletions.
96 changes: 48 additions & 48 deletions src/benchmarks/inverse4x4.js
Original file line number Diff line number Diff line change
Expand Up @@ -135,76 +135,76 @@

// ----
tmp1 = SIMD.float32x4.mul(row2, row3);
tmp1 = SIMD.float32x4.shuffle(tmp1, SIMD.YXWZ); // 0xB1 = 10110001
tmp1 = SIMD.float32x4.swizzle(tmp1, SIMD.YXWZ); // 0xB1 = 10110001
minor0 = SIMD.float32x4.mul(row1, tmp1);
minor1 = SIMD.float32x4.mul(row0, tmp1);
tmp1 = SIMD.float32x4.shuffle(tmp1, SIMD.ZWXY); // 0x4E = 01001110
tmp1 = SIMD.float32x4.swizzle(tmp1, SIMD.ZWXY); // 0x4E = 01001110
minor0 = SIMD.float32x4.sub(SIMD.float32x4.mul(row1, tmp1), minor0);
minor1 = SIMD.float32x4.sub(SIMD.float32x4.mul(row0, tmp1), minor1);
minor1 = SIMD.float32x4.shuffle(minor1, SIMD.ZWXY); // 0x4E = 01001110
minor1 = SIMD.float32x4.swizzle(minor1, SIMD.ZWXY); // 0x4E = 01001110

// ----
tmp1 = SIMD.float32x4.mul(row1, row2);
tmp1 = SIMD.float32x4.shuffle(tmp1, SIMD.YXWZ); // 0xB1 = 10110001
tmp1 = SIMD.float32x4.swizzle(tmp1, SIMD.YXWZ); // 0xB1 = 10110001
minor0 = SIMD.float32x4.add(SIMD.float32x4.mul(row3, tmp1), minor0);
minor3 = SIMD.float32x4.mul(row0, tmp1);
tmp1 = SIMD.float32x4.shuffle(tmp1, SIMD.ZWXY); // 0x4E = 01001110
tmp1 = SIMD.float32x4.swizzle(tmp1, SIMD.ZWXY); // 0x4E = 01001110
minor0 = SIMD.float32x4.sub(minor0, SIMD.float32x4.mul(row3, tmp1));
minor3 = SIMD.float32x4.sub(SIMD.float32x4.mul(row0, tmp1), minor3);
minor3 = SIMD.float32x4.shuffle(minor3, SIMD.ZWXY); // 0x4E = 01001110
minor3 = SIMD.float32x4.swizzle(minor3, SIMD.ZWXY); // 0x4E = 01001110

// ----
tmp1 = SIMD.float32x4.mul(SIMD.float32x4.shuffle(row1, SIMD.ZWXY), row3); // 0x4E = 01001110
tmp1 = SIMD.float32x4.shuffle(tmp1, SIMD.YXWZ); // 0xB1 = 10110001
row2 = SIMD.float32x4.shuffle(row2, SIMD.ZWXY); // 0x4E = 01001110
tmp1 = SIMD.float32x4.mul(SIMD.float32x4.swizzle(row1, SIMD.ZWXY), row3); // 0x4E = 01001110
tmp1 = SIMD.float32x4.swizzle(tmp1, SIMD.YXWZ); // 0xB1 = 10110001
row2 = SIMD.float32x4.swizzle(row2, SIMD.ZWXY); // 0x4E = 01001110
minor0 = SIMD.float32x4.add(SIMD.float32x4.mul(row2, tmp1), minor0);
minor2 = SIMD.float32x4.mul(row0, tmp1);
tmp1 = SIMD.float32x4.shuffle(tmp1, SIMD.ZWXY); // 0x4E = 01001110
tmp1 = SIMD.float32x4.swizzle(tmp1, SIMD.ZWXY); // 0x4E = 01001110
minor0 = SIMD.float32x4.sub(minor0, SIMD.float32x4.mul(row2, tmp1));
minor2 = SIMD.float32x4.sub(SIMD.float32x4.mul(row0, tmp1), minor2);
minor2 = SIMD.float32x4.shuffle(minor2, SIMD.ZWXY); // 0x4E = 01001110
minor2 = SIMD.float32x4.swizzle(minor2, SIMD.ZWXY); // 0x4E = 01001110

// ----
tmp1 = SIMD.float32x4.mul(row0, row1);
tmp1 = SIMD.float32x4.shuffle(tmp1, SIMD.YXWZ); // 0xB1 = 10110001
tmp1 = SIMD.float32x4.swizzle(tmp1, SIMD.YXWZ); // 0xB1 = 10110001
minor2 = SIMD.float32x4.add(SIMD.float32x4.mul(row3, tmp1), minor2);
minor3 = SIMD.float32x4.sub(SIMD.float32x4.mul(row2, tmp1), minor3);
tmp1 = SIMD.float32x4.shuffle(tmp1, SIMD.ZWXY); // 0x4E = 01001110
tmp1 = SIMD.float32x4.swizzle(tmp1, SIMD.ZWXY); // 0x4E = 01001110
minor2 = SIMD.float32x4.sub(SIMD.float32x4.mul(row3, tmp1), minor2);
minor3 = SIMD.float32x4.sub(minor3, SIMD.float32x4.mul(row2, tmp1));

// ----
tmp1 = SIMD.float32x4.mul(row0, row3);
tmp1 = SIMD.float32x4.shuffle(tmp1, SIMD.YXWZ); // 0xB1 = 10110001
tmp1 = SIMD.float32x4.swizzle(tmp1, SIMD.YXWZ); // 0xB1 = 10110001
minor1 = SIMD.float32x4.sub(minor1, SIMD.float32x4.mul(row2, tmp1));
minor2 = SIMD.float32x4.add(SIMD.float32x4.mul(row1, tmp1), minor2);
tmp1 = SIMD.float32x4.shuffle(tmp1, SIMD.ZWXY); // 0x4E = 01001110
tmp1 = SIMD.float32x4.swizzle(tmp1, SIMD.ZWXY); // 0x4E = 01001110
minor1 = SIMD.float32x4.add(SIMD.float32x4.mul(row2, tmp1), minor1);
minor2 = SIMD.float32x4.sub(minor2, SIMD.float32x4.mul(row1, tmp1));

// ----
tmp1 = SIMD.float32x4.mul(row0, row2);
tmp1 = SIMD.float32x4.shuffle(tmp1, SIMD.YXWZ); // 0xB1 = 10110001
tmp1 = SIMD.float32x4.swizzle(tmp1, SIMD.YXWZ); // 0xB1 = 10110001
minor1 = SIMD.float32x4.add(SIMD.float32x4.mul(row3, tmp1), minor1);
minor3 = SIMD.float32x4.sub(minor3, SIMD.float32x4.mul(row1, tmp1));
tmp1 = SIMD.float32x4.shuffle(tmp1, SIMD.ZWXY); // 0x4E = 01001110
tmp1 = SIMD.float32x4.swizzle(tmp1, SIMD.ZWXY); // 0x4E = 01001110
minor1 = SIMD.float32x4.sub(minor1, SIMD.float32x4.mul(row3, tmp1));
minor3 = SIMD.float32x4.add(SIMD.float32x4.mul(row1, tmp1), minor3);

// Compute determinant
det = SIMD.float32x4.mul(row0, minor0);
det = SIMD.float32x4.add(SIMD.float32x4.shuffle(det, SIMD.ZWXY), det); // 0x4E = 01001110
det = SIMD.float32x4.add(SIMD.float32x4.shuffle(det, SIMD.YXWZ), det); // 0xB1 = 10110001
det = SIMD.float32x4.add(SIMD.float32x4.swizzle(det, SIMD.ZWXY), det); // 0x4E = 01001110
det = SIMD.float32x4.add(SIMD.float32x4.swizzle(det, SIMD.YXWZ), det); // 0xB1 = 10110001
tmp1 = SIMD.float32x4.reciprocal(det);
det = SIMD.float32x4.sub(SIMD.float32x4.add(tmp1, tmp1), SIMD.float32x4.mul(det, SIMD.float32x4.mul(tmp1, tmp1)));
det = SIMD.float32x4.shuffle(det, SIMD.XXXX);
det = SIMD.float32x4.swizzle(det, SIMD.XXXX);

// These swizzles aren't necessary if the faulty transposition is done
// up at the top of this function.
//minor0 = SIMD.float32x4.shuffle(minor0, SIMD.ZYXW);
//minor1 = SIMD.float32x4.shuffle(minor1, SIMD.ZYXW);
//minor2 = SIMD.float32x4.shuffle(minor2, SIMD.ZYXW);
//minor3 = SIMD.float32x4.shuffle(minor3, SIMD.ZYXW);
//minor0 = SIMD.float32x4.swizzle(minor0, SIMD.ZYXW);
//minor1 = SIMD.float32x4.swizzle(minor1, SIMD.ZYXW);
//minor2 = SIMD.float32x4.swizzle(minor2, SIMD.ZYXW);
//minor3 = SIMD.float32x4.swizzle(minor3, SIMD.ZYXW);

// Compute final values by multiplying with 1/det
minor0 = SIMD.float32x4.mul(det, minor0);
Expand Down Expand Up @@ -344,76 +344,76 @@

// ----
tmp1 = SIMD.float32x4.mul(row2, row3);
tmp1 = SIMD.float32x4.shuffle(tmp1, SIMD.YXWZ); // 0xB1 = 10110001
tmp1 = SIMD.float32x4.swizzle(tmp1, SIMD.YXWZ); // 0xB1 = 10110001
minor0 = SIMD.float32x4.mul(row1, tmp1);
minor1 = SIMD.float32x4.mul(row0, tmp1);
tmp1 = SIMD.float32x4.shuffle(tmp1, SIMD.ZWXY); // 0x4E = 01001110
tmp1 = SIMD.float32x4.swizzle(tmp1, SIMD.ZWXY); // 0x4E = 01001110
minor0 = SIMD.float32x4.sub(SIMD.float32x4.mul(row1, tmp1), minor0);
minor1 = SIMD.float32x4.sub(SIMD.float32x4.mul(row0, tmp1), minor1);
minor1 = SIMD.float32x4.shuffle(minor1, SIMD.ZWXY); // 0x4E = 01001110
minor1 = SIMD.float32x4.swizzle(minor1, SIMD.ZWXY); // 0x4E = 01001110

// ----
tmp1 = SIMD.float32x4.mul(row1, row2);
tmp1 = SIMD.float32x4.shuffle(tmp1, SIMD.YXWZ); // 0xB1 = 10110001
tmp1 = SIMD.float32x4.swizzle(tmp1, SIMD.YXWZ); // 0xB1 = 10110001
minor0 = SIMD.float32x4.add(SIMD.float32x4.mul(row3, tmp1), minor0);
minor3 = SIMD.float32x4.mul(row0, tmp1);
tmp1 = SIMD.float32x4.shuffle(tmp1, SIMD.ZWXY); // 0x4E = 01001110
tmp1 = SIMD.float32x4.swizzle(tmp1, SIMD.ZWXY); // 0x4E = 01001110
minor0 = SIMD.float32x4.sub(minor0, SIMD.float32x4.mul(row3, tmp1));
minor3 = SIMD.float32x4.sub(SIMD.float32x4.mul(row0, tmp1), minor3);
minor3 = SIMD.float32x4.shuffle(minor3, SIMD.ZWXY); // 0x4E = 01001110
minor3 = SIMD.float32x4.swizzle(minor3, SIMD.ZWXY); // 0x4E = 01001110

// ----
tmp1 = SIMD.float32x4.mul(SIMD.float32x4.shuffle(row1, SIMD.ZWXY), row3); // 0x4E = 01001110
tmp1 = SIMD.float32x4.shuffle(tmp1, SIMD.YXWZ); // 0xB1 = 10110001
row2 = SIMD.float32x4.shuffle(row2, SIMD.ZWXY); // 0x4E = 01001110
tmp1 = SIMD.float32x4.mul(SIMD.float32x4.swizzle(row1, SIMD.ZWXY), row3); // 0x4E = 01001110
tmp1 = SIMD.float32x4.swizzle(tmp1, SIMD.YXWZ); // 0xB1 = 10110001
row2 = SIMD.float32x4.swizzle(row2, SIMD.ZWXY); // 0x4E = 01001110
minor0 = SIMD.float32x4.add(SIMD.float32x4.mul(row2, tmp1), minor0);
minor2 = SIMD.float32x4.mul(row0, tmp1);
tmp1 = SIMD.float32x4.shuffle(tmp1, SIMD.ZWXY); // 0x4E = 01001110
tmp1 = SIMD.float32x4.swizzle(tmp1, SIMD.ZWXY); // 0x4E = 01001110
minor0 = SIMD.float32x4.sub(minor0, SIMD.float32x4.mul(row2, tmp1));
minor2 = SIMD.float32x4.sub(SIMD.float32x4.mul(row0, tmp1), minor2);
minor2 = SIMD.float32x4.shuffle(minor2, SIMD.ZWXY); // 0x4E = 01001110
minor2 = SIMD.float32x4.swizzle(minor2, SIMD.ZWXY); // 0x4E = 01001110

// ----
tmp1 = SIMD.float32x4.mul(row0, row1);
tmp1 = SIMD.float32x4.shuffle(tmp1, SIMD.YXWZ); // 0xB1 = 10110001
tmp1 = SIMD.float32x4.swizzle(tmp1, SIMD.YXWZ); // 0xB1 = 10110001
minor2 = SIMD.float32x4.add(SIMD.float32x4.mul(row3, tmp1), minor2);
minor3 = SIMD.float32x4.sub(SIMD.float32x4.mul(row2, tmp1), minor3);
tmp1 = SIMD.float32x4.shuffle(tmp1, SIMD.ZWXY); // 0x4E = 01001110
tmp1 = SIMD.float32x4.swizzle(tmp1, SIMD.ZWXY); // 0x4E = 01001110
minor2 = SIMD.float32x4.sub(SIMD.float32x4.mul(row3, tmp1), minor2);
minor3 = SIMD.float32x4.sub(minor3, SIMD.float32x4.mul(row2, tmp1));

// ----
tmp1 = SIMD.float32x4.mul(row0, row3);
tmp1 = SIMD.float32x4.shuffle(tmp1, SIMD.YXWZ); // 0xB1 = 10110001
tmp1 = SIMD.float32x4.swizzle(tmp1, SIMD.YXWZ); // 0xB1 = 10110001
minor1 = SIMD.float32x4.sub(minor1, SIMD.float32x4.mul(row2, tmp1));
minor2 = SIMD.float32x4.add(SIMD.float32x4.mul(row1, tmp1), minor2);
tmp1 = SIMD.float32x4.shuffle(tmp1, SIMD.ZWXY); // 0x4E = 01001110
tmp1 = SIMD.float32x4.swizzle(tmp1, SIMD.ZWXY); // 0x4E = 01001110
minor1 = SIMD.float32x4.add(SIMD.float32x4.mul(row2, tmp1), minor1);
minor2 = SIMD.float32x4.sub(minor2, SIMD.float32x4.mul(row1, tmp1));

// ----
tmp1 = SIMD.float32x4.mul(row0, row2);
tmp1 = SIMD.float32x4.shuffle(tmp1, SIMD.YXWZ); // 0xB1 = 10110001
tmp1 = SIMD.float32x4.swizzle(tmp1, SIMD.YXWZ); // 0xB1 = 10110001
minor1 = SIMD.float32x4.add(SIMD.float32x4.mul(row3, tmp1), minor1);
minor3 = SIMD.float32x4.sub(minor3, SIMD.float32x4.mul(row1, tmp1));
tmp1 = SIMD.float32x4.shuffle(tmp1, SIMD.ZWXY); // 0x4E = 01001110
tmp1 = SIMD.float32x4.swizzle(tmp1, SIMD.ZWXY); // 0x4E = 01001110
minor1 = SIMD.float32x4.sub(minor1, SIMD.float32x4.mul(row3, tmp1));
minor3 = SIMD.float32x4.add(SIMD.float32x4.mul(row1, tmp1), minor3);

// Compute determinant
det = SIMD.float32x4.mul(row0, minor0);
det = SIMD.float32x4.add(SIMD.float32x4.shuffle(det, SIMD.ZWXY), det); // 0x4E = 01001110
det = SIMD.float32x4.add(SIMD.float32x4.shuffle(det, SIMD.YXWZ), det); // 0xB1 = 10110001
det = SIMD.float32x4.add(SIMD.float32x4.swizzle(det, SIMD.ZWXY), det); // 0x4E = 01001110
det = SIMD.float32x4.add(SIMD.float32x4.swizzle(det, SIMD.YXWZ), det); // 0xB1 = 10110001
tmp1 = SIMD.float32x4.reciprocal(det);
det = SIMD.float32x4.sub(SIMD.float32x4.add(tmp1, tmp1), SIMD.float32x4.mul(det, SIMD.float32x4.mul(tmp1, tmp1)));
det = SIMD.float32x4.shuffle(det, SIMD.XXXX);
det = SIMD.float32x4.swizzle(det, SIMD.XXXX);

// These swizzles aren't necessary if the faulty transposition is done
// up at the top of this function.
//minor0 = SIMD.float32x4.shuffle(minor0, SIMD.ZYXW);
//minor1 = SIMD.float32x4.shuffle(minor1, SIMD.ZYXW);
//minor2 = SIMD.float32x4.shuffle(minor2, SIMD.ZYXW);
//minor3 = SIMD.float32x4.shuffle(minor3, SIMD.ZYXW);
//minor0 = SIMD.float32x4.swizzle(minor0, SIMD.ZYXW);
//minor1 = SIMD.float32x4.swizzle(minor1, SIMD.ZYXW);
//minor2 = SIMD.float32x4.swizzle(minor2, SIMD.ZYXW);
//minor3 = SIMD.float32x4.swizzle(minor3, SIMD.ZYXW);

// Compute final values by multiplying with 1/det
minor0 = SIMD.float32x4.mul(det, minor0);
Expand Down
32 changes: 16 additions & 16 deletions src/benchmarks/matrix-multiplication.js
Original file line number Diff line number Diff line change
Expand Up @@ -138,36 +138,36 @@
var a3 = T1x4.getAt(3);
var b0 = T2x4.getAt(0);
Outx4.setAt(0, SIMD.float32x4.add(
SIMD.float32x4.mul(SIMD.float32x4.shuffle(b0, SIMD.XXXX), a0),
SIMD.float32x4.mul(SIMD.float32x4.swizzle(b0, SIMD.XXXX), a0),
SIMD.float32x4.add(
SIMD.float32x4.mul(SIMD.float32x4.shuffle(b0, SIMD.YYYY), a1),
SIMD.float32x4.mul(SIMD.float32x4.swizzle(b0, SIMD.YYYY), a1),
SIMD.float32x4.add(
SIMD.float32x4.mul(SIMD.float32x4.shuffle(b0, SIMD.ZZZZ), a2),
SIMD.float32x4.mul(SIMD.float32x4.shuffle(b0, SIMD.WWWW), a3)))))
SIMD.float32x4.mul(SIMD.float32x4.swizzle(b0, SIMD.ZZZZ), a2),
SIMD.float32x4.mul(SIMD.float32x4.swizzle(b0, SIMD.WWWW), a3)))))
var b1 = T2x4.getAt(1);
Outx4.setAt(1, SIMD.float32x4.add(
SIMD.float32x4.mul(SIMD.float32x4.shuffle(b1, SIMD.XXXX), a0),
SIMD.float32x4.mul(SIMD.float32x4.swizzle(b1, SIMD.XXXX), a0),
SIMD.float32x4.add(
SIMD.float32x4.mul(SIMD.float32x4.shuffle(b1, SIMD.YYYY), a1),
SIMD.float32x4.mul(SIMD.float32x4.swizzle(b1, SIMD.YYYY), a1),
SIMD.float32x4.add(
SIMD.float32x4.mul(SIMD.float32x4.shuffle(b1, SIMD.ZZZZ), a2),
SIMD.float32x4.mul(SIMD.float32x4.shuffle(b1, SIMD.WWWW), a3)))))
SIMD.float32x4.mul(SIMD.float32x4.swizzle(b1, SIMD.ZZZZ), a2),
SIMD.float32x4.mul(SIMD.float32x4.swizzle(b1, SIMD.WWWW), a3)))))
var b2 = T2x4.getAt(2);
Outx4.setAt(2, SIMD.float32x4.add(
SIMD.float32x4.mul(SIMD.float32x4.shuffle(b2, SIMD.XXXX), a0),
SIMD.float32x4.mul(SIMD.float32x4.swizzle(b2, SIMD.XXXX), a0),
SIMD.float32x4.add(
SIMD.float32x4.mul(SIMD.float32x4.shuffle(b2, SIMD.YYYY), a1),
SIMD.float32x4.mul(SIMD.float32x4.swizzle(b2, SIMD.YYYY), a1),
SIMD.float32x4.add(
SIMD.float32x4.mul(SIMD.float32x4.shuffle(b2, SIMD.ZZZZ), a2),
SIMD.float32x4.mul(SIMD.float32x4.shuffle(b2, SIMD.WWWW), a3)))))
SIMD.float32x4.mul(SIMD.float32x4.swizzle(b2, SIMD.ZZZZ), a2),
SIMD.float32x4.mul(SIMD.float32x4.swizzle(b2, SIMD.WWWW), a3)))))
var b3 = T2x4.getAt(3);
Outx4.setAt(3, SIMD.float32x4.add(
SIMD.float32x4.mul(SIMD.float32x4.shuffle(b3, SIMD.XXXX), a0),
SIMD.float32x4.mul(SIMD.float32x4.swizzle(b3, SIMD.XXXX), a0),
SIMD.float32x4.add(
SIMD.float32x4.mul(SIMD.float32x4.shuffle(b3, SIMD.YYYY), a1),
SIMD.float32x4.mul(SIMD.float32x4.swizzle(b3, SIMD.YYYY), a1),
SIMD.float32x4.add(
SIMD.float32x4.mul(SIMD.float32x4.shuffle(b3, SIMD.ZZZZ), a2),
SIMD.float32x4.mul(SIMD.float32x4.shuffle(b3, SIMD.WWWW), a3)))))
SIMD.float32x4.mul(SIMD.float32x4.swizzle(b3, SIMD.ZZZZ), a2),
SIMD.float32x4.mul(SIMD.float32x4.swizzle(b3, SIMD.WWWW), a3)))))
}
}

Expand Down
6 changes: 3 additions & 3 deletions src/benchmarks/shiftrows.js
Original file line number Diff line number Diff line change
Expand Up @@ -105,13 +105,13 @@
for (var r = 1; r < 4; ++r) {
var rx4 = statex4.getAt(r);
if (r == 1) {
statex4.setAt(r, SIMD.int32x4.shuffle(rx4, SIMD.YZWX));
statex4.setAt(r, SIMD.int32x4.swizzle(rx4, SIMD.YZWX));
}
else if (r == 2) {
statex4.setAt(r, SIMD.int32x4.shuffle(rx4, SIMD.ZWXY));
statex4.setAt(r, SIMD.int32x4.swizzle(rx4, SIMD.ZWXY));
}
else { // r == 3
statex4.setAt(r, SIMD.int32x4.shuffle(rx4, SIMD.WXYZ));
statex4.setAt(r, SIMD.int32x4.swizzle(rx4, SIMD.WXYZ));
}
}
}
Expand Down
8 changes: 4 additions & 4 deletions src/benchmarks/transform.js
Original file line number Diff line number Diff line change
Expand Up @@ -79,14 +79,14 @@

function simdVertexTransform(n) {
for (var i = 0; i < n; i++) {
var xxxx = SIMD.float32x4.shuffle(Vx4.getAt(0), SIMD.XXXX);
var xxxx = SIMD.float32x4.swizzle(Vx4.getAt(0), SIMD.XXXX);
var z = SIMD.float32x4.zero();
z = SIMD.float32x4.add(z, SIMD.float32x4.mul(xxxx, Tx4.getAt(0)));
var yyyy = SIMD.float32x4.shuffle(Vx4.getAt(0), SIMD.YYYY);
var yyyy = SIMD.float32x4.swizzle(Vx4.getAt(0), SIMD.YYYY);
z = SIMD.float32x4.add(z, SIMD.float32x4.mul(yyyy, Tx4.getAt(1)));
var zzzz = SIMD.float32x4.shuffle(Vx4.getAt(0), SIMD.ZZZZ);
var zzzz = SIMD.float32x4.swizzle(Vx4.getAt(0), SIMD.ZZZZ);
z = SIMD.float32x4.add(z, SIMD.float32x4.mul(zzzz, Tx4.getAt(2)));
var wwww = SIMD.float32x4.shuffle(Vx4.getAt(0), SIMD.WWWW);
var wwww = SIMD.float32x4.swizzle(Vx4.getAt(0), SIMD.WWWW);
z = SIMD.float32x4.add(z, SIMD.float32x4.mul(wwww, Tx4.getAt(3)));
Outx4.setAt(0, z);
}
Expand Down
18 changes: 9 additions & 9 deletions src/benchmarks/transpose4x4.js
Original file line number Diff line number Diff line change
Expand Up @@ -121,15 +121,15 @@
var tmp01;
var tmp23;

tmp01 = SIMD.float32x4.select(sel_ttff, src0, SIMD.float32x4.shuffle(src1, SIMD.XXXY));
tmp23 = SIMD.float32x4.select(sel_ttff, src2, SIMD.float32x4.shuffle(src3, SIMD.XXXY));
dst0 = SIMD.float32x4.select(sel_ttff, SIMD.float32x4.shuffle(tmp01, SIMD.XZXX), SIMD.float32x4.shuffle(tmp23, SIMD.XXXZ));
dst1 = SIMD.float32x4.select(sel_ttff, SIMD.float32x4.shuffle(tmp01, SIMD.YWXX), SIMD.float32x4.shuffle(tmp23, SIMD.XXYW));

tmp01 = SIMD.float32x4.select(sel_ttff, SIMD.float32x4.shuffle(src0, SIMD.ZWXX), src1);
tmp23 = SIMD.float32x4.select(sel_ttff, SIMD.float32x4.shuffle(src2, SIMD.ZWXX), src3);
dst2 = SIMD.float32x4.select(sel_ttff, SIMD.float32x4.shuffle(tmp01, SIMD.XZXX), SIMD.float32x4.shuffle(tmp23, SIMD.XXXZ));
dst3 = SIMD.float32x4.select(sel_ttff, SIMD.float32x4.shuffle(tmp01, SIMD.YWXX), SIMD.float32x4.shuffle(tmp23, SIMD.XXYW));
tmp01 = SIMD.float32x4.select(sel_ttff, src0, SIMD.float32x4.swizzle(src1, SIMD.XXXY));
tmp23 = SIMD.float32x4.select(sel_ttff, src2, SIMD.float32x4.swizzle(src3, SIMD.XXXY));
dst0 = SIMD.float32x4.select(sel_ttff, SIMD.float32x4.swizzle(tmp01, SIMD.XZXX), SIMD.float32x4.swizzle(tmp23, SIMD.XXXZ));
dst1 = SIMD.float32x4.select(sel_ttff, SIMD.float32x4.swizzle(tmp01, SIMD.YWXX), SIMD.float32x4.swizzle(tmp23, SIMD.XXYW));

tmp01 = SIMD.float32x4.select(sel_ttff, SIMD.float32x4.swizzle(src0, SIMD.ZWXX), src1);
tmp23 = SIMD.float32x4.select(sel_ttff, SIMD.float32x4.swizzle(src2, SIMD.ZWXX), src3);
dst2 = SIMD.float32x4.select(sel_ttff, SIMD.float32x4.swizzle(tmp01, SIMD.XZXX), SIMD.float32x4.swizzle(tmp23, SIMD.XXXZ));
dst3 = SIMD.float32x4.select(sel_ttff, SIMD.float32x4.swizzle(tmp01, SIMD.YWXX), SIMD.float32x4.swizzle(tmp23, SIMD.XXYW));

dstx4.setAt(0, dst0);
dstx4.setAt(1, dst1);
Expand Down
Loading

0 comments on commit ceb4f9d

Please sign in to comment.