From dc9126a73d1698a09a487872e45888f6e1ddf0b2 Mon Sep 17 00:00:00 2001 From: Larry Gritz Date: Fri, 13 Nov 2020 13:45:45 -0800 Subject: [PATCH 1/2] Make Matrix implementation more SIMD friendly This is backporting some changes we made in OSL (with customized versions of Imath headers that we can thankfully no longer need if we push the fixes back to the Imath project). This was all pointed out by people from Intel working on OSL, so I'm deferring to their judgment that this is the best solution. * Avoid memset or memcpy to initialize or copy a matrix, intead use element-by-element copies. In a scalar context they generate the same code (with the optimizer on, at least), but when those operations are done inside a loop that you hope will be auto-vectorized, the casting and function calling involved in using memset/memcpy will confuse the vectorizer and you'll end up with inferior code, sometimes even not vectorizing the whole loop. * Replace a bunch of instances of accessing vector elements as [0], [1], [2] with .x, .y, .z. This is because our particular implementation of `operator[]` involves a pointer cast that confuses the auto-vectorizers. For best vector performance (not just of individual operations but also when doing ostensibly scalar operations inside loops that could and should be vectorized), it's important to say v.x rather than v[0]. * Expand `Matrix3::operator*(Matrix3)` to unroll the loops and also avoid needless initialization of elements to 0 only to immediately write over the value in the first iteration of the loop. Signed-off-by: Larry Gritz --- src/Imath/ImathMatrix.h | 649 +++++++++++++++++++++++----------------- 1 file changed, 367 insertions(+), 282 deletions(-) diff --git a/src/Imath/ImathMatrix.h b/src/Imath/ImathMatrix.h index 48a5fc59..bbfc247b 100644 --- a/src/Imath/ImathMatrix.h +++ b/src/Imath/ImathMatrix.h @@ -1106,7 +1106,14 @@ template IMATH_CONSTEXPR14 inline Matrix22::Matrix22 (T a) template IMATH_CONSTEXPR14 inline Matrix22::Matrix22 (const T a[2][2]) { - memcpy (x, a, sizeof (x)); + // Function calls and aliasing issues can inhibit vectorization versus + // straight assignment of data members, so instead of this: + // memcpy (x, a, sizeof (x)); + // we do this: + x[0][0] = a[0][0]; + x[0][1] = a[0][1]; + x[1][0] = a[1][0]; + x[1][1] = a[1][1]; } template IMATH_CONSTEXPR14 inline Matrix22::Matrix22 (T a, T b, T c, T d) @@ -1119,7 +1126,14 @@ template IMATH_CONSTEXPR14 inline Matrix22::Matrix22 (T a, T b, T c template IMATH_CONSTEXPR14 inline Matrix22::Matrix22 (const Matrix22& v) { - memcpy (x, v.x, sizeof (x)); + // Function calls and aliasing issues can inhibit vectorization versus + // straight assignment of data members, so we don't do this: + // memcpy (x, v.x, sizeof (x)); + // we do this: + x[0][0] = v.x[0][0]; + x[0][1] = v.x[0][1]; + x[1][0] = v.x[1][0]; + x[1][1] = v.x[1][1]; } template @@ -1136,7 +1150,14 @@ template IMATH_CONSTEXPR14 inline const Matrix22& Matrix22::operator= (const Matrix22& v) { - memcpy (x, v.x, sizeof (x)); + // Function calls and aliasing issues can inhibit vectorization versus + // straight assignment of data members, so we don't do this: + // memcpy (x, v.x, sizeof (x)); + // we do this: + x[0][0] = v.x[0][0]; + x[0][1] = v.x[0][1]; + x[1][0] = v.x[1][0]; + x[1][1] = v.x[1][1]; return *this; } @@ -1170,17 +1191,10 @@ template inline void Matrix22::getValue (Matrix22& v) const { - if (isSameType::value) - { - memcpy (v.x, x, sizeof (x)); - } - else - { - v.x[0][0] = x[0][0]; - v.x[0][1] = x[0][1]; - v.x[1][0] = x[1][0]; - v.x[1][1] = x[1][1]; - } + v.x[0][0] = x[0][0]; + v.x[0][1] = x[0][1]; + v.x[1][0] = x[1][0]; + v.x[1][1] = x[1][1]; } template @@ -1188,18 +1202,10 @@ template IMATH_CONSTEXPR14 inline Matrix22& Matrix22::setValue (const Matrix22& v) { - if (isSameType::value) - { - memcpy (x, v.x, sizeof (x)); - } - else - { - x[0][0] = v.x[0][0]; - x[0][1] = v.x[0][1]; - x[1][0] = v.x[1][0]; - x[1][1] = v.x[1][1]; - } - + x[0][0] = v.x[0][0]; + x[0][1] = v.x[0][1]; + x[1][0] = v.x[1][0]; + x[1][1] = v.x[1][1]; return *this; } @@ -1208,18 +1214,10 @@ template IMATH_CONSTEXPR14 inline Matrix22& Matrix22::setTheMatrix (const Matrix22& v) { - if (isSameType::value) - { - memcpy (x, v.x, sizeof (x)); - } - else - { - x[0][0] = v.x[0][0]; - x[0][1] = v.x[0][1]; - x[1][0] = v.x[1][0]; - x[1][1] = v.x[1][1]; - } - + x[0][0] = v.x[0][0]; + x[0][1] = v.x[0][1]; + x[1][0] = v.x[1][0]; + x[1][1] = v.x[1][1]; return *this; } @@ -1422,8 +1420,8 @@ Matrix22::multDirMatrix (const Vec2& src, Vec2& dst) const { S a, b; - a = src[0] * x[0][0] + src[1] * x[1][0]; - b = src[0] * x[0][1] + src[1] * x[1][1]; + a = src.x * x[0][0] + src.y * x[1][0]; + b = src.x * x[0][1] + src.y * x[1][1]; dst.x = a; dst.y = b; @@ -1618,10 +1616,10 @@ template IMATH_CONSTEXPR14 inline const Matrix22& Matrix22::setScale (const Vec2& s) { - x[0][0] = s[0]; + x[0][0] = s.x; x[0][1] = static_cast (0); x[1][0] = static_cast (0); - x[1][1] = s[1]; + x[1][1] = s.y; return *this; } @@ -1632,11 +1630,11 @@ template IMATH_CONSTEXPR14 inline const Matrix22& Matrix22::scale (const Vec2& s) { - x[0][0] *= s[0]; - x[0][1] *= s[0]; + x[0][0] *= s.x; + x[0][1] *= s.x; - x[1][0] *= s[1]; - x[1][1] *= s[1]; + x[1][0] *= s.y; + x[1][1] *= s.y; return *this; } @@ -1660,14 +1658,17 @@ Matrix33::operator[] (int i) const } template -inline - - IMATH_CONSTEXPR14 - Matrix33::Matrix33() +inline IMATH_CONSTEXPR14 +Matrix33::Matrix33() { - memset (x, 0, sizeof (x)); x[0][0] = 1; + x[0][1] = 0; + x[0][2] = 0; + x[1][0] = 0; x[1][1] = 1; + x[1][2] = 0; + x[2][0] = 0; + x[2][1] = 0; x[2][2] = 1; } @@ -1686,7 +1687,19 @@ template IMATH_CONSTEXPR14 inline Matrix33::Matrix33 (T a) template IMATH_CONSTEXPR14 inline Matrix33::Matrix33 (const T a[3][3]) { - memcpy (x, a, sizeof (x)); + // Function calls and aliasing issues can inhibit vectorization versus + // straight assignment of data members, so instead of this: + // memcpy (x, a, sizeof (x)); + // we do this: + x[0][0] = a[0][0]; + x[0][1] = a[0][1]; + x[0][2] = a[0][2]; + x[1][0] = a[1][0]; + x[1][1] = a[1][1]; + x[1][2] = a[1][2]; + x[2][0] = a[2][0]; + x[2][1] = a[2][1]; + x[2][2] = a[2][2]; } template @@ -1706,7 +1719,19 @@ IMATH_CONSTEXPR14 inline Matrix33::Matrix33 (T a, T b, T c, T d, T e, T f, T template IMATH_CONSTEXPR14 inline Matrix33::Matrix33 (const Matrix33& v) { - memcpy (x, v.x, sizeof (x)); + // Function calls and aliasing issues can inhibit vectorization versus + // straight assignment of data members, so instead of this: + // memcpy (x, v.x, sizeof (x)); + // we do this: + x[0][0] = v.x[0][0]; + x[0][1] = v.x[0][1]; + x[0][2] = v.x[0][2]; + x[1][0] = v.x[1][0]; + x[1][1] = v.x[1][1]; + x[1][2] = v.x[1][2]; + x[2][0] = v.x[2][0]; + x[2][1] = v.x[2][1]; + x[2][2] = v.x[2][2]; } template @@ -1730,7 +1755,19 @@ template IMATH_CONSTEXPR14 inline const Matrix33& Matrix33::operator= (const Matrix33& v) { - memcpy (x, v.x, sizeof (x)); + // Function calls and aliasing issues can inhibit vectorization versus + // straight assignment of data members, so instead of this: + // memcpy (x, v.x, sizeof (x)); + // we do this: + x[0][0] = v.x[0][0]; + x[0][1] = v.x[0][1]; + x[0][2] = v.x[0][2]; + x[1][0] = v.x[1][0]; + x[1][1] = v.x[1][1]; + x[1][2] = v.x[1][2]; + x[2][0] = v.x[2][0]; + x[2][1] = v.x[2][1]; + x[2][2] = v.x[2][2]; return *this; } @@ -1770,22 +1807,15 @@ template inline void Matrix33::getValue (Matrix33& v) const { - if (isSameType::value) - { - memcpy (v.x, x, sizeof (x)); - } - else - { - v.x[0][0] = x[0][0]; - v.x[0][1] = x[0][1]; - v.x[0][2] = x[0][2]; - v.x[1][0] = x[1][0]; - v.x[1][1] = x[1][1]; - v.x[1][2] = x[1][2]; - v.x[2][0] = x[2][0]; - v.x[2][1] = x[2][1]; - v.x[2][2] = x[2][2]; - } + v.x[0][0] = x[0][0]; + v.x[0][1] = x[0][1]; + v.x[0][2] = x[0][2]; + v.x[1][0] = x[1][0]; + v.x[1][1] = x[1][1]; + v.x[1][2] = x[1][2]; + v.x[2][0] = x[2][0]; + v.x[2][1] = x[2][1]; + v.x[2][2] = x[2][2]; } template @@ -1793,23 +1823,15 @@ template IMATH_CONSTEXPR14 inline Matrix33& Matrix33::setValue (const Matrix33& v) { - if (isSameType::value) - { - memcpy (x, v.x, sizeof (x)); - } - else - { - x[0][0] = v.x[0][0]; - x[0][1] = v.x[0][1]; - x[0][2] = v.x[0][2]; - x[1][0] = v.x[1][0]; - x[1][1] = v.x[1][1]; - x[1][2] = v.x[1][2]; - x[2][0] = v.x[2][0]; - x[2][1] = v.x[2][1]; - x[2][2] = v.x[2][2]; - } - + x[0][0] = v.x[0][0]; + x[0][1] = v.x[0][1]; + x[0][2] = v.x[0][2]; + x[1][0] = v.x[1][0]; + x[1][1] = v.x[1][1]; + x[1][2] = v.x[1][2]; + x[2][0] = v.x[2][0]; + x[2][1] = v.x[2][1]; + x[2][2] = v.x[2][2]; return *this; } @@ -1818,23 +1840,15 @@ template IMATH_CONSTEXPR14 inline Matrix33& Matrix33::setTheMatrix (const Matrix33& v) { - if (isSameType::value) - { - memcpy (x, v.x, sizeof (x)); - } - else - { - x[0][0] = v.x[0][0]; - x[0][1] = v.x[0][1]; - x[0][2] = v.x[0][2]; - x[1][0] = v.x[1][0]; - x[1][1] = v.x[1][1]; - x[1][2] = v.x[1][2]; - x[2][0] = v.x[2][0]; - x[2][1] = v.x[2][1]; - x[2][2] = v.x[2][2]; - } - + x[0][0] = v.x[0][0]; + x[0][1] = v.x[0][1]; + x[0][2] = v.x[0][2]; + x[1][0] = v.x[1][0]; + x[1][1] = v.x[1][1]; + x[1][2] = v.x[1][2]; + x[2][0] = v.x[2][0]; + x[2][1] = v.x[2][1]; + x[2][2] = v.x[2][2]; return *this; } @@ -1842,9 +1856,14 @@ template inline void Matrix33::makeIdentity() { - memset (x, 0, sizeof (x)); x[0][0] = 1; + x[0][1] = 0; + x[0][2] = 0; + x[1][0] = 0; x[1][1] = 1; + x[1][2] = 0; + x[2][0] = 0; + x[2][1] = 0; x[2][2] = 1; } @@ -2063,12 +2082,21 @@ template IMATH_CONSTEXPR14 inline const Matrix33& Matrix33::operator*= (const Matrix33& v) { - Matrix33 tmp (T (0)); + // Avoid initializing with 0 values before immediately overwriting them, + // and unroll all loops for the best autovectorization. + Matrix33 tmp(Imath::UNINITIALIZED); - for (int i = 0; i < 3; i++) - for (int j = 0; j < 3; j++) - for (int k = 0; k < 3; k++) - tmp.x[i][j] += x[i][k] * v.x[k][j]; + tmp.x[0][0] = x[0][0] * v.x[0][0] + x[0][1] * v.x[1][0] + x[0][2] * v.x[2][0]; + tmp.x[0][1] = x[0][0] * v.x[0][1] + x[0][1] * v.x[1][1] + x[0][2] * v.x[2][1]; + tmp.x[0][2] = x[0][0] * v.x[0][2] + x[0][1] * v.x[1][2] + x[0][2] * v.x[2][2]; + + tmp.x[1][0] = x[1][0] * v.x[0][0] + x[1][1] * v.x[1][0] + x[1][2] * v.x[2][0]; + tmp.x[1][1] = x[1][0] * v.x[0][1] + x[1][1] * v.x[1][1] + x[1][2] * v.x[2][1]; + tmp.x[1][2] = x[1][0] * v.x[0][2] + x[1][1] * v.x[1][2] + x[1][2] * v.x[2][2]; + + tmp.x[2][0] = x[2][0] * v.x[0][0] + x[2][1] * v.x[1][0] + x[2][2] * v.x[2][0]; + tmp.x[2][1] = x[2][0] * v.x[0][1] + x[2][1] * v.x[1][1] + x[2][2] * v.x[2][1]; + tmp.x[2][2] = x[2][0] * v.x[0][2] + x[2][1] * v.x[1][2] + x[2][2] * v.x[2][2]; *this = tmp; return *this; @@ -2078,12 +2106,21 @@ template IMATH_CONSTEXPR14 inline Matrix33 Matrix33::operator* (const Matrix33& v) const { - Matrix33 tmp (T (0)); + // Avoid initializing with 0 values before immediately overwriting them, + // and unroll all loops for the best autovectorization. + Matrix33 tmp(Imath::UNINITIALIZED); - for (int i = 0; i < 3; i++) - for (int j = 0; j < 3; j++) - for (int k = 0; k < 3; k++) - tmp.x[i][j] += x[i][k] * v.x[k][j]; + tmp.x[0][0] = x[0][0] * v.x[0][0] + x[0][1] * v.x[1][0] + x[0][2] * v.x[2][0]; + tmp.x[0][1] = x[0][0] * v.x[0][1] + x[0][1] * v.x[1][1] + x[0][2] * v.x[2][1]; + tmp.x[0][2] = x[0][0] * v.x[0][2] + x[0][1] * v.x[1][2] + x[0][2] * v.x[2][2]; + + tmp.x[1][0] = x[1][0] * v.x[0][0] + x[1][1] * v.x[1][0] + x[1][2] * v.x[2][0]; + tmp.x[1][1] = x[1][0] * v.x[0][1] + x[1][1] * v.x[1][1] + x[1][2] * v.x[2][1]; + tmp.x[1][2] = x[1][0] * v.x[0][2] + x[1][1] * v.x[1][2] + x[1][2] * v.x[2][2]; + + tmp.x[2][0] = x[2][0] * v.x[0][0] + x[2][1] * v.x[1][0] + x[2][2] * v.x[2][0]; + tmp.x[2][1] = x[2][0] * v.x[0][1] + x[2][1] * v.x[1][1] + x[2][2] * v.x[2][1]; + tmp.x[2][2] = x[2][0] * v.x[0][2] + x[2][1] * v.x[1][2] + x[2][2] * v.x[2][2]; return tmp; } @@ -2095,9 +2132,9 @@ Matrix33::multVecMatrix (const Vec2& src, Vec2& dst) const { S a, b, w; - a = src[0] * x[0][0] + src[1] * x[1][0] + x[2][0]; - b = src[0] * x[0][1] + src[1] * x[1][1] + x[2][1]; - w = src[0] * x[0][2] + src[1] * x[1][2] + x[2][2]; + a = src.x * x[0][0] + src.y * x[1][0] + x[2][0]; + b = src.x * x[0][1] + src.y * x[1][1] + x[2][1]; + w = src.x * x[0][2] + src.y * x[1][2] + x[2][2]; dst.x = a / w; dst.y = b / w; @@ -2110,8 +2147,8 @@ Matrix33::multDirMatrix (const Vec2& src, Vec2& dst) const { S a, b; - a = src[0] * x[0][0] + src[1] * x[1][0]; - b = src[0] * x[0][1] + src[1] * x[1][1]; + a = src.x * x[0][0] + src.y * x[1][0]; + b = src.x * x[0][1] + src.y * x[1][1]; dst.x = a; dst.y = b; @@ -2689,11 +2726,21 @@ template IMATH_CONSTEXPR14 inline const Matrix33& Matrix33::setScale (T s) { - memset (x, 0, sizeof (x)); x[0][0] = s; + x[0][1] = 0; + x[0][2] = 0; + x[1][0] = 0; x[1][1] = s; - x[2][2] = 1; - + x[1][2] = 0; + x[2][0] = 0; + x[2][1] = 0; + x[2][2] = s; + // FIXME: The old code looked like this: + // memset (x, 0, sizeof (x)); + // x[0][0] = s; + // x[1][1] = s; + // x[2][2] = 1; + // Why is the [2][2] component 1? That seems wrong. return *this; } @@ -2702,11 +2749,15 @@ template IMATH_CONSTEXPR14 inline const Matrix33& Matrix33::setScale (const Vec2& s) { - memset (x, 0, sizeof (x)); - x[0][0] = s[0]; - x[1][1] = s[1]; + x[0][0] = s.x; + x[0][1] = 0; + x[0][2] = 0; + x[1][0] = 0; + x[1][1] = s.y; + x[1][2] = 0; + x[2][0] = 0; + x[2][1] = 0; x[2][2] = 1; - return *this; } @@ -2715,13 +2766,13 @@ template IMATH_CONSTEXPR14 inline const Matrix33& Matrix33::scale (const Vec2& s) { - x[0][0] *= s[0]; - x[0][1] *= s[0]; - x[0][2] *= s[0]; + x[0][0] *= s.x; + x[0][1] *= s.x; + x[0][2] *= s.x; - x[1][0] *= s[1]; - x[1][1] *= s[1]; - x[1][2] *= s[1]; + x[1][0] *= s.y; + x[1][1] *= s.y; + x[1][2] *= s.y; return *this; } @@ -2739,8 +2790,8 @@ Matrix33::setTranslation (const Vec2& t) x[1][1] = 1; x[1][2] = 0; - x[2][0] = t[0]; - x[2][1] = t[1]; + x[2][0] = t.x; + x[2][1] = t.y; x[2][2] = 1; return *this; @@ -2758,9 +2809,9 @@ template IMATH_CONSTEXPR14 inline const Matrix33& Matrix33::translate (const Vec2& t) { - x[2][0] += t[0] * x[0][0] + t[1] * x[1][0]; - x[2][1] += t[0] * x[0][1] + t[1] * x[1][1]; - x[2][2] += t[0] * x[0][2] + t[1] * x[1][2]; + x[2][0] += t.x * x[0][0] + t.y * x[1][0]; + x[2][1] += t.x * x[0][1] + t.y * x[1][1]; + x[2][2] += t.x * x[0][2] + t.y * x[1][2]; return *this; } @@ -2791,10 +2842,10 @@ IMATH_CONSTEXPR14 inline const Matrix33& Matrix33::setShear (const Vec2& h) { x[0][0] = 1; - x[0][1] = h[1]; + x[0][1] = h.y; x[0][2] = 0; - x[1][0] = h[0]; + x[1][0] = h.x; x[1][1] = 1; x[1][2] = 0; @@ -2830,13 +2881,13 @@ Matrix33::shear (const Vec2& h) { Matrix33 P (*this); - x[0][0] = P[0][0] + h[1] * P[1][0]; - x[0][1] = P[0][1] + h[1] * P[1][1]; - x[0][2] = P[0][2] + h[1] * P[1][2]; + x[0][0] = P[0][0] + h.y * P[1][0]; + x[0][1] = P[0][1] + h.y * P[1][1]; + x[0][2] = P[0][2] + h.y * P[1][2]; - x[1][0] = P[1][0] + h[0] * P[0][0]; - x[1][1] = P[1][1] + h[0] * P[0][1]; - x[1][2] = P[1][2] + h[0] * P[0][2]; + x[1][0] = P[1][0] + h.x * P[0][0]; + x[1][1] = P[1][1] + h.x * P[0][1]; + x[1][2] = P[1][2] + h.x * P[0][2]; return *this; } @@ -2861,10 +2912,21 @@ Matrix44::operator[] (int i) const template IMATH_CONSTEXPR14 inline Matrix44::Matrix44() { - memset (x, 0, sizeof (x)); x[0][0] = 1; + x[0][1] = 0; + x[0][2] = 0; + x[0][3] = 0; + x[1][0] = 0; x[1][1] = 1; + x[1][2] = 0; + x[1][3] = 0; + x[2][0] = 0; + x[2][1] = 0; x[2][2] = 1; + x[2][3] = 0; + x[3][0] = 0; + x[3][1] = 0; + x[3][2] = 0; x[3][3] = 1; } @@ -2890,7 +2952,22 @@ template IMATH_CONSTEXPR14 inline Matrix44::Matrix44 (T a) template IMATH_CONSTEXPR14 inline Matrix44::Matrix44 (const T a[4][4]) { - memcpy (x, a, sizeof (x)); + x[0][0] = a[0][0]; + x[0][1] = a[0][1]; + x[0][2] = a[0][2]; + x[0][3] = a[0][3]; + x[1][0] = a[1][0]; + x[1][1] = a[1][1]; + x[1][2] = a[1][2]; + x[1][3] = a[1][3]; + x[2][0] = a[2][0]; + x[2][1] = a[2][1]; + x[2][2] = a[2][2]; + x[2][3] = a[2][3]; + x[3][0] = a[3][0]; + x[3][1] = a[3][1]; + x[3][2] = a[3][2]; + x[3][3] = a[3][3]; } template @@ -2929,9 +3006,9 @@ template IMATH_CONSTEXPR14 inline Matrix44::Matrix44 (Matrix33 r x[2][1] = r[2][1]; x[2][2] = r[2][2]; x[2][3] = 0; - x[3][0] = t[0]; - x[3][1] = t[1]; - x[3][2] = t[2]; + x[3][0] = t.x; + x[3][1] = t.y; + x[3][2] = t.z; x[3][3] = 1; } @@ -3042,29 +3119,22 @@ template inline void Matrix44::getValue (Matrix44& v) const { - if (isSameType::value) - { - memcpy (v.x, x, sizeof (x)); - } - else - { - v.x[0][0] = x[0][0]; - v.x[0][1] = x[0][1]; - v.x[0][2] = x[0][2]; - v.x[0][3] = x[0][3]; - v.x[1][0] = x[1][0]; - v.x[1][1] = x[1][1]; - v.x[1][2] = x[1][2]; - v.x[1][3] = x[1][3]; - v.x[2][0] = x[2][0]; - v.x[2][1] = x[2][1]; - v.x[2][2] = x[2][2]; - v.x[2][3] = x[2][3]; - v.x[3][0] = x[3][0]; - v.x[3][1] = x[3][1]; - v.x[3][2] = x[3][2]; - v.x[3][3] = x[3][3]; - } + v.x[0][0] = x[0][0]; + v.x[0][1] = x[0][1]; + v.x[0][2] = x[0][2]; + v.x[0][3] = x[0][3]; + v.x[1][0] = x[1][0]; + v.x[1][1] = x[1][1]; + v.x[1][2] = x[1][2]; + v.x[1][3] = x[1][3]; + v.x[2][0] = x[2][0]; + v.x[2][1] = x[2][1]; + v.x[2][2] = x[2][2]; + v.x[2][3] = x[2][3]; + v.x[3][0] = x[3][0]; + v.x[3][1] = x[3][1]; + v.x[3][2] = x[3][2]; + v.x[3][3] = x[3][3]; } template @@ -3072,30 +3142,22 @@ template IMATH_CONSTEXPR14 inline Matrix44& Matrix44::setValue (const Matrix44& v) { - if (isSameType::value) - { - memcpy (x, v.x, sizeof (x)); - } - else - { - x[0][0] = v.x[0][0]; - x[0][1] = v.x[0][1]; - x[0][2] = v.x[0][2]; - x[0][3] = v.x[0][3]; - x[1][0] = v.x[1][0]; - x[1][1] = v.x[1][1]; - x[1][2] = v.x[1][2]; - x[1][3] = v.x[1][3]; - x[2][0] = v.x[2][0]; - x[2][1] = v.x[2][1]; - x[2][2] = v.x[2][2]; - x[2][3] = v.x[2][3]; - x[3][0] = v.x[3][0]; - x[3][1] = v.x[3][1]; - x[3][2] = v.x[3][2]; - x[3][3] = v.x[3][3]; - } - + x[0][0] = v.x[0][0]; + x[0][1] = v.x[0][1]; + x[0][2] = v.x[0][2]; + x[0][3] = v.x[0][3]; + x[1][0] = v.x[1][0]; + x[1][1] = v.x[1][1]; + x[1][2] = v.x[1][2]; + x[1][3] = v.x[1][3]; + x[2][0] = v.x[2][0]; + x[2][1] = v.x[2][1]; + x[2][2] = v.x[2][2]; + x[2][3] = v.x[2][3]; + x[3][0] = v.x[3][0]; + x[3][1] = v.x[3][1]; + x[3][2] = v.x[3][2]; + x[3][3] = v.x[3][3]; return *this; } @@ -3104,30 +3166,22 @@ template IMATH_CONSTEXPR14 inline Matrix44& Matrix44::setTheMatrix (const Matrix44& v) { - if (isSameType::value) - { - memcpy (x, v.x, sizeof (x)); - } - else - { - x[0][0] = v.x[0][0]; - x[0][1] = v.x[0][1]; - x[0][2] = v.x[0][2]; - x[0][3] = v.x[0][3]; - x[1][0] = v.x[1][0]; - x[1][1] = v.x[1][1]; - x[1][2] = v.x[1][2]; - x[1][3] = v.x[1][3]; - x[2][0] = v.x[2][0]; - x[2][1] = v.x[2][1]; - x[2][2] = v.x[2][2]; - x[2][3] = v.x[2][3]; - x[3][0] = v.x[3][0]; - x[3][1] = v.x[3][1]; - x[3][2] = v.x[3][2]; - x[3][3] = v.x[3][3]; - } - + x[0][0] = v.x[0][0]; + x[0][1] = v.x[0][1]; + x[0][2] = v.x[0][2]; + x[0][3] = v.x[0][3]; + x[1][0] = v.x[1][0]; + x[1][1] = v.x[1][1]; + x[1][2] = v.x[1][2]; + x[1][3] = v.x[1][3]; + x[2][0] = v.x[2][0]; + x[2][1] = v.x[2][1]; + x[2][2] = v.x[2][2]; + x[2][3] = v.x[2][3]; + x[3][0] = v.x[3][0]; + x[3][1] = v.x[3][1]; + x[3][2] = v.x[3][2]; + x[3][3] = v.x[3][3]; return *this; } @@ -3135,10 +3189,21 @@ template inline void Matrix44::makeIdentity() { - memset (x, 0, sizeof (x)); x[0][0] = 1; + x[0][1] = 0; + x[0][2] = 0; + x[0][3] = 0; + x[1][0] = 0; x[1][1] = 1; + x[1][2] = 0; + x[1][3] = 0; + x[2][0] = 0; + x[2][1] = 0; x[2][2] = 1; + x[2][3] = 0; + x[3][0] = 0; + x[3][1] = 0; + x[3][2] = 0; x[3][3] = 1; } @@ -3508,10 +3573,10 @@ Matrix44::multVecMatrix (const Vec3& src, Vec3& dst) const { S a, b, c, w; - a = src[0] * x[0][0] + src[1] * x[1][0] + src[2] * x[2][0] + x[3][0]; - b = src[0] * x[0][1] + src[1] * x[1][1] + src[2] * x[2][1] + x[3][1]; - c = src[0] * x[0][2] + src[1] * x[1][2] + src[2] * x[2][2] + x[3][2]; - w = src[0] * x[0][3] + src[1] * x[1][3] + src[2] * x[2][3] + x[3][3]; + a = src.x * x[0][0] + src.y * x[1][0] + src.z * x[2][0] + x[3][0]; + b = src.x * x[0][1] + src.y * x[1][1] + src.z * x[2][1] + x[3][1]; + c = src.x * x[0][2] + src.y * x[1][2] + src.z * x[2][2] + x[3][2]; + w = src.x * x[0][3] + src.y * x[1][3] + src.z * x[2][3] + x[3][3]; dst.x = a / w; dst.y = b / w; @@ -3525,9 +3590,9 @@ Matrix44::multDirMatrix (const Vec3& src, Vec3& dst) const { S a, b, c; - a = src[0] * x[0][0] + src[1] * x[1][0] + src[2] * x[2][0]; - b = src[0] * x[0][1] + src[1] * x[1][1] + src[2] * x[2][1]; - c = src[0] * x[0][2] + src[1] * x[1][2] + src[2] * x[2][2]; + a = src.x * x[0][0] + src.y * x[1][0] + src.z * x[2][0]; + b = src.x * x[0][1] + src.y * x[1][1] + src.z * x[2][1]; + c = src.x * x[0][2] + src.y * x[1][2] + src.z * x[2][2]; dst.x = a; dst.y = b; @@ -4058,13 +4123,13 @@ Matrix44::setEulerAngles (const Vec3& r) { S cos_rz, sin_rz, cos_ry, sin_ry, cos_rx, sin_rx; - cos_rz = cos ((T) r[2]); - cos_ry = cos ((T) r[1]); - cos_rx = cos ((T) r[0]); + cos_rz = cos ((T) r.z); + cos_ry = cos ((T) r.y); + cos_rx = cos ((T) r.x); - sin_rz = sin ((T) r[2]); - sin_ry = sin ((T) r[1]); - sin_rx = sin ((T) r[0]); + sin_rz = sin ((T) r.z); + sin_ry = sin ((T) r.y); + sin_rx = sin ((T) r.x); x[0][0] = cos_rz * cos_ry; x[0][1] = sin_rz * cos_ry; @@ -4098,19 +4163,19 @@ Matrix44::setAxisAngle (const Vec3& axis, S angle) S sine = Math::sin (angle); S cosine = Math::cos (angle); - x[0][0] = unit[0] * unit[0] * (1 - cosine) + cosine; - x[0][1] = unit[0] * unit[1] * (1 - cosine) + unit[2] * sine; - x[0][2] = unit[0] * unit[2] * (1 - cosine) - unit[1] * sine; + x[0][0] = unit.x * unit.x * (1 - cosine) + cosine; + x[0][1] = unit.x * unit.y * (1 - cosine) + unit.z * sine; + x[0][2] = unit.x * unit.z * (1 - cosine) - unit.y * sine; x[0][3] = 0; - x[1][0] = unit[0] * unit[1] * (1 - cosine) - unit[2] * sine; - x[1][1] = unit[1] * unit[1] * (1 - cosine) + cosine; - x[1][2] = unit[1] * unit[2] * (1 - cosine) + unit[0] * sine; + x[1][0] = unit.x * unit.y * (1 - cosine) - unit.z * sine; + x[1][1] = unit.y * unit.y * (1 - cosine) + cosine; + x[1][2] = unit.y * unit.z * (1 - cosine) + unit.x * sine; x[1][3] = 0; - x[2][0] = unit[0] * unit[2] * (1 - cosine) + unit[1] * sine; - x[2][1] = unit[1] * unit[2] * (1 - cosine) - unit[0] * sine; - x[2][2] = unit[2] * unit[2] * (1 - cosine) + cosine; + x[2][0] = unit.x * unit.z * (1 - cosine) + unit.y * sine; + x[2][1] = unit.y * unit.z * (1 - cosine) - unit.x * sine; + x[2][2] = unit.z * unit.z * (1 - cosine) + cosine; x[2][3] = 0; x[3][0] = 0; @@ -4131,13 +4196,13 @@ Matrix44::rotate (const Vec3& r) S m10, m11, m12; S m20, m21, m22; - cos_rz = cos ((S) r[2]); - cos_ry = cos ((S) r[1]); - cos_rx = cos ((S) r[0]); + cos_rz = cos ((S) r.z); + cos_ry = cos ((S) r.y); + cos_rx = cos ((S) r.x); - sin_rz = sin ((S) r[2]); - sin_ry = sin ((S) r[1]); - sin_rx = sin ((S) r[0]); + sin_rz = sin ((S) r.z); + sin_ry = sin ((S) r.y); + sin_rx = sin ((S) r.x); m00 = cos_rz * cos_ry; m01 = sin_rz * cos_ry; @@ -4173,12 +4238,22 @@ template IMATH_CONSTEXPR14 inline const Matrix44& Matrix44::setScale (T s) { - memset (x, 0, sizeof (x)); x[0][0] = s; + x[0][1] = 0; + x[0][2] = 0; + x[0][3] = 0; + x[1][0] = 0; x[1][1] = s; + x[1][2] = 0; + x[1][3] = 0; + x[2][0] = 0; + x[2][1] = 0; x[2][2] = s; + x[2][3] = 0; + x[3][0] = 0; + x[3][1] = 0; + x[3][2] = 0; x[3][3] = 1; - return *this; } @@ -4187,12 +4262,22 @@ template IMATH_CONSTEXPR14 inline const Matrix44& Matrix44::setScale (const Vec3& s) { - memset (x, 0, sizeof (x)); - x[0][0] = s[0]; - x[1][1] = s[1]; - x[2][2] = s[2]; + x[0][0] = s.x; + x[0][1] = 0; + x[0][2] = 0; + x[0][3] = 0; + x[1][0] = 0; + x[1][1] = s.y; + x[1][2] = 0; + x[1][3] = 0; + x[2][0] = 0; + x[2][1] = 0; + x[2][2] = s.z; + x[2][3] = 0; + x[3][0] = 0; + x[3][1] = 0; + x[3][2] = 0; x[3][3] = 1; - return *this; } @@ -4201,20 +4286,20 @@ template IMATH_CONSTEXPR14 inline const Matrix44& Matrix44::scale (const Vec3& s) { - x[0][0] *= s[0]; - x[0][1] *= s[0]; - x[0][2] *= s[0]; - x[0][3] *= s[0]; + x[0][0] *= s.x; + x[0][1] *= s.x; + x[0][2] *= s.x; + x[0][3] *= s.x; - x[1][0] *= s[1]; - x[1][1] *= s[1]; - x[1][2] *= s[1]; - x[1][3] *= s[1]; + x[1][0] *= s.y; + x[1][1] *= s.y; + x[1][2] *= s.y; + x[1][3] *= s.y; - x[2][0] *= s[2]; - x[2][1] *= s[2]; - x[2][2] *= s[2]; - x[2][3] *= s[2]; + x[2][0] *= s.z; + x[2][1] *= s.z; + x[2][2] *= s.z; + x[2][3] *= s.z; return *this; } @@ -4239,9 +4324,9 @@ Matrix44::setTranslation (const Vec3& t) x[2][2] = 1; x[2][3] = 0; - x[3][0] = t[0]; - x[3][1] = t[1]; - x[3][2] = t[2]; + x[3][0] = t.x; + x[3][1] = t.y; + x[3][2] = t.z; x[3][3] = 1; return *this; @@ -4259,10 +4344,10 @@ template IMATH_CONSTEXPR14 inline const Matrix44& Matrix44::translate (const Vec3& t) { - x[3][0] += t[0] * x[0][0] + t[1] * x[1][0] + t[2] * x[2][0]; - x[3][1] += t[0] * x[0][1] + t[1] * x[1][1] + t[2] * x[2][1]; - x[3][2] += t[0] * x[0][2] + t[1] * x[1][2] + t[2] * x[2][2]; - x[3][3] += t[0] * x[0][3] + t[1] * x[1][3] + t[2] * x[2][3]; + x[3][0] += t.x * x[0][0] + t.y * x[1][0] + t.z * x[2][0]; + x[3][1] += t.x * x[0][1] + t.y * x[1][1] + t.z * x[2][1]; + x[3][2] += t.x * x[0][2] + t.y * x[1][2] + t.z * x[2][2]; + x[3][3] += t.x * x[0][3] + t.y * x[1][3] + t.z * x[2][3]; return *this; } @@ -4277,13 +4362,13 @@ Matrix44::setShear (const Vec3& h) x[0][2] = 0; x[0][3] = 0; - x[1][0] = h[0]; + x[1][0] = h.x; x[1][1] = 1; x[1][2] = 0; x[1][3] = 0; - x[2][0] = h[1]; - x[2][1] = h[2]; + x[2][0] = h.y; + x[2][1] = h.z; x[2][2] = 1; x[2][3] = 0; @@ -4336,8 +4421,8 @@ Matrix44::shear (const Vec3& h) for (int i = 0; i < 4; i++) { - x[2][i] += h[1] * x[0][i] + h[2] * x[1][i]; - x[1][i] += h[0] * x[0][i]; + x[2][i] += h.y * x[0][i] + h.z * x[1][i]; + x[1][i] += h.x * x[0][i]; } return *this; From 81bfb93ac183ca8f5dc7cfd0f364edfba42f9734 Mon Sep 17 00:00:00 2001 From: Larry Gritz Date: Thu, 19 Nov 2020 17:14:42 -0800 Subject: [PATCH 2/2] Adjust comments Signed-off-by: Larry Gritz --- src/Imath/ImathMatrix.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/Imath/ImathMatrix.h b/src/Imath/ImathMatrix.h index bbfc247b..7dea34eb 100644 --- a/src/Imath/ImathMatrix.h +++ b/src/Imath/ImathMatrix.h @@ -2735,12 +2735,12 @@ Matrix33::setScale (T s) x[2][0] = 0; x[2][1] = 0; x[2][2] = s; - // FIXME: The old code looked like this: - // memset (x, 0, sizeof (x)); - // x[0][0] = s; - // x[1][1] = s; - // x[2][2] = 1; - // Why is the [2][2] component 1? That seems wrong. + // NOTE: The OpenEXR 2.x code looked made this matrix: + // s 0 0 + // 0 s 0 + // 0 0 1 + // This was deemed to be unexpected odd behavior, so beginning with + // Imath 3.0, we make a uniform 3D scale. return *this; }