Changes to accommodate upcoming Imath vector conversion changes (#2878)

I have submitted a PR to Imath that adds "interopability" constructors and assignment, that use some templates to allow seamless construction, assignment, and passing of "foreign" vector types to Imath vectors. This makes it easier library A that uses Imath vector types in its API to interact with app or library B that has a custom internal vector type, without a ton of ugly casts and copies. (See AcademySoftwareFoundation/Imath#91) These created a few minor conflicts with OIIO's internal SIMD classes and their construction and casting from Imath types. This patch cleans up the ambiguities: * Make Imath::V4f -> simd::vfloat4 and Imath::M44f -> simd::matrix44 constructor `explicit`. * Make the simd types `simd()` method (returning the underlying raw SIMD type) also have a variety that returns a ref to a non-const vector, to ensure proper working of the _MM_TRANSPOSE4_PS macro. Now I am going to commit a sin. This MUST get backported to OIIO 2.2 in order for it to not break against the upcoming Imath 3.0. This doesn't break ABI compatibility, nor change OIIO's primary advertised public APIs, but it does change the technically/informally public APIs of simd.h in ways that introduce the possibility that an app using those simd vector classes *might* need minor source code editing if they relied on those implicit constructors. Such software might need certain `simd::vector4 foo = imath_bar` to become `foo = vector4(bar)` and the same for certain matrix44 assignments. It is usually a big no-no to backport any change that could require editing of source code. But since the alternative is not being able to use the new Imath, I think this is the lesser evil, and maybe nobody out there is doing the thing that would need to be changed?
AcademySoftwareFoundation · Mar 1, 2021 · fc17e94 · fc17e94
1 parent 86d910b
commit fc17e94
Show file tree

Hide file tree

Showing 2 changed files with 44 additions and 33 deletions.
diff --git a/src/include/OpenImageIO/simd.h b/src/include/OpenImageIO/simd.h
@@ -484,6 +484,7 @@ class vbool4 {
     /// Return the raw SIMD type
     operator simd_t () const { return m_simd; }
     simd_t simd () const { return m_simd; }
+    simd_t& simd () { return m_simd; }
 
     /// Extract the bitmask
     int bitmask () const;
@@ -624,6 +625,7 @@ class vbool8 {
     /// Return the raw SIMD type
     operator simd_t () const { return m_simd; }
     simd_t simd () const { return m_simd; }
+    simd_t& simd () { return m_simd; }
 
     /// Extract the bitmask
     int bitmask () const;
@@ -778,6 +780,7 @@ class vbool16 {
     /// Return the raw SIMD type
     operator simd_t () const { return m_simd; }
     simd_t simd () const { return m_simd; }
+    simd_t& simd () { return m_simd; }
 
     int bitmask () const;
 
@@ -929,6 +932,7 @@ class vint4 {
     /// Return the raw SIMD type
     operator simd_t () const { return m_simd; }
     simd_t simd () const { return m_simd; }
+    simd_t& simd () { return m_simd; }
 
     /// Return a pointer to the underlying scalar type
     const value_t* data () const { return (const value_t*)this; }
@@ -1218,6 +1222,7 @@ class vint8 {
     /// Return the raw SIMD type
     operator simd_t () const { return m_simd; }
     simd_t simd () const { return m_simd; }
+    simd_t& simd () { return m_simd; }
 
     /// Return a pointer to the underlying scalar type
     const value_t* data () const { return (const value_t*)this; }
@@ -1514,6 +1519,7 @@ class vint16 {
     /// Return the raw SIMD type
     operator simd_t () const { return m_simd; }
     simd_t simd () const { return m_simd; }
+    simd_t& simd () { return m_simd; }
 
     /// Return a pointer to the underlying scalar type
     const value_t* data () const { return (const value_t*)this; }
@@ -1803,19 +1809,20 @@ class vfloat4 {
     /// Return the raw SIMD type
     operator simd_t () const { return m_simd; }
     simd_t simd () const { return m_simd; }
+    simd_t& simd () { return m_simd; }
 
     /// Return a pointer to the underlying scalar type
     const value_t* data () const { return (const value_t*)this; }
     value_t* data () { return (value_t*)this; }
 
     /// Construct from a Imath::V3f
-    vfloat4 (const Imath::V3f &v) { load (v[0], v[1], v[2]); }
+    explicit vfloat4 (const Imath::V3f &v) { load (v[0], v[1], v[2]); }
 
     /// Cast to a Imath::V3f
     const Imath::V3f& V3f () const { return *(const Imath::V3f*)this; }
 
     /// Construct from a Imath::V4f
-    vfloat4 (const Imath::V4f &v) { load ((const float *)&v); }
+    explicit vfloat4 (const Imath::V4f &v) { load ((const float *)&v); }
 
     /// Cast to a Imath::V4f
     const Imath::V4f& V4f () const { return *(const Imath::V4f*)this; }
@@ -2279,7 +2286,7 @@ class matrix44 {
     { }
 
     /// Construct from a reference to an Imath::M44f
-    OIIO_FORCEINLINE matrix44 (const Imath::M44f &M) {
+    OIIO_FORCEINLINE explicit matrix44 (const Imath::M44f &M) {
 #if OIIO_SIMD_SSE
         m_row[0].load (M[0]);
         m_row[1].load (M[1]);
@@ -2450,6 +2457,7 @@ class vfloat8 {
     /// Return the raw SIMD type
     operator simd_t () const { return m_simd; }
     simd_t simd () const { return m_simd; }
+    simd_t& simd () { return m_simd; }
 
     /// Return a pointer to the underlying scalar type
     const value_t* data () const { return (const value_t*)this; }
@@ -2772,6 +2780,7 @@ class vfloat16 {
     /// Return the raw SIMD type
     operator simd_t () const { return m_simd; }
     simd_t simd () const { return m_simd; }
+    simd_t& simd () { return m_simd; }
 
     /// Return a pointer to the underlying scalar type
     const value_t* data () const { return (const value_t*)this; }
@@ -7723,7 +7732,7 @@ OIIO_FORCEINLINE T log (const T& v)
 OIIO_FORCEINLINE void transpose (vfloat4 &a, vfloat4 &b, vfloat4 &c, vfloat4 &d)
 {
 #if OIIO_SIMD_SSE
-    _MM_TRANSPOSE4_PS (a, b, c, d);
+    _MM_TRANSPOSE4_PS (a.simd(), b.simd(), c.simd(), d.simd());
 #else
     vfloat4 A (a[0], b[0], c[0], d[0]);
     vfloat4 B (a[1], b[1], c[1], d[1]);
@@ -7739,14 +7748,14 @@ OIIO_FORCEINLINE void transpose (const vfloat4& a, const vfloat4& b, const vfloa
 {
 #if OIIO_SIMD_SSE
     //_MM_TRANSPOSE4_PS (a, b, c, d);
-    vfloat4 l02 = _mm_unpacklo_ps (a, c);
-    vfloat4 h02 = _mm_unpackhi_ps (a, c);
-    vfloat4 l13 = _mm_unpacklo_ps (b, d);
-    vfloat4 h13 = _mm_unpackhi_ps (b, d);
-    r0 = _mm_unpacklo_ps (l02, l13);
-    r1 = _mm_unpackhi_ps (l02, l13);
-    r2 = _mm_unpacklo_ps (h02, h13);
-    r3 = _mm_unpackhi_ps (h02, h13);
+    auto l02 = _mm_unpacklo_ps (a, c);
+    auto h02 = _mm_unpackhi_ps (a, c);
+    auto l13 = _mm_unpacklo_ps (b, d);
+    auto h13 = _mm_unpackhi_ps (b, d);
+    r0 = vfloat4(_mm_unpacklo_ps (l02, l13));
+    r1 = vfloat4(_mm_unpackhi_ps (l02, l13));
+    r2 = vfloat4(_mm_unpacklo_ps (h02, h13));
+    r3 = vfloat4(_mm_unpackhi_ps (h02, h13));
 #else
     r0.load (a[0], b[0], c[0], d[0]);
     r1.load (a[1], b[1], c[1], d[1]);
@@ -8113,7 +8122,7 @@ OIIO_FORCEINLINE matrix44 matrix44::transposed () const {
     simd::transpose (m_row[0], m_row[1], m_row[2], m_row[3],
                      T.m_row[0], T.m_row[1], T.m_row[2], T.m_row[3]);
 #else
-    T = m_mat.transposed();
+    T.m_mat = m_mat.transposed();
 #endif
     return T;
 }
@@ -8238,14 +8247,14 @@ OIIO_FORCEINLINE matrix44 matrix44::inverse() const {
     vfloat4 det, tmp1;
     const float *src = (const float *)this;
     vfloat4 zero = vfloat4::Zero();
-    tmp1 = _mm_loadh_pi(_mm_loadl_pi(zero, (__m64*)(src)), (__m64*)(src+ 4));
-    row1 = _mm_loadh_pi(_mm_loadl_pi(zero, (__m64*)(src+8)), (__m64*)(src+12));
-    row0 = _mm_shuffle_ps(tmp1, row1, 0x88);
-    row1 = _mm_shuffle_ps(row1, tmp1, 0xDD);
-    tmp1 = _mm_loadh_pi(_mm_loadl_pi(tmp1, (__m64*)(src+ 2)), (__m64*)(src+ 6));
-    row3 = _mm_loadh_pi(_mm_loadl_pi(zero, (__m64*)(src+10)), (__m64*)(src+14));
-    row2 = _mm_shuffle_ps(tmp1, row3, 0x88);
-    row3 = _mm_shuffle_ps(row3, tmp1, 0xDD);
+    tmp1 = vfloat4(_mm_loadh_pi(_mm_loadl_pi(zero, (__m64*)(src)), (__m64*)(src+ 4)));
+    row1 = vfloat4(_mm_loadh_pi(_mm_loadl_pi(zero, (__m64*)(src+8)), (__m64*)(src+12)));
+    row0 = vfloat4(_mm_shuffle_ps(tmp1, row1, 0x88));
+    row1 = vfloat4(_mm_shuffle_ps(row1, tmp1, 0xDD));
+    tmp1 = vfloat4(_mm_loadh_pi(_mm_loadl_pi(tmp1, (__m64*)(src+ 2)), (__m64*)(src+ 6)));
+    row3 = vfloat4(_mm_loadh_pi(_mm_loadl_pi(zero, (__m64*)(src+10)), (__m64*)(src+14)));
+    row2 = vfloat4(_mm_shuffle_ps(tmp1, row3, 0x88));
+    row3 = vfloat4(_mm_shuffle_ps(row3, tmp1, 0xDD));
     // -----------------------------------------------
     tmp1 = row2 * row3;
     tmp1 = shuffle<1,0,3,2>(tmp1);
@@ -8301,13 +8310,13 @@ OIIO_FORCEINLINE matrix44 matrix44::inverse() const {
     // -----------------------------------------------
     det = row0 * minor0;
     det = shuffle<2,3,0,1>(det) + det;
-    det = _mm_add_ss(shuffle<1,0,3,2>(det), det);
-    tmp1 = _mm_rcp_ss(det);
-    det = _mm_sub_ss(_mm_add_ss(tmp1, tmp1), _mm_mul_ss(det, _mm_mul_ss(tmp1, tmp1)));
+    det = vfloat4(_mm_add_ss(shuffle<1,0,3,2>(det), det));
+    tmp1 = vfloat4(_mm_rcp_ss(det));
+    det = vfloat4(_mm_sub_ss(_mm_add_ss(tmp1, tmp1), _mm_mul_ss(det, _mm_mul_ss(tmp1, tmp1))));
     det = shuffle<0>(det);
     return matrix44 (det*minor0, det*minor1, det*minor2, det*minor3);
 #else
-    return m_mat.inverse();
+    return matrix44 (m_mat.inverse());
 #endif
 }
 
@@ -8332,7 +8341,7 @@ OIIO_FORCEINLINE vfloat3 transformp (const Imath::M44f &M, const vfloat3 &V)
     return matrix44(M).transformp (V);
 #else
     Imath::V3f R;
-    M.multVecMatrix (*(Imath::V3f *)&V, R);
+    M.multVecMatrix (*(const Imath::V3f *)&V, R);
     return vfloat3(R);
 #endif
 }
@@ -8348,7 +8357,7 @@ OIIO_FORCEINLINE vfloat3 transformv (const Imath::M44f &M, const vfloat3 &V)
     return matrix44(M).transformv (V);
 #else
     Imath::V3f R;
-    M.multDirMatrix (*(Imath::V3f *)&V, R);
+    M.multDirMatrix (*(const Imath::V3f *)&V, R);
     return vfloat3(R);
 #endif
 }

diff --git a/src/libutil/simd_test.cpp b/src/libutil/simd_test.cpp
@@ -1845,10 +1845,10 @@ test_matrix()
         matrix44 m(1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16);
         Imath::V4f V(1,2,3,4);
         vfloat4 v(1,2,3,4);
-        vfloat4 mv = m*v;
         vfloat4 vm = v*m;
-        OIIO_CHECK_SIMD_EQUAL(mv, M*V);
-        OIIO_CHECK_SIMD_EQUAL(vm, V*M);
+        OIIO_CHECK_SIMD_EQUAL(vm, vfloat4(V*M));
+        // vfloat4 mv = m*v;
+        // OIIO_CHECK_SIMD_EQUAL(mv, M*V);
         benchmark2("V4 * M44 Imath", mul_vm_imath, V, M, 1);
         // benchmark2("M44 * V4 Imath", mul_mv_imath, mx, v4x, 1);
         benchmark2("M44 * V4 simd", mul_mv_simd, m, v, 1);
@@ -1866,9 +1866,11 @@ test_matrix()
         OIIO_CHECK_NE(Mtrans, mr);
     }
     OIIO_CHECK_ASSERT(
-        mx_equal_thresh(Mtrans.inverse(), matrix44(Mtrans).inverse(), 1.0e-6f));
+        mx_equal_thresh(matrix44(Mtrans.inverse()), matrix44(Mtrans).inverse(),
+                        1.0e-6f));
     OIIO_CHECK_ASSERT(
-        mx_equal_thresh(Mrot.inverse(), matrix44(Mrot).inverse(), 1.0e-6f));
+        mx_equal_thresh(matrix44(Mrot.inverse()), matrix44(Mrot).inverse(),
+                        1.0e-6f));
     OIIO_CHECK_EQUAL(
         matrix44(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15),
         Imath::M44f(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15));
@@ -1885,7 +1887,7 @@ test_matrix()
     iterations /= 2;
     benchmark("m44 inverse Imath", inverse_imath, mx, 1);
     // std::cout << "inv " << matrix44(inverse_imath(mx)) << "\n";
-    benchmark("m44 inverse_simd", inverse_simd, mx, 1);
+    benchmark("m44 inverse_simd", inverse_simd, matrix44(mx), 1);
     // std::cout << "inv " << inverse_simd(mx) << "\n";
     benchmark("m44 inverse_simd native simd", inverse_simd, matrix44(mx), 1);
     // std::cout << "inv " << inverse_simd(mx) << "\n";