Skip to content

Commit

Permalink
Changes to accommodate upcoming Imath vector conversion changes (#2878)
Browse files Browse the repository at this point in the history
I have submitted a PR to Imath that adds "interopability" constructors
and assignment, that use some templates to allow seamless
construction, assignment, and passing of "foreign" vector types to
Imath vectors. This makes it easier library A that uses Imath vector
types in its API to interact with app or library B that has a custom
internal vector type, without a ton of ugly casts and copies.
(See AcademySoftwareFoundation/Imath#91)

These created a few minor conflicts with OIIO's internal SIMD classes
and their construction and casting from Imath types. This patch cleans
up the ambiguities:

* Make Imath::V4f -> simd::vfloat4 and Imath::M44f -> simd::matrix44
  constructor `explicit`.

* Make the simd types `simd()` method (returning the underlying raw SIMD
  type) also have a variety that returns a ref to a non-const vector,
  to ensure proper working of the _MM_TRANSPOSE4_PS macro.

Now I am going to commit a sin. This MUST get backported to OIIO 2.2
in order for it to not break against the upcoming Imath 3.0.  This
doesn't break ABI compatibility, nor change OIIO's primary advertised
public APIs, but it does change the technically/informally public APIs
of simd.h in ways that introduce the possibility that an app using
those simd vector classes *might* need minor source code editing if
they relied on those implicit constructors. Such software might need
certain `simd::vector4 foo = imath_bar` to become `foo = vector4(bar)`
and the same for certain matrix44 assignments.  It is usually a big
no-no to backport any change that could require editing of source
code.  But since the alternative is not being able to use the new
Imath, I think this is the lesser evil, and maybe nobody out there is
doing the thing that would need to be changed?
  • Loading branch information
lgritz authored Mar 1, 2021
1 parent 86d910b commit fc17e94
Show file tree
Hide file tree
Showing 2 changed files with 44 additions and 33 deletions.
63 changes: 36 additions & 27 deletions src/include/OpenImageIO/simd.h
Original file line number Diff line number Diff line change
Expand Up @@ -484,6 +484,7 @@ class vbool4 {
/// Return the raw SIMD type
operator simd_t () const { return m_simd; }
simd_t simd () const { return m_simd; }
simd_t& simd () { return m_simd; }

/// Extract the bitmask
int bitmask () const;
Expand Down Expand Up @@ -624,6 +625,7 @@ class vbool8 {
/// Return the raw SIMD type
operator simd_t () const { return m_simd; }
simd_t simd () const { return m_simd; }
simd_t& simd () { return m_simd; }

/// Extract the bitmask
int bitmask () const;
Expand Down Expand Up @@ -778,6 +780,7 @@ class vbool16 {
/// Return the raw SIMD type
operator simd_t () const { return m_simd; }
simd_t simd () const { return m_simd; }
simd_t& simd () { return m_simd; }

int bitmask () const;

Expand Down Expand Up @@ -929,6 +932,7 @@ class vint4 {
/// Return the raw SIMD type
operator simd_t () const { return m_simd; }
simd_t simd () const { return m_simd; }
simd_t& simd () { return m_simd; }

/// Return a pointer to the underlying scalar type
const value_t* data () const { return (const value_t*)this; }
Expand Down Expand Up @@ -1218,6 +1222,7 @@ class vint8 {
/// Return the raw SIMD type
operator simd_t () const { return m_simd; }
simd_t simd () const { return m_simd; }
simd_t& simd () { return m_simd; }

/// Return a pointer to the underlying scalar type
const value_t* data () const { return (const value_t*)this; }
Expand Down Expand Up @@ -1514,6 +1519,7 @@ class vint16 {
/// Return the raw SIMD type
operator simd_t () const { return m_simd; }
simd_t simd () const { return m_simd; }
simd_t& simd () { return m_simd; }

/// Return a pointer to the underlying scalar type
const value_t* data () const { return (const value_t*)this; }
Expand Down Expand Up @@ -1803,19 +1809,20 @@ class vfloat4 {
/// Return the raw SIMD type
operator simd_t () const { return m_simd; }
simd_t simd () const { return m_simd; }
simd_t& simd () { return m_simd; }

/// Return a pointer to the underlying scalar type
const value_t* data () const { return (const value_t*)this; }
value_t* data () { return (value_t*)this; }

/// Construct from a Imath::V3f
vfloat4 (const Imath::V3f &v) { load (v[0], v[1], v[2]); }
explicit vfloat4 (const Imath::V3f &v) { load (v[0], v[1], v[2]); }

/// Cast to a Imath::V3f
const Imath::V3f& V3f () const { return *(const Imath::V3f*)this; }

/// Construct from a Imath::V4f
vfloat4 (const Imath::V4f &v) { load ((const float *)&v); }
explicit vfloat4 (const Imath::V4f &v) { load ((const float *)&v); }

/// Cast to a Imath::V4f
const Imath::V4f& V4f () const { return *(const Imath::V4f*)this; }
Expand Down Expand Up @@ -2279,7 +2286,7 @@ class matrix44 {
{ }

/// Construct from a reference to an Imath::M44f
OIIO_FORCEINLINE matrix44 (const Imath::M44f &M) {
OIIO_FORCEINLINE explicit matrix44 (const Imath::M44f &M) {
#if OIIO_SIMD_SSE
m_row[0].load (M[0]);
m_row[1].load (M[1]);
Expand Down Expand Up @@ -2450,6 +2457,7 @@ class vfloat8 {
/// Return the raw SIMD type
operator simd_t () const { return m_simd; }
simd_t simd () const { return m_simd; }
simd_t& simd () { return m_simd; }

/// Return a pointer to the underlying scalar type
const value_t* data () const { return (const value_t*)this; }
Expand Down Expand Up @@ -2772,6 +2780,7 @@ class vfloat16 {
/// Return the raw SIMD type
operator simd_t () const { return m_simd; }
simd_t simd () const { return m_simd; }
simd_t& simd () { return m_simd; }

/// Return a pointer to the underlying scalar type
const value_t* data () const { return (const value_t*)this; }
Expand Down Expand Up @@ -7723,7 +7732,7 @@ OIIO_FORCEINLINE T log (const T& v)
OIIO_FORCEINLINE void transpose (vfloat4 &a, vfloat4 &b, vfloat4 &c, vfloat4 &d)
{
#if OIIO_SIMD_SSE
_MM_TRANSPOSE4_PS (a, b, c, d);
_MM_TRANSPOSE4_PS (a.simd(), b.simd(), c.simd(), d.simd());
#else
vfloat4 A (a[0], b[0], c[0], d[0]);
vfloat4 B (a[1], b[1], c[1], d[1]);
Expand All @@ -7739,14 +7748,14 @@ OIIO_FORCEINLINE void transpose (const vfloat4& a, const vfloat4& b, const vfloa
{
#if OIIO_SIMD_SSE
//_MM_TRANSPOSE4_PS (a, b, c, d);
vfloat4 l02 = _mm_unpacklo_ps (a, c);
vfloat4 h02 = _mm_unpackhi_ps (a, c);
vfloat4 l13 = _mm_unpacklo_ps (b, d);
vfloat4 h13 = _mm_unpackhi_ps (b, d);
r0 = _mm_unpacklo_ps (l02, l13);
r1 = _mm_unpackhi_ps (l02, l13);
r2 = _mm_unpacklo_ps (h02, h13);
r3 = _mm_unpackhi_ps (h02, h13);
auto l02 = _mm_unpacklo_ps (a, c);
auto h02 = _mm_unpackhi_ps (a, c);
auto l13 = _mm_unpacklo_ps (b, d);
auto h13 = _mm_unpackhi_ps (b, d);
r0 = vfloat4(_mm_unpacklo_ps (l02, l13));
r1 = vfloat4(_mm_unpackhi_ps (l02, l13));
r2 = vfloat4(_mm_unpacklo_ps (h02, h13));
r3 = vfloat4(_mm_unpackhi_ps (h02, h13));
#else
r0.load (a[0], b[0], c[0], d[0]);
r1.load (a[1], b[1], c[1], d[1]);
Expand Down Expand Up @@ -8113,7 +8122,7 @@ OIIO_FORCEINLINE matrix44 matrix44::transposed () const {
simd::transpose (m_row[0], m_row[1], m_row[2], m_row[3],
T.m_row[0], T.m_row[1], T.m_row[2], T.m_row[3]);
#else
T = m_mat.transposed();
T.m_mat = m_mat.transposed();
#endif
return T;
}
Expand Down Expand Up @@ -8238,14 +8247,14 @@ OIIO_FORCEINLINE matrix44 matrix44::inverse() const {
vfloat4 det, tmp1;
const float *src = (const float *)this;
vfloat4 zero = vfloat4::Zero();
tmp1 = _mm_loadh_pi(_mm_loadl_pi(zero, (__m64*)(src)), (__m64*)(src+ 4));
row1 = _mm_loadh_pi(_mm_loadl_pi(zero, (__m64*)(src+8)), (__m64*)(src+12));
row0 = _mm_shuffle_ps(tmp1, row1, 0x88);
row1 = _mm_shuffle_ps(row1, tmp1, 0xDD);
tmp1 = _mm_loadh_pi(_mm_loadl_pi(tmp1, (__m64*)(src+ 2)), (__m64*)(src+ 6));
row3 = _mm_loadh_pi(_mm_loadl_pi(zero, (__m64*)(src+10)), (__m64*)(src+14));
row2 = _mm_shuffle_ps(tmp1, row3, 0x88);
row3 = _mm_shuffle_ps(row3, tmp1, 0xDD);
tmp1 = vfloat4(_mm_loadh_pi(_mm_loadl_pi(zero, (__m64*)(src)), (__m64*)(src+ 4)));
row1 = vfloat4(_mm_loadh_pi(_mm_loadl_pi(zero, (__m64*)(src+8)), (__m64*)(src+12)));
row0 = vfloat4(_mm_shuffle_ps(tmp1, row1, 0x88));
row1 = vfloat4(_mm_shuffle_ps(row1, tmp1, 0xDD));
tmp1 = vfloat4(_mm_loadh_pi(_mm_loadl_pi(tmp1, (__m64*)(src+ 2)), (__m64*)(src+ 6)));
row3 = vfloat4(_mm_loadh_pi(_mm_loadl_pi(zero, (__m64*)(src+10)), (__m64*)(src+14)));
row2 = vfloat4(_mm_shuffle_ps(tmp1, row3, 0x88));
row3 = vfloat4(_mm_shuffle_ps(row3, tmp1, 0xDD));
// -----------------------------------------------
tmp1 = row2 * row3;
tmp1 = shuffle<1,0,3,2>(tmp1);
Expand Down Expand Up @@ -8301,13 +8310,13 @@ OIIO_FORCEINLINE matrix44 matrix44::inverse() const {
// -----------------------------------------------
det = row0 * minor0;
det = shuffle<2,3,0,1>(det) + det;
det = _mm_add_ss(shuffle<1,0,3,2>(det), det);
tmp1 = _mm_rcp_ss(det);
det = _mm_sub_ss(_mm_add_ss(tmp1, tmp1), _mm_mul_ss(det, _mm_mul_ss(tmp1, tmp1)));
det = vfloat4(_mm_add_ss(shuffle<1,0,3,2>(det), det));
tmp1 = vfloat4(_mm_rcp_ss(det));
det = vfloat4(_mm_sub_ss(_mm_add_ss(tmp1, tmp1), _mm_mul_ss(det, _mm_mul_ss(tmp1, tmp1))));
det = shuffle<0>(det);
return matrix44 (det*minor0, det*minor1, det*minor2, det*minor3);
#else
return m_mat.inverse();
return matrix44 (m_mat.inverse());
#endif
}

Expand All @@ -8332,7 +8341,7 @@ OIIO_FORCEINLINE vfloat3 transformp (const Imath::M44f &M, const vfloat3 &V)
return matrix44(M).transformp (V);
#else
Imath::V3f R;
M.multVecMatrix (*(Imath::V3f *)&V, R);
M.multVecMatrix (*(const Imath::V3f *)&V, R);
return vfloat3(R);
#endif
}
Expand All @@ -8348,7 +8357,7 @@ OIIO_FORCEINLINE vfloat3 transformv (const Imath::M44f &M, const vfloat3 &V)
return matrix44(M).transformv (V);
#else
Imath::V3f R;
M.multDirMatrix (*(Imath::V3f *)&V, R);
M.multDirMatrix (*(const Imath::V3f *)&V, R);
return vfloat3(R);
#endif
}
Expand Down
14 changes: 8 additions & 6 deletions src/libutil/simd_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1845,10 +1845,10 @@ test_matrix()
matrix44 m(1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16);
Imath::V4f V(1,2,3,4);
vfloat4 v(1,2,3,4);
vfloat4 mv = m*v;
vfloat4 vm = v*m;
OIIO_CHECK_SIMD_EQUAL(mv, M*V);
OIIO_CHECK_SIMD_EQUAL(vm, V*M);
OIIO_CHECK_SIMD_EQUAL(vm, vfloat4(V*M));
// vfloat4 mv = m*v;
// OIIO_CHECK_SIMD_EQUAL(mv, M*V);
benchmark2("V4 * M44 Imath", mul_vm_imath, V, M, 1);
// benchmark2("M44 * V4 Imath", mul_mv_imath, mx, v4x, 1);
benchmark2("M44 * V4 simd", mul_mv_simd, m, v, 1);
Expand All @@ -1866,9 +1866,11 @@ test_matrix()
OIIO_CHECK_NE(Mtrans, mr);
}
OIIO_CHECK_ASSERT(
mx_equal_thresh(Mtrans.inverse(), matrix44(Mtrans).inverse(), 1.0e-6f));
mx_equal_thresh(matrix44(Mtrans.inverse()), matrix44(Mtrans).inverse(),
1.0e-6f));
OIIO_CHECK_ASSERT(
mx_equal_thresh(Mrot.inverse(), matrix44(Mrot).inverse(), 1.0e-6f));
mx_equal_thresh(matrix44(Mrot.inverse()), matrix44(Mrot).inverse(),
1.0e-6f));
OIIO_CHECK_EQUAL(
matrix44(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15),
Imath::M44f(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15));
Expand All @@ -1885,7 +1887,7 @@ test_matrix()
iterations /= 2;
benchmark("m44 inverse Imath", inverse_imath, mx, 1);
// std::cout << "inv " << matrix44(inverse_imath(mx)) << "\n";
benchmark("m44 inverse_simd", inverse_simd, mx, 1);
benchmark("m44 inverse_simd", inverse_simd, matrix44(mx), 1);
// std::cout << "inv " << inverse_simd(mx) << "\n";
benchmark("m44 inverse_simd native simd", inverse_simd, matrix44(mx), 1);
// std::cout << "inv " << inverse_simd(mx) << "\n";
Expand Down

0 comments on commit fc17e94

Please sign in to comment.