diff --git a/packages/phalanx/src/Phalanx_KokkosViewOfViews.hpp b/packages/phalanx/src/Phalanx_KokkosViewOfViews.hpp index f040bcccf0d2..d1f78143f9f3 100644 --- a/packages/phalanx/src/Phalanx_KokkosViewOfViews.hpp +++ b/packages/phalanx/src/Phalanx_KokkosViewOfViews.hpp @@ -102,7 +102,7 @@ namespace PHX { }; // **************************** - // ViewOfViews: new version + // ViewOfViews: new version (inner views use Unmanaged template parameter) // **************************** namespace details { @@ -210,7 +210,7 @@ namespace PHX { is_initialized_ = true; } - /// Set an innder device view on the outer view. Indices are the outer view indices. + /// Set an inner device view on the outer view. Indices are the outer view indices. template void setView(InnerViewType v,Indices... i) { @@ -245,6 +245,138 @@ namespace PHX { } }; + // **************************** + // ViewOfViews: third version (inner views are runtime unmanaged - no Unmanaged template parameter) + // **************************** + + /** Wrapper class that correctly handles ViewOfViews construction + and object lifetime. This class makes sure the host view stays + in scope for the life of the device view and makes sure that the + device is synced to host before use. + + Main restrictions: + + 1. When UVM is not used in the outer view, we need to allocate + the outer VofV on host and copy to device to initialize the + inner views correctly (tracking object). + + 2. Step 1 means that the host view must exist as long as the + device view is being used, otherwise the views may go out of + scope. This object exists to pair up the host and device view to + make sure the inner views are not deleted early. + + 3. Normally we use an unmanaged view (constructed with the + Unmanaged template parameter) for the inner views to prevent + double deletion.
However, there are use cases where it's painful + to pass around views built with the unmanaged template parameter + (libraries with functions that block the unmanaged argument). We + can generate an unmanaged view without the template parameter by + constructing the view with a raw pointer. This third + implementation does that here. + */ + template + class ViewOfViews3 { + + public: + // Layout of the outer view doesn't matter for performance so we + // use a raw Kokkos::View instead of PHX::View. The inner views are + // what is important for performance. + using OuterDataType = typename PHX::v_of_v_utils::add_pointer::type; + using OuterViewType = Kokkos::View; + + private: + // Inner views are managed - used to prevent early deletion + typename OuterViewType::HostMirror view_host_; + // Inner views are unmanaged by runtime construction with pointer + // (avoids template parameter). Used to correctly initialize outer + // device view on device. + typename OuterViewType::HostMirror view_host_unmanaged_; + // Device view + OuterViewType view_device_; + // True if the device view has been synced from the host view since the last addView()/initialize() + bool device_view_is_synced_; + // True if the outer view has been initialized + bool is_initialized_; + // Use count after initialization. This changes based on whether the device space is accessible to the host space. Stays -1 until the outer view is allocated. + int use_count_; + + public: + template + ViewOfViews3(const std::string name,Extents... extents) + : view_host_(name,extents...), + view_device_(name,extents...), + device_view_is_synced_(false), + is_initialized_(true) + { + view_host_unmanaged_ = Kokkos::create_mirror_view(view_device_); + use_count_ = view_device_.impl_track().use_count(); + } + + ViewOfViews3() + : device_view_is_synced_(false), + is_initialized_(false), + use_count_(-1) + {} + + ~ViewOfViews3() + { + // Make sure there is not another object pointing to device view + // since the host view will delete the inner views on exit. A never-initialized object owns no device view, so the check is skipped for it.
+ if (is_initialized_ && (view_device_.impl_track().use_count() != use_count_)) + Kokkos::abort("\n ERROR - PHX::ViewOfViews - please free all instances of device ViewOfView \n before deleting the host ViewOfView!\n\n"); + } + + /// Allocate the outer view objects. Extents are for the outer view. + template + void initialize(const std::string name,Extents... extents) + { + view_host_ = typename OuterViewType::HostMirror(name,extents...); + view_device_ = OuterViewType(name,extents...); + view_host_unmanaged_ = Kokkos::create_mirror_view(view_device_); + device_view_is_synced_ = false; + is_initialized_ = true; + use_count_ = view_device_.impl_track().use_count(); + } + + // Returns true if the outer view has been initialized. + bool is_initialized() const {return is_initialized_;} + + template + void addView(InnerViewType v,Indices... i) + { + TEUCHOS_ASSERT(is_initialized_); + // Store the managed version so it doesn't get deleted. + view_host_(i...) = v; + // Store a runtime unmanaged view to prevent double deletion on device + view_host_unmanaged_(i...) = InnerViewType(v.data(),v.layout()); + device_view_is_synced_ = false; + } + + /// Note this only syncs the outer view. The inner views are + /// assumed to be on device for both host and device outer views. + void syncHostToDevice() + { + TEUCHOS_ASSERT(is_initialized_); + Kokkos::deep_copy(view_device_,view_host_unmanaged_); + device_view_is_synced_ = true; + } + + /// Returns a host mirror view for the outer view, where the inner + /// views are still on device.
+ auto getViewHost() + { + TEUCHOS_ASSERT(is_initialized_); + return view_host_; + } + + /// Returns device view of views + auto getViewDevice() + { + KOKKOS_ASSERT(device_view_is_synced_); + return view_device_; + } + }; + } #endif diff --git a/packages/phalanx/test/Kokkos/tKokkosViewOfViews.cpp b/packages/phalanx/test/Kokkos/tKokkosViewOfViews.cpp index ad2c14d7a271..4e460abda4c9 100644 --- a/packages/phalanx/test/Kokkos/tKokkosViewOfViews.cpp +++ b/packages/phalanx/test/Kokkos/tKokkosViewOfViews.cpp @@ -1,8 +1,19 @@ #include "Kokkos_Core.hpp" #include "Teuchos_UnitTestHarness.hpp" +// Force this test to always run without UVM. +// For Cuda builds, ignore the default memory space in the Cuda +// execution space since it can be set to UVM at +// configure. For non-Cuda builds, just use the default memory space +// in the execution space. +namespace Kokkos { + class Cuda; + class CudaSpace; +} using exec_t = Kokkos::DefaultExecutionSpace; -using mem_t = Kokkos::DefaultExecutionSpace::memory_space; +using mem_t = std::conditional::value, + Kokkos::CudaSpace, + Kokkos::DefaultExecutionSpace::memory_space>::type; // ************************************* // This test demonstrates how to create a view of views from separate @@ -84,3 +95,59 @@ TEUCHOS_UNIT_TEST(ViewOfViews,from_separate_views) { // new (&v_host(i)) InnerView(a); // for (size_t i=0; i < v_host.extent(0); ++i) // v_host(i).~InnerView(); + +// ************************************* +// This test checks if we can use a Kokkos::Array with view of views +// ************************************* +TEUCHOS_UNIT_TEST(ViewOfViews,ArrayOfViews) { + + const int num_cells = 10; + const int num_pts = 8; + const int num_equations = 32; + + // Requirement 1: The inner view must be unmanaged on device to + // prevent double deletion! To initialize correctly, we need to deep + // copy from host with the inner views properties matching exactly on + // host and device.
+ + using InnerViewUnmanaged = Kokkos::View>; + using InnerView = Kokkos::View; + + // For making sure the views are not destroyed. + // std::vector vec(3); + + Kokkos::Array a_of_v; + { + InnerView a("a",num_cells,num_pts,num_equations); + InnerView b("b",num_cells,num_pts,num_equations); + InnerView c("c",num_cells,num_pts,num_equations); + + // vec[0] = a; + // vec[1] = b; + // vec[2] = c; + + a_of_v[0] = a; + a_of_v[1] = b; + a_of_v[2] = c; + + Kokkos::deep_copy(a,1.0); + Kokkos::deep_copy(b,2.0); + Kokkos::deep_copy(c,3.0); + } + + auto policy = Kokkos::MDRangePolicy>({0,0,0},{num_cells,num_pts,num_equations}); + Kokkos::parallel_for("view of view test",policy,KOKKOS_LAMBDA (const int cell,const int pt, const int eq) { + a_of_v[2](cell,pt,eq) += a_of_v[0](cell,pt,eq) + a_of_v[1](cell,pt,eq); + }); + + auto c_host = Kokkos::create_mirror_view(a_of_v[2]); + Kokkos::deep_copy(c_host,a_of_v[2]); + const auto tol = std::numeric_limits::epsilon() * 100.0; + for (int cell=0; cell < num_cells; ++cell) + for (int pt=0; pt < num_pts; ++pt) + for (int eq=0; eq < num_equations; ++eq) { + TEST_FLOATING_EQUALITY(c_host(cell,pt,eq),6.0,tol); + } + +} + diff --git a/packages/phalanx/test/ViewOfViews/tPhalanxViewOfViews.cpp b/packages/phalanx/test/ViewOfViews/tPhalanxViewOfViews.cpp index a92dd3c54b9c..3f3f26fab12d 100644 --- a/packages/phalanx/test/ViewOfViews/tPhalanxViewOfViews.cpp +++ b/packages/phalanx/test/ViewOfViews/tPhalanxViewOfViews.cpp @@ -123,6 +123,116 @@ TEUCHOS_UNIT_TEST(PhalanxViewOfViews,NewImpl) { } +TEUCHOS_UNIT_TEST(PhalanxViewOfViews,ViewOfView3_EmptyCtor) { + + const int num_cells = 10; + const int num_pts = 8; + const int num_equations = 32; + + Kokkos::View a("a",num_cells,num_pts,num_equations); + Kokkos::View b("b",num_cells,num_pts,num_equations); + Kokkos::View c("c",num_cells,num_pts,num_equations); + Kokkos::View d("d",num_cells,num_pts,num_equations); + + Kokkos::deep_copy(a,2.0); + Kokkos::deep_copy(b,3.0); + Kokkos::deep_copy(c,4.0); + + { +
using InnerView = Kokkos::View; + constexpr int OuterViewRank = 2; + PHX::ViewOfViews3 v_of_v; + + TEST_ASSERT(!v_of_v.is_initialized()); + v_of_v.initialize("outer host",2,2); + TEST_ASSERT(v_of_v.is_initialized()); + + v_of_v.addView(a,0,0); + v_of_v.addView(b,0,1); + v_of_v.addView(c,1,0); + v_of_v.addView(d,1,1); + + v_of_v.syncHostToDevice(); + + { + auto v_dev = v_of_v.getViewDevice(); + auto policy = Kokkos::MDRangePolicy>({0,0,0},{num_cells,num_pts,num_equations}); + Kokkos::parallel_for("view of view test",policy,KOKKOS_LAMBDA (const int cell,const int pt, const int eq) { + v_dev(1,1)(cell,pt,eq) = v_dev(0,0)(cell,pt,eq) + v_dev(0,1)(cell,pt,eq) + v_dev(1,0)(cell,pt,eq); + }); + } + + // Uncomment the line below to prove the ViewOfViews prevents + // device views from outliving host view. This line will cause a + // Kokkos::abort() and error message since v_dev above is still in + // scope when the ViewOfViews is destroyed. + // v_of_v = PHX::ViewOfViews("outer host",2,2); + } + + auto d_host = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),d); + + const auto tol = std::numeric_limits::epsilon() * 100.0; + for (int cell=0; cell < num_cells; ++cell) + for (int pt=0; pt < num_pts; ++pt) + for (int eq=0; eq < num_equations; ++eq) { + TEST_FLOATING_EQUALITY(d_host(cell,pt,eq),9.0,tol); + } +} + +TEUCHOS_UNIT_TEST(PhalanxViewOfViews,ViewOfView3_TwoArgCtor) { + + const int num_cells = 10; + const int num_pts = 8; + const int num_equations = 32; + + Kokkos::View a("a",num_cells,num_pts,num_equations); + Kokkos::View b("b",num_cells,num_pts,num_equations); + Kokkos::View c("c",num_cells,num_pts,num_equations); + Kokkos::View d("d",num_cells,num_pts,num_equations); + + Kokkos::deep_copy(a,2.0); + Kokkos::deep_copy(b,3.0); + Kokkos::deep_copy(c,4.0); + + { + using InnerView = Kokkos::View; + constexpr int OuterViewRank = 2; + PHX::ViewOfViews3 v_of_v("outer host",2,2); + + TEST_ASSERT(v_of_v.is_initialized()); + + v_of_v.addView(a,0,0); +
v_of_v.addView(b,0,1); + v_of_v.addView(c,1,0); + v_of_v.addView(d,1,1); + + v_of_v.syncHostToDevice(); + + { + auto v_dev = v_of_v.getViewDevice(); + auto policy = Kokkos::MDRangePolicy>({0,0,0},{num_cells,num_pts,num_equations}); + Kokkos::parallel_for("view of view test",policy,KOKKOS_LAMBDA (const int cell,const int pt, const int eq) { + v_dev(1,1)(cell,pt,eq) = v_dev(0,0)(cell,pt,eq) + v_dev(0,1)(cell,pt,eq) + v_dev(1,0)(cell,pt,eq); + }); + } + + // Uncomment the line below to prove the ViewOfViews prevents + // device views from outliving host view. This line will cause a + // Kokkos::abort() and error message since v_dev above is still in + // scope when the ViewOfViews is destroyed. + // v_of_v = PHX::ViewOfViews("outer host",2,2); + } + + auto d_host = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),d); + + const auto tol = std::numeric_limits::epsilon() * 100.0; + for (int cell=0; cell < num_cells; ++cell) + for (int pt=0; pt < num_pts; ++pt) + for (int eq=0; eq < num_equations; ++eq) { + TEST_FLOATING_EQUALITY(d_host(cell,pt,eq),9.0,tol); + } +} + // ******************************** // Demonstrates an alternative path for ViewOfViews that uses a user // defined wrapper and the assignment operator on device to disable