Skip to content

Commit

Permalink
Improve scheduling (#73)
Browse files Browse the repository at this point in the history
- tangent_basis takes ExecutionSpace instance as argument.
- Remove unnecessary fences
- Ensure synchronous deep_copies
- Fixes
- Use DDC fork
  • Loading branch information
blegouix authored Jan 3, 2025
1 parent be4c2a8 commit 3420667
Show file tree
Hide file tree
Showing 12 changed files with 50 additions and 59 deletions.
3 changes: 3 additions & 0 deletions .devnote
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,9 @@ SPDX-License-Identifier: GPL-3.0-or-later
- Consider removing metric_prod_t (and every helper producing a tensor). At least, standardize API.
- Relabelization in a different file?
- Streams in Laplacian
- Clarify domain() and mem_domain() (the same)
- There are several projections of metric on MetricIndex, each of which is simply a metric.indices_domain()
- swap_character_t

----- SETUP BASIC COMMAND -----

Expand Down
6 changes: 2 additions & 4 deletions include/similie/exterior/coboundary.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -253,16 +253,14 @@ coboundary_tensor_t<TagToAddToCochain, CochainTag, TensorType> coboundary(
CochainTag::rank() + 1,
typename detail::NonSpectatorDimension<
TagToAddToCochain,
typename TensorType::non_indices_domain_t>::type,
typename ExecSpace::memory_space>();
typename TensorType::non_indices_domain_t>::type>(exec_space);

// compute the tangent K-basis for each node of the mesh. This is a local K-chain.
auto lower_chain = tangent_basis<
CochainTag::rank(),
typename detail::NonSpectatorDimension<
TagToAddToCochain,
typename TensorType::non_indices_domain_t>::type,
typename ExecSpace::memory_space>();
typename TensorType::non_indices_domain_t>::type>(exec_space);

// iterate over every node, we will work inside the tangent space associated to each of them
ddc::parallel_for_each(
Expand Down
5 changes: 0 additions & 5 deletions include/similie/exterior/codifferential.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,6 @@ codifferential_tensor_t<TagToRemoveFromCochain, CochainTag, TensorType> codiffer
sil::tensor::upper<MetricIndex>,
MuUpSeq,
NuLowSeq>(exec_space, hodge_star, inv_metric);
exec_space.fence();

// Dual tensor
[[maybe_unused]] tensor::TensorAccessor<
Expand All @@ -219,7 +218,6 @@ codifferential_tensor_t<TagToRemoveFromCochain, CochainTag, TensorType> codiffer
KOKKOS_LAMBDA(typename TensorType::non_indices_domain_t::discrete_element_type elem) {
sil::tensor::tensor_prod(dual_tensor[elem], tensor[elem], hodge_star[elem]);
});
exec_space.fence();

// Dual codifferential
[[maybe_unused]] tensor::TensorAccessor<
Expand All @@ -241,7 +239,6 @@ codifferential_tensor_t<TagToRemoveFromCochain, CochainTag, TensorType> codiffer
misc::convert_type_seq_to_t<
tensor::TensorAntisymmetricIndex,
NuLowSeq>>(exec_space, dual_codifferential, dual_tensor);
exec_space.fence();

// Hodge star 2
[[maybe_unused]] sil::tensor::tensor_accessor_for_domain_t<HodgeStarDomain2>
Expand All @@ -257,7 +254,6 @@ codifferential_tensor_t<TagToRemoveFromCochain, CochainTag, TensorType> codiffer
sil::tensor::upper<MetricIndex>,
RhoUpSeq,
SigmaLowSeq>(exec_space, hodge_star2, inv_metric);
exec_space.fence();

// Codifferential
ddc::parallel_for_each(
Expand All @@ -273,7 +269,6 @@ codifferential_tensor_t<TagToRemoveFromCochain, CochainTag, TensorType> codiffer
codifferential_tensor[elem] *= -1;
}
});
exec_space.fence();

return codifferential_tensor;
}
Expand Down
9 changes: 1 addition & 8 deletions include/similie/exterior/laplacian.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,6 @@ TensorType codifferential_of_coboundary(
sil::tensor::Tensor derivative_tensor(derivative_tensor_alloc);

sil::exterior::deriv<LaplacianDummyIndex, CochainTag>(exec_space, derivative_tensor, tensor);
exec_space.fence();

// Codifferential
sil::exterior::codifferential<
Expand All @@ -54,7 +53,6 @@ TensorType codifferential_of_coboundary(
coboundary_index_t<
LaplacianDummyIndex,
CochainTag>>(exec_space, out_tensor, derivative_tensor, inv_metric);
exec_space.fence();

return out_tensor;
}
Expand Down Expand Up @@ -90,15 +88,13 @@ TensorType coboundary_of_codifferential(
MetricIndex,
LaplacianDummyIndex,
CochainTag>(exec_space, codifferential_tensor, tensor, inv_metric);
exec_space.fence();

// Coboundary
sil::exterior::deriv<
LaplacianDummyIndex,
codifferential_index_t<
LaplacianDummyIndex,
CochainTag>>(exec_space, out_tensor, codifferential_tensor);
exec_space.fence();

return out_tensor;
}
Expand Down Expand Up @@ -132,9 +128,8 @@ TensorType laplacian(
MetricIndex,
LaplacianDummyIndex2,
CochainTag>(exec_space, laplacian_tensor, tensor, inv_metric);
exec_space.fence();
} else if constexpr (CochainTag::rank() < LaplacianDummyIndex::size()) {
auto tmp_alloc = ddc::create_mirror(laplacian_tensor);
auto tmp_alloc = ddc::create_mirror(exec_space, laplacian_tensor);
tensor::Tensor tmp(tmp_alloc);

detail::codifferential_of_coboundary<
Expand All @@ -145,7 +140,6 @@ TensorType laplacian(
MetricIndex,
LaplacianDummyIndex,
CochainTag>(exec_space, tmp, tensor, inv_metric);
exec_space.fence();

ddc::parallel_for_each(
exec_space,
Expand All @@ -158,7 +152,6 @@ TensorType laplacian(
MetricIndex,
LaplacianDummyIndex,
CochainTag>(exec_space, laplacian_tensor, tensor, inv_metric);
exec_space.fence();
} else {
assert(false && "Unsupported differential form in Laplacian operator");
}
Expand Down
21 changes: 11 additions & 10 deletions include/similie/exterior/local_chain.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -374,13 +374,14 @@ LocalChain(Head, Tail...)

namespace detail {

template <std::size_t K, misc::Specialization<ddc::DiscreteDomain> Dom, class MemorySpace>
template <std::size_t K, misc::Specialization<ddc::DiscreteDomain> Dom>
struct TangentBasis;

template <std::size_t K, class... Tag, class MemorySpace>
struct TangentBasis<K, ddc::DiscreteDomain<Tag...>, MemorySpace>
template <std::size_t K, class... Tag>
struct TangentBasis<K, ddc::DiscreteDomain<Tag...>>
{
static auto constexpr run()
template <class ExecSpace>
static auto constexpr run(ExecSpace const& exec_space)
{
std::array<std::ptrdiff_t, sizeof...(Tag)> permutation
= {0 * ddc::type_seq_rank_v<Tag, ddc::detail::TypeSeq<Tag...>>...};
Expand All @@ -394,22 +395,22 @@ struct TangentBasis<K, ddc::DiscreteDomain<Tag...>, MemorySpace>
basis_host(i) = ddc::DiscreteVector<Tag...>();
ddc::detail::array(basis_host(i++)) = permutation;
} while (std::prev_permutation(permutation.begin(), permutation.end()));
Kokkos::View<ddc::DiscreteVector<Tag...>*, MemorySpace> basis
= create_mirror_view_and_copy(MemorySpace(), basis_host);
Kokkos::View<ddc::DiscreteVector<Tag...>*, typename ExecSpace::memory_space> basis
= create_mirror_view_and_copy(exec_space, basis_host);

return LocalChain<
Simplex<K, Tag...>,
Kokkos::LayoutRight,
MemorySpace>(basis, basis.size());
typename ExecSpace::memory_space>(basis, basis.size());
}
};

} // namespace detail

template <std::size_t K, misc::Specialization<ddc::DiscreteDomain> Dom, class MemorySpace>
constexpr auto tangent_basis()
template <std::size_t K, misc::Specialization<ddc::DiscreteDomain> Dom, class ExecSpace>
constexpr auto tangent_basis(ExecSpace const& exec_space)
{
return detail::TangentBasis<K, Dom, MemorySpace>::run();
return detail::TangentBasis<K, Dom>::run(exec_space);
}

template <misc::Specialization<LocalChain> ChainType>
Expand Down
1 change: 1 addition & 0 deletions include/similie/tensor/determinant.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ KOKKOS_FUNCTION typename ViewType::value_type determinant(const ViewType& matrix
return det;
}

// TODO port on GPU
template <misc::Specialization<Tensor> TensorType>
TensorType::element_type determinant(TensorType tensor)
{
Expand Down
5 changes: 4 additions & 1 deletion include/similie/tensor/identity_tensor.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -88,8 +88,11 @@ struct TensorIdentityIndex
KOKKOS_FUNCTION static constexpr std::array<std::size_t, rank()>
mem_id_to_canonical_natural_ids(std::size_t mem_id)
{
assert(mem_id < mem_size()); // Always false
assert(false);
std::array<std::size_t, rank()> ids;
for (auto i = ids.begin(); i < ids.end(); ++i) {
*i = 0;
}
return ids;
}
};
Expand Down
5 changes: 4 additions & 1 deletion include/similie/tensor/levi_civita_tensor.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -99,8 +99,11 @@ struct TensorLeviCivitaIndex
KOKKOS_FUNCTION static constexpr std::array<std::size_t, rank()>
mem_id_to_canonical_natural_ids(std::size_t mem_id)
{
assert(mem_id < mem_size()); // Always false
assert(false);
std::array<std::size_t, rank()> ids;
for (auto i = ids.begin(); i < ids.end(); ++i) {
*i = 0;
}
return ids;
}
};
Expand Down
5 changes: 4 additions & 1 deletion include/similie/tensor/lorentzian_sign_tensor.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -94,8 +94,11 @@ struct TensorLorentzianSignIndex
KOKKOS_FUNCTION static constexpr std::array<std::size_t, rank()>
mem_id_to_canonical_natural_ids(std::size_t mem_id)
{
assert(mem_id < mem_size()); // Always false
assert(false);
std::array<std::size_t, rank()> ids;
for (auto i = ids.begin(); i < ids.end(); ++i) {
*i = 0;
}
return ids;
}
};
Expand Down
33 changes: 15 additions & 18 deletions include/similie/tensor/metric.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -350,6 +350,7 @@ inplace_apply_metric(ExecSpace const& exec_space, TensorType tensor, MetricType
tensor)[elem]);
});
Kokkos::deep_copy(
exec_space,
tensor.allocation_kokkos_view(),
result.allocation_kokkos_view()); // We rely on Kokkos::deep_copy in place of ddc::parallel_deepcopy to avoid type verification of the type dimensions

Expand Down Expand Up @@ -416,14 +417,14 @@ invert_metric_t<MetricType> fill_inverse_metric(
exec_space,
inv_metric.mem_domain(),
KOKKOS_LAMBDA(invert_metric_t<MetricType>::discrete_element_type elem) {
inv_metric(elem)
inv_metric.mem(elem)
= 1.
/ metric(relabelize_indices_in<
swap_character<ddc::to_type_seq_t<
typename MetricType::accessor_t::natural_domain_t>>,
ddc::to_type_seq_t<
typename MetricType::accessor_t::natural_domain_t>>(
elem));
/ metric.mem(relabelize_indices_in<
swap_character<ddc::to_type_seq_t<
typename MetricType::accessor_t::
natural_domain_t>>,
ddc::to_type_seq_t<typename MetricType::accessor_t::
natural_domain_t>>(elem));
});
} else if (misc::Specialization<MetricIndex, TensorSymmetricIndex>) {
// Allocate a buffer mirroring the metric as a full matrix
Expand Down Expand Up @@ -502,16 +503,11 @@ invert_metric_t<MetricType> fill_inverse_metric(
*/

ddc::for_each(
tensor::swap_character<ddc::DiscreteDomain<
tensor::metric_index_1<MetricIndex>,
tensor::metric_index_2<MetricIndex>>>(
inv_metric.natural_domain()),
[=](decltype(tensor::swap_character<ddc::DiscreteDomain<
tensor::metric_index_1<MetricIndex>,
tensor::metric_index_2<MetricIndex>>>(
inv_metric.natural_domain()))::discrete_element_type index) {
// TODO do better, symmetric tensor is filled twice
inv_metric(inv_metric.access_element(elem, index)) = buffer(
tensor::swap_character<ddc::DiscreteDomain<MetricIndex>>(
inv_metric.domain()),
[=](decltype(tensor::swap_character<ddc::DiscreteDomain<MetricIndex>>(
inv_metric.domain()))::discrete_element_type mem_index) {
inv_metric.mem(elem, mem_index) = buffer(
elem,
tensor::relabelize_indices_in<
tensor::swap_character<ddc::detail::TypeSeq<
Expand All @@ -520,7 +516,8 @@ invert_metric_t<MetricType> fill_inverse_metric(
ddc::detail::TypeSeq<
tensor::metric_index_1<MetricIndex>,
tensor::metric_index_2<MetricIndex>>>(
index));
inv_metric.accessor().canonical_natural_element(
mem_index)));
});
});
}
Expand Down
14 changes: 4 additions & 10 deletions include/similie/tensor/tensor_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1204,7 +1204,7 @@ struct NaturalTensorProd<
run(Tensor<ElementType,
ddc::DiscreteDomain<HeadDDim1..., TailDDim2...>,
LayoutStridedPolicy,
Kokkos::DefaultHostExecutionSpace::memory_space> prod_tensor,
MemorySpace> prod_tensor,
Tensor<ElementType,
ddc::DiscreteDomain<HeadDDim1..., ContractDDim...>,
LayoutStridedPolicy,
Expand Down Expand Up @@ -1239,15 +1239,9 @@ template <
class ElementType,
class LayoutStridedPolicy,
class MemorySpace>
Tensor<ElementType,
ddc::DiscreteDomain<ProdDDim...>,
LayoutStridedPolicy,
Kokkos::DefaultHostExecutionSpace::memory_space>
tensor_prod(
Tensor<ElementType,
ddc::DiscreteDomain<ProdDDim...>,
LayoutStridedPolicy,
Kokkos::DefaultHostExecutionSpace::memory_space> prod_tensor,
Tensor<ElementType, ddc::DiscreteDomain<ProdDDim...>, LayoutStridedPolicy, MemorySpace> tensor_prod(
Tensor<ElementType, ddc::DiscreteDomain<ProdDDim...>, LayoutStridedPolicy, MemorySpace>
prod_tensor,
Tensor<ElementType, ddc::DiscreteDomain<DDim1...>, LayoutStridedPolicy, MemorySpace>
tensor1,
Tensor<ElementType, ddc::DiscreteDomain<DDim2...>, LayoutStridedPolicy, MemorySpace>
Expand Down
2 changes: 1 addition & 1 deletion vendor/ddc
Submodule ddc updated 53 files
+2 −4 .clang-tidy
+9 −9 benchmarks/deepcopy.cpp
+14 −9 benchmarks/splines.cpp
+0 −34 docs/first_steps.md
+0 −269 examples/.clang-format
+2 −6 examples/CMakeLists.txt
+37 −72 examples/characteristics_advection.cpp
+16 −30 examples/game_of_life.cpp
+59 −107 examples/heat_equation.cpp
+33 −64 examples/heat_equation_spectral.cpp
+100 −151 examples/non_uniform_heat_equation.cpp
+77 −0 examples/pdi_event.cpp
+49 −108 examples/uniform_heat_equation.cpp
+8 −4 include/ddc/chunk.hpp
+1 −1 include/ddc/chunk_common.hpp
+4 −2 include/ddc/chunk_span.hpp
+16 −2 include/ddc/detail/kokkos.hpp
+2 −2 include/ddc/detail/type_seq.hpp
+7 −7 include/ddc/discrete_domain.hpp
+2 −2 include/ddc/discrete_element.hpp
+2 −0 include/ddc/discrete_vector.hpp
+35 −0 include/ddc/for_each.hpp
+19 −47 include/ddc/kernels/fft.hpp
+8 −7 include/ddc/kernels/splines/constant_extrapolation_rule.hpp
+22 −22 include/ddc/kernels/splines/spline_builder.hpp
+19 −19 include/ddc/kernels/splines/spline_builder_2d.hpp
+5 −7 include/ddc/kernels/splines/spline_evaluator.hpp
+6 −8 include/ddc/kernels/splines/spline_evaluator_2d.hpp
+1 −1 include/ddc/kernels/splines/splines_linear_problem.hpp
+2 −2 include/ddc/kernels/splines/splines_linear_problem_sparse.hpp
+4 −4 include/ddc/non_uniform_point_sampling.hpp
+6 −2 include/ddc/parallel_deepcopy.hpp
+12 −17 include/ddc/periodic_sampling.hpp
+67 −1 include/ddc/transform_reduce.hpp
+11 −16 include/ddc/uniform_point_sampling.hpp
+1 −1 tests/chunk.cpp
+41 −0 tests/chunk_span.cpp
+6 −6 tests/discrete_domain.cpp
+44 −11 tests/fft/fft.cpp
+70 −0 tests/for_each.cpp
+25 −9 tests/parallel_transform_reduce.cpp
+116 −124 tests/splines/batched_2d_spline_builder.cpp
+66 −62 tests/splines/batched_spline_builder.cpp
+6 −6 tests/splines/evaluator_2d.hpp
+91 −87 tests/splines/extrapolation_rule.cpp
+2 −2 tests/splines/knots_as_interpolation_points.cpp
+19 −19 tests/splines/non_periodic_spline_builder.cpp
+11 −11 tests/splines/periodic_spline_builder.cpp
+6 −6 tests/splines/periodic_spline_builder_ordered_points.cpp
+38 −34 tests/splines/periodicity_spline_builder.cpp
+17 −17 tests/splines/spline_builder.cpp
+3 −3 tests/tagged_vector.cpp
+38 −0 tests/transform_reduce.cpp

0 comments on commit 3420667

Please sign in to comment.