From 222d535eace618c4cc2dbf092a89893229b23c2d Mon Sep 17 00:00:00 2001 From: Charles Kawczynski Date: Mon, 15 Apr 2024 15:53:05 -0400 Subject: [PATCH] Try UnrolledUtilities --- src/MatrixFields/matrix_multiplication.jl | 94 +++++++++++------------ 1 file changed, 46 insertions(+), 48 deletions(-) diff --git a/src/MatrixFields/matrix_multiplication.jl b/src/MatrixFields/matrix_multiplication.jl index 5e6146f5ef..ba0a9d24f8 100644 --- a/src/MatrixFields/matrix_multiplication.jl +++ b/src/MatrixFields/matrix_multiplication.jl @@ -306,35 +306,27 @@ boundary_modified_ud(_, ud, column_space, i) = ud boundary_modified_ud(::BottomRightMatrixCorner, ud, column_space, i) = min(Operators.right_idx(column_space) - i, ud) -# matrix_rows(ld1, ud1, (bc, boundary_modified_ld1, boundary_modified_ud1, space, matrix2, loc, idx, hidx)) -@inline matrix_rows(ld1, ud1, args) = - _matrix_rows(ntuple(i -> i + ld1 - 1, ud1 - ld1 + 1), args) -Base.@propagate_inbounds function _matrix_rows( - d::Union{Int, PlusHalf{Int}}, - args, +Base.@propagate_inbounds function prod_entry( + space, + idx, + hidx, + min_d, + max_d, + zero_value, + matrix1_row, + matrix2_rows_wrapper, + prod_d, ) - ( - bc, - boundary_modified_ld1, - boundary_modified_ud1, - space, - matrix2, - loc, - idx, - hidx, - ) = args - if isnothing(bc) || boundary_modified_ld1 <= d <= boundary_modified_ud1 - @inbounds Operators.getidx(space, matrix2, loc, idx + d, hidx) - else - zero(eltype(matrix2)) # This row is outside the matrix. - end + val = zero_value + @inbounds for d in min_d:max_d + Base.@_inline_meta + @inbounds value1 = matrix1_row[d] + @inbounds value2 = matrix2_rows_wrapper[d][prod_d - d] + @inbounds value2_lg = Geometry.LocalGeometry(space, idx + d, hidx) + val = radd(val, rmul_with_projection(value1, value2, value2_lg)) + end # Using a for-loop is currently faster than using mapreduce. + return val end -Base.@propagate_inbounds _matrix_rows(tup::Tuple, args) = - (_matrix_rows(first(tup), args), _matrix_rows(Base.tail(tup), args)...) -Base.@propagate_inbounds _matrix_rows(tup::Tuple{<:Any}, args) = - (_matrix_rows(first(tup), args),) -@inline _matrix_rows(tup::Tuple{}, args) = () - import UnrolledUtilities as UU # TODO: Use @propagate_inbounds here, and remove @inbounds from this function. @@ -372,15 +364,15 @@ function multiply_matrix_at_index(loc, space, idx, hidx, matrix1, arg, bc) # of as a map from boundary_modified_ld1 to boundary_modified_ud1. For # simplicity, use zero padding for rows that are outside the matrix. # Wrap the rows in a BandMatrixRow so that they can be easily indexed. - # matrix2_rows = matrix_rows(ld1, ud1, (bc, boundary_modified_ld1, boundary_modified_ud1, space, matrix2, loc, idx, hidx)) - matrix2_rows = UU.unrolled_map((ld1:ud1...,)) do d - # TODO: Use @propagate_inbounds_meta instead of @inline_meta. - Base.@_inline_meta + zero_mat2 = zero(eltype(matrix2)) + nt_mat = ntuple(ξ -> ξ + ld1 - 1, ud1 - ld1 + 1) + matrix2_rows = UU.unrolled_map(nt_mat) do d + @inline if isnothing(bc) || boundary_modified_ld1 <= d <= boundary_modified_ud1 @inbounds Operators.getidx(space, matrix2, loc, idx + d, hidx) else - zero(eltype(matrix2)) # This row is outside the matrix. + zero_mat2 # This row is outside the matrix. end end matrix2_rows_wrapper = BandMatrixRow{ld1}(matrix2_rows...) @@ -395,24 +387,26 @@ function multiply_matrix_at_index(loc, space, idx, hidx, matrix1, arg, bc) # to boundary_modified_prod_ud. For simplicity, use zero padding for # entries that are outside the matrix. Wrap the entries in a # BandMatrixRow before returning them. - prod_entries = map((prod_ld:prod_ud...,)) do prod_d - # TODO: Use @propagate_inbounds_meta instead of @inline_meta. - Base.@_inline_meta + nt_pe = ntuple(ξ -> ξ + prod_ld - 1, prod_ud - prod_ld + 1) + prod_entries = UU.unrolled_map(nt_pe) do prod_d + @inline if isnothing(bc) || boundary_modified_prod_ld <= prod_d <= boundary_modified_prod_ud - prod_entry = zero_value min_d = max(boundary_modified_ld1, prod_d - ud2) max_d = min(boundary_modified_ud1, prod_d - ld2) + val = zero_value @inbounds for d in min_d:max_d - value1 = matrix1_row[d] - value2 = matrix2_rows_wrapper[d][prod_d - d] - value2_lg = Geometry.LocalGeometry(space, idx + d, hidx) - prod_entry = radd( - prod_entry, + Base.@_inline_meta + @inbounds value1 = matrix1_row[d] + @inbounds value2 = matrix2_rows_wrapper[d][prod_d - d] + @inbounds value2_lg = + Geometry.LocalGeometry(space, idx + d, hidx) + val = radd( + val, rmul_with_projection(value1, value2, value2_lg), ) end # Using a for-loop is currently faster than using mapreduce. - prod_entry + val else zero_value # This entry is outside the matrix. end @@ -420,11 +414,18 @@ function multiply_matrix_at_index(loc, space, idx, hidx, matrix1, arg, bc) return BandMatrixRow{prod_ld}(prod_entries...) else # matrix-vector multiplication vector = arg + # @inbounds for d in boundary_modified_ld1:boundary_modified_ud1 + nt_pv = ntuple( + ξ -> ξ + boundary_modified_ld1 - 1, + boundary_modified_ud1 - boundary_modified_ld1 + 1, + ) prod_value = rzero(prod_type) @inbounds for d in boundary_modified_ld1:boundary_modified_ud1 - value1 = matrix1_row[d] - value2 = Operators.getidx(space, vector, loc, idx + d, hidx) - value2_lg = Geometry.LocalGeometry(space, idx + d, hidx) + Base.@_inline_meta + @inbounds value1 = matrix1_row[d] + @inbounds value2 = + Operators.getidx(space, vector, loc, idx + d, hidx) + @inbounds value2_lg = Geometry.LocalGeometry(space, idx + d, hidx) prod_value = radd( prod_value, rmul_with_projection(value1, value2, value2_lg), @@ -475,7 +476,4 @@ if hasfield(Method, :recursion_relation) for m in methods(multiply_matrix_at_index) m.recursion_relation = dont_limit end - for m in methods(matrix_rows) - m.recursion_relation = dont_limit - end end