From 3393b060f06cb00fe846387b652a4b38d56b23a8 Mon Sep 17 00:00:00 2001 From: Mark Harris <783069+harrism@users.noreply.github.com> Date: Wed, 24 Apr 2024 14:08:47 +1000 Subject: [PATCH] Convert device_memory_resource* to device_async_resource_ref (#4365) Closes #4333 For reviewers: Many of the changes are a simple textual replacement of `rmm::mr::device_memory_resource *` with `rmm::device_async_resource_ref`. However, I found that `concurrent_unordered_map` used in cuGraph is not up to date with the latest changes from the version in libcudf, so I copied those over as well. This gets rid of `hash_allocator.cuh` and uses `rmm::mr::polymorphic_allocator` instead of the custom allocator previously used. This obviates the need to update any `device_memory_resource*` used in the old custom allocator. Authors: - Mark Harris (https://github.com/harrism) Approvers: - Chuck Hastings (https://github.com/ChuckHastings) URL: https://github.com/rapidsai/cugraph/pull/4365 --- cpp/include/cugraph/algorithms.hpp | 4 +- cpp/include/cugraph/dendrogram.hpp | 5 +- cpp/include/cugraph/legacy/functions.hpp | 3 +- cpp/include/cugraph/legacy/graph.hpp | 19 ++-- .../include/hash/concurrent_unordered_map.cuh | 101 ++++++++++-------- .../include/hash/hash_allocator.cuh | 99 ----------------- .../include/hash/helper_functions.cuh | 2 + cpp/src/converters/legacy/COOtoCSR.cu | 16 +-- cpp/src/converters/legacy/COOtoCSR.cuh | 17 +-- cpp/src/tree/legacy/mst.cu | 9 +- cpp/tests/utilities/base_fixture.hpp | 5 +- 11 files changed, 102 insertions(+), 178 deletions(-) delete mode 100644 cpp/libcugraph_etl/include/hash/hash_allocator.cuh diff --git a/cpp/include/cugraph/algorithms.hpp b/cpp/include/cugraph/algorithms.hpp index c817665b1cb..0caa151daac 100644 --- a/cpp/include/cugraph/algorithms.hpp +++ b/cpp/include/cugraph/algorithms.hpp @@ -23,6 +23,8 @@ #include #include +#include + #ifndef NO_CUGRAPH_OPS #include #endif @@ -830,7 +832,7 @@ template std::unique_ptr> minimum_spanning_tree( raft::handle_t const& handle, legacy::GraphCSRView const& graph, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); namespace subgraph { /** diff --git a/cpp/include/cugraph/dendrogram.hpp b/cpp/include/cugraph/dendrogram.hpp index beebec4fd3f..14da3f13b1e 100644 --- a/cpp/include/cugraph/dendrogram.hpp +++ b/cpp/include/cugraph/dendrogram.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
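A note for readers following the conversion pattern above: `rmm::device_async_resource_ref` is a non-owning, type-erased reference, and a raw `rmm::mr::device_memory_resource*` (including the pointer returned by `rmm::mr::get_current_device_resource()`) binds to it implicitly, which is why the defaulted arguments stay the same and call sites do not change. A minimal sketch of the pattern, using an illustrative function name that is not part of this PR:

```cpp
#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_buffer.hpp>
#include <rmm/mr/device/per_device_resource.hpp>
#include <rmm/resource_ref.hpp>

#include <cstddef>

// Old form:
//   rmm::device_buffer make_scratch(std::size_t bytes, rmm::cuda_stream_view stream,
//     rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
// New form: the resource ref is taken by value and forwarded to anything that
// accepts a device_async_resource_ref (device_buffer and device_uvector already do).
rmm::device_buffer make_scratch(
  std::size_t bytes,
  rmm::cuda_stream_view stream,
  rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource())
{
  // The default argument still works: the raw pointer converts to the ref.
  return rmm::device_buffer{bytes, stream, mr};
}
```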
@@ -16,6 +16,7 @@ #pragma once #include +#include #include #include @@ -28,7 +29,7 @@ class Dendrogram { void add_level(vertex_t first_index, vertex_t num_verts, rmm::cuda_stream_view stream_view, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()) { level_ptr_.push_back( std::make_unique>(num_verts, stream_view, mr)); diff --git a/cpp/include/cugraph/legacy/functions.hpp b/cpp/include/cugraph/legacy/functions.hpp index 1efa2294847..51f05a6d26d 100644 --- a/cpp/include/cugraph/legacy/functions.hpp +++ b/cpp/include/cugraph/legacy/functions.hpp @@ -20,6 +20,7 @@ #include #include +#include namespace cugraph { @@ -43,7 +44,7 @@ namespace cugraph { template std::unique_ptr> coo_to_csr( legacy::GraphCOOView const& graph, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Broadcast using handle communicator diff --git a/cpp/include/cugraph/legacy/graph.hpp b/cpp/include/cugraph/legacy/graph.hpp index e64cbf32d4c..19cd5bbd6d0 100644 --- a/cpp/include/cugraph/legacy/graph.hpp +++ b/cpp/include/cugraph/legacy/graph.hpp @@ -17,6 +17,7 @@ #include #include +#include #include @@ -349,9 +350,9 @@ class GraphCOO { */ GraphCOO(vertex_t number_of_vertices, edge_t number_of_edges, - bool has_data = false, - cudaStream_t stream = nullptr, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) + bool has_data = false, + cudaStream_t stream = nullptr, + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()) : number_of_vertices_p(number_of_vertices), number_of_edges_p(number_of_edges), src_indices_p(sizeof(vertex_t) * number_of_edges, stream, mr), @@ -361,8 +362,8 @@ class GraphCOO { } GraphCOO(GraphCOOView const& graph, - cudaStream_t stream = nullptr, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) + cudaStream_t stream = nullptr, + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()) : number_of_vertices_p(graph.number_of_vertices), number_of_edges_p(graph.number_of_edges), src_indices_p(graph.src_indices, graph.number_of_edges * sizeof(vertex_t), stream, mr), @@ -457,7 +458,7 @@ class GraphCompressedSparseBase { edge_t number_of_edges, bool has_data, cudaStream_t stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) : number_of_vertices_p(number_of_vertices), number_of_edges_p(number_of_edges), offsets_p(sizeof(edge_t) * (number_of_vertices + 1), stream, mr), @@ -525,9 +526,9 @@ class GraphCSR : public GraphCompressedSparseBase { */ GraphCSR(vertex_t number_of_vertices_, edge_t number_of_edges_, - bool has_data_ = false, - cudaStream_t stream = nullptr, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) + bool has_data_ = false, + cudaStream_t stream = nullptr, + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()) : GraphCompressedSparseBase( number_of_vertices_, number_of_edges_, has_data_, stream, mr) { diff --git a/cpp/libcugraph_etl/include/hash/concurrent_unordered_map.cuh b/cpp/libcugraph_etl/include/hash/concurrent_unordered_map.cuh index e71bc2b3548..0e89dbfb9c1 100644 --- a/cpp/libcugraph_etl/include/hash/concurrent_unordered_map.cuh +++ b/cpp/libcugraph_etl/include/hash/concurrent_unordered_map.cuh @@ -21,19 +21,19 @@ */ #pragma once -#include #include -#include -#include +#include #include 
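Because the legacy graph constructors above now take the resource by `device_async_resource_ref`, call sites that pass a raw resource pointer keep compiling. A hedged call-site sketch (the pool size and the exact `cugraph::legacy::GraphCOO` spelling are illustrative assumptions, not taken from this PR):

```cpp
#include <cugraph/legacy/graph.hpp>

#include <rmm/mr/device/cuda_memory_resource.hpp>
#include <rmm/mr/device/pool_memory_resource.hpp>

#include <cstdint>

void build_coo_with_pool(int32_t num_vertices, int32_t num_edges)
{
  rmm::mr::cuda_memory_resource upstream;
  rmm::mr::pool_memory_resource<rmm::mr::cuda_memory_resource> pool{&upstream, 1u << 28};

  // `&pool` (a device_memory_resource*) converts implicitly to
  // rmm::device_async_resource_ref, so the constructor call is unchanged.
  cugraph::legacy::GraphCOO<int32_t, int32_t, float> coo(
    num_vertices, num_edges, /*has_data=*/false, /*stream=*/nullptr, &pool);
}
```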
+#include +#include #include -#include #include #include +#include #include #include #include @@ -78,8 +78,8 @@ template constexpr bool is_packable() { - return std::is_integral::value and std::is_integral::value and - not std::is_void>::value and + return std::is_integral_v and std::is_integral_v and + not std::is_void_v> and std::has_unique_object_representations_v; } @@ -95,8 +95,8 @@ union pair_packer; template union pair_packer()>> { using packed_type = packed_t; - packed_type const packed; - pair_type const pair; + packed_type packed; + pair_type pair; __device__ pair_packer(pair_type _pair) : pair{_pair} {} @@ -120,7 +120,7 @@ template , typename Equality = equal_to, - typename Allocator = default_allocator>> + typename Allocator = rmm::mr::polymorphic_allocator>> class concurrent_unordered_map { public: using size_type = size_t; @@ -131,7 +131,7 @@ class concurrent_unordered_map { using mapped_type = Element; using value_type = thrust::pair; using iterator = cycle_iterator_adapter; - using const_iterator = const cycle_iterator_adapter; + using const_iterator = cycle_iterator_adapter const; public: /** @@ -163,12 +163,12 @@ class concurrent_unordered_map { * storage */ static auto create(size_type capacity, - rmm::cuda_stream_view stream = rmm::cuda_stream_default, - const mapped_type unused_element = std::numeric_limits::max(), - const key_type unused_key = std::numeric_limits::max(), - const Hasher& hash_function = hasher(), - const Equality& equal = key_equal(), - const allocator_type& allocator = allocator_type()) + rmm::cuda_stream_view stream, + mapped_type const unused_element = std::numeric_limits::max(), + key_type const unused_key = std::numeric_limits::max(), + Hasher const& hash_function = hasher(), + Equality const& equal = key_equal(), + allocator_type const& allocator = allocator_type()) { using Self = concurrent_unordered_map; @@ -246,7 +246,7 @@ class concurrent_unordered_map { __host__ __device__ mapped_type get_unused_element() const { return m_unused_element; } - __host__ __device__ size_type capacity() const { return m_capacity; } + [[nodiscard]] __host__ __device__ size_type capacity() const { return m_capacity; } private: /** @@ -271,16 +271,21 @@ class concurrent_unordered_map { __device__ std::enable_if_t(), insert_result> attempt_insert( value_type* const __restrict__ insert_location, value_type const& insert_pair) { - pair_packer const unused{thrust::make_pair(m_unused_key, m_unused_element)}; - pair_packer const new_pair{insert_pair}; - pair_packer const old{ - atomicCAS(reinterpret_cast::packed_type*>(insert_location), - unused.packed, - new_pair.packed)}; + pair_packer expected{thrust::make_pair(m_unused_key, m_unused_element)}; + pair_packer desired{insert_pair}; - if (old.packed == unused.packed) { return insert_result::SUCCESS; } + using packed_type = typename pair_packer::packed_type; - if (m_equal(old.pair.first, insert_pair.first)) { return insert_result::DUPLICATE; } + auto* insert_ptr = reinterpret_cast(insert_location); + cuda::atomic_ref ref{*insert_ptr}; + auto const success = + ref.compare_exchange_strong(expected.packed, desired.packed, cuda::std::memory_order_relaxed); + + if (success) { + return insert_result::SUCCESS; + } else if (m_equal(expected.pair.first, insert_pair.first)) { + return insert_result::DUPLICATE; + } return insert_result::CONTINUE; } @@ -295,16 +300,20 @@ class concurrent_unordered_map { __device__ std::enable_if_t(), insert_result> attempt_insert( value_type* const __restrict__ insert_location, value_type const& 
insert_pair) { - key_type const old_key{atomicCAS(&(insert_location->first), m_unused_key, insert_pair.first)}; + auto expected = m_unused_key; + cuda::atomic_ref ref{insert_location->first}; + auto const key_success = + ref.compare_exchange_strong(expected, insert_pair.first, cuda::std::memory_order_relaxed); // Hash bucket empty - if (m_unused_key == old_key) { + if (key_success) { insert_location->second = insert_pair.second; return insert_result::SUCCESS; } - // Key already exists - if (m_equal(old_key, insert_pair.first)) { return insert_result::DUPLICATE; } + else if (m_equal(expected, insert_pair.first)) { + return insert_result::DUPLICATE; + } return insert_result::CONTINUE; } @@ -330,7 +339,7 @@ class concurrent_unordered_map { */ __device__ thrust::pair insert(value_type const& insert_pair) { - const size_type key_hash{m_hf(insert_pair.first)}; + size_type const key_hash{m_hf(insert_pair.first)}; size_type index{key_hash % m_capacity}; insert_result status{insert_result::CONTINUE}; @@ -343,7 +352,7 @@ class concurrent_unordered_map { index = (index + 1) % m_capacity; } - bool const insert_success = (status == insert_result::SUCCESS) ? true : false; + bool const insert_success = status == insert_result::SUCCESS; return thrust::make_pair( iterator(m_hashtbl_values, m_hashtbl_values + m_capacity, current_bucket), insert_success); @@ -424,8 +433,7 @@ class concurrent_unordered_map { } } - void assign_async(const concurrent_unordered_map& other, - rmm::cuda_stream_view stream = rmm::cuda_stream_default) + void assign_async(concurrent_unordered_map const& other, rmm::cuda_stream_view stream) { if (other.m_capacity <= m_capacity) { m_capacity = other.m_capacity; @@ -443,7 +451,7 @@ class concurrent_unordered_map { stream.value())); } - void clear_async(rmm::cuda_stream_view stream = rmm::cuda_stream_default) + void clear_async(rmm::cuda_stream_view stream) { constexpr int block_size = 128; init_hashtbl<<<((m_capacity - 1) / block_size) + 1, block_size, 0, stream.value()>>>( @@ -458,7 +466,7 @@ class concurrent_unordered_map { } } - void prefetch(const int dev_id, rmm::cuda_stream_view stream = rmm::cuda_stream_default) + void prefetch(int const dev_id, rmm::cuda_stream_view stream) { cudaPointerAttributes hashtbl_values_ptr_attributes; cudaError_t status = cudaPointerGetAttributes(&hashtbl_values_ptr_attributes, m_hashtbl_values); @@ -478,7 +486,7 @@ class concurrent_unordered_map { * * @param stream CUDA stream used for device memory operations and kernel launches. */ - void destroy(rmm::cuda_stream_view stream = rmm::cuda_stream_default) + void destroy(rmm::cuda_stream_view stream) { m_allocator.deallocate(m_hashtbl_values, m_capacity, stream); delete this; @@ -514,12 +522,12 @@ class concurrent_unordered_map { * @param stream CUDA stream used for device memory operations and kernel launches. 
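The most substantive change inside the map is replacing raw `atomicCAS` with `cuda::atomic_ref::compare_exchange_strong`, which hands back the previously stored value through `expected`, so the duplicate-key check falls out naturally. A self-contained sketch of the packed-pair idiom, assuming a 32-bit key and 32-bit value (the names are illustrative, not the map's actual members):

```cpp
#include <cuda/atomic>

#include <cstdint>

struct alignas(8) pair32 {  // 8-byte alignment so the pair can be CAS'd as one 64-bit word
  std::uint32_t key;
  std::uint32_t value;
};

union packer {
  pair32 pair;
  std::uint64_t packed;
};

enum class slot_result { success, duplicate, keep_probing };

__device__ slot_result try_claim_slot(pair32* slot, pair32 new_pair, pair32 sentinel)
{
  packer expected{sentinel};  // what an empty slot looks like
  packer desired{new_pair};

  cuda::atomic_ref<std::uint64_t, cuda::thread_scope_device> ref{
    *reinterpret_cast<std::uint64_t*>(slot)};

  // One 64-bit CAS publishes key and value together, so no thread can observe
  // the new key paired with the sentinel value.
  if (ref.compare_exchange_strong(
        expected.packed, desired.packed, cuda::std::memory_order_relaxed)) {
    return slot_result::success;
  }
  // On failure, `expected` now holds whatever is currently stored in the slot.
  if (expected.pair.key == new_pair.key) { return slot_result::duplicate; }
  return slot_result::keep_probing;  // occupied by a different key; probe the next bucket
}
```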
*/ concurrent_unordered_map(size_type capacity, - const mapped_type unused_element, - const key_type unused_key, - const Hasher& hash_function, - const Equality& equal, - const allocator_type& allocator, - rmm::cuda_stream_view stream = rmm::cuda_stream_default) + mapped_type const unused_element, + key_type const unused_key, + Hasher const& hash_function, + Equality const& equal, + allocator_type const& allocator, + rmm::cuda_stream_view stream) : m_hf(hash_function), m_equal(equal), m_allocator(allocator), @@ -542,8 +550,11 @@ class concurrent_unordered_map { } } - init_hashtbl<<<((m_capacity - 1) / block_size) + 1, block_size, 0, stream.value()>>>( - m_hashtbl_values, m_capacity, m_unused_key, m_unused_element); - RAFT_CUDA_TRY(cudaGetLastError()); + if (m_capacity > 0) { + init_hashtbl<<<((m_capacity - 1) / block_size) + 1, block_size, 0, stream.value()>>>( + m_hashtbl_values, m_capacity, m_unused_key, m_unused_element); + } + + CUDF_CHECK_CUDA(stream.value()); } }; diff --git a/cpp/libcugraph_etl/include/hash/hash_allocator.cuh b/cpp/libcugraph_etl/include/hash/hash_allocator.cuh deleted file mode 100644 index a4c8c17e01b..00000000000 --- a/cpp/libcugraph_etl/include/hash/hash_allocator.cuh +++ /dev/null @@ -1,99 +0,0 @@ -/* - * Copyright (c) 2017-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
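Because the defaulted `rmm::cuda_stream_default` arguments were dropped above, host code must now choose the stream explicitly when it creates, clears, or destroys a map. A hedged host-side sketch, assuming the include path under `cpp/libcugraph_etl/include` and that `create()` still returns an owning smart pointer whose deleter calls `destroy()`, as in the libcudf version this map was copied from:

```cpp
#include <hash/concurrent_unordered_map.cuh>

#include <rmm/cuda_stream_view.hpp>

#include <cstdint>

void build_map(rmm::cuda_stream_view stream)
{
  using map_type = concurrent_unordered_map<std::int32_t, std::int32_t>;

  // The stream is now a required argument: the table allocation and the
  // init_hashtbl kernel launch are ordered on the stream the caller provides.
  auto map = map_type::create(/*capacity=*/1 << 20, stream);

  // ... launch kernels on `stream` that call map->insert(...) ...

  // Reset every bucket back to the unused sentinel, again on an explicit stream.
  map->clear_async(stream);
}  // the owning handle's deleter calls destroy() and frees the table
```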
- */ - -#ifndef HASH_ALLOCATOR_CUH -#define HASH_ALLOCATOR_CUH - -#include -#include -#include -#include - -#include - -template -struct managed_allocator { - typedef T value_type; - rmm::mr::device_memory_resource* mr = new rmm::mr::managed_memory_resource; - - managed_allocator() = default; - - template - constexpr managed_allocator(const managed_allocator&) noexcept - { - } - - T* allocate(std::size_t n, rmm::cuda_stream_view stream = rmm::cuda_stream_default) const - { - return static_cast(mr->allocate(n * sizeof(T), stream)); - } - - void deallocate(T* p, - std::size_t n, - rmm::cuda_stream_view stream = rmm::cuda_stream_default) const - { - mr->deallocate(p, n * sizeof(T), stream); - } -}; - -template -bool operator==(const managed_allocator&, const managed_allocator&) -{ - return true; -} -template -bool operator!=(const managed_allocator&, const managed_allocator&) -{ - return false; -} - -template -struct default_allocator { - typedef T value_type; - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource(); - - default_allocator() = default; - - template - constexpr default_allocator(const default_allocator&) noexcept - { - } - - T* allocate(std::size_t n, rmm::cuda_stream_view stream = rmm::cuda_stream_default) const - { - return static_cast(mr->allocate(n * sizeof(T), stream)); - } - - void deallocate(T* p, - std::size_t n, - rmm::cuda_stream_view stream = rmm::cuda_stream_default) const - { - mr->deallocate(p, n * sizeof(T), stream); - } -}; - -template -bool operator==(const default_allocator&, const default_allocator&) -{ - return true; -} -template -bool operator!=(const default_allocator&, const default_allocator&) -{ - return false; -} - -#endif diff --git a/cpp/libcugraph_etl/include/hash/helper_functions.cuh b/cpp/libcugraph_etl/include/hash/helper_functions.cuh index 183516ae782..67b24b4937e 100644 --- a/cpp/libcugraph_etl/include/hash/helper_functions.cuh +++ b/cpp/libcugraph_etl/include/hash/helper_functions.cuh @@ -21,6 +21,8 @@ #include +#include + constexpr int64_t DEFAULT_HASH_TABLE_OCCUPANCY = 50; /** diff --git a/cpp/src/converters/legacy/COOtoCSR.cu b/cpp/src/converters/legacy/COOtoCSR.cu index 5e8e1604586..545016bc5ce 100644 --- a/cpp/src/converters/legacy/COOtoCSR.cu +++ b/cpp/src/converters/legacy/COOtoCSR.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
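The deleted `default_allocator` above existed only to pair a raw `device_memory_resource*` with stream-ordered allocate/deallocate calls; `rmm::mr::polymorphic_allocator<T>` offers the same interface (default-constructing from the current device resource) while keeping the resource handling inside RMM, which is what removes the last `device_memory_resource*` from this header. A small sketch of equivalent usage (illustrative only):

```cpp
#include <rmm/cuda_stream_view.hpp>
#include <rmm/mr/device/polymorphic_allocator.hpp>

#include <thrust/pair.h>

#include <cstddef>

void allocate_buckets(std::size_t capacity, rmm::cuda_stream_view stream)
{
  using value_type = thrust::pair<int, int>;

  // Default-constructed: uses the current device resource, like the old
  // default_allocator, but without cuGraph holding the raw pointer itself.
  rmm::mr::polymorphic_allocator<value_type> alloc{};

  value_type* buckets = alloc.allocate(capacity, stream);  // stream-ordered
  // ... initialize and use the buckets ...
  alloc.deallocate(buckets, capacity, stream);
}
```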
@@ -16,37 +16,39 @@ #include "COOtoCSR.cuh" +#include + namespace cugraph { // Explicit instantiation for uint32_t + float template std::unique_ptr> coo_to_csr(legacy::GraphCOOView const& graph, - rmm::mr::device_memory_resource*); + rmm::device_async_resource_ref); // Explicit instantiation for uint32_t + double template std::unique_ptr> coo_to_csr( - legacy::GraphCOOView const& graph, rmm::mr::device_memory_resource*); + legacy::GraphCOOView const& graph, rmm::device_async_resource_ref); // Explicit instantiation for int + float template std::unique_ptr> coo_to_csr(legacy::GraphCOOView const& graph, - rmm::mr::device_memory_resource*); + rmm::device_async_resource_ref); // Explicit instantiation for int + double template std::unique_ptr> coo_to_csr(legacy::GraphCOOView const& graph, - rmm::mr::device_memory_resource*); + rmm::device_async_resource_ref); // Explicit instantiation for int64_t + float template std::unique_ptr> coo_to_csr(legacy::GraphCOOView const& graph, - rmm::mr::device_memory_resource*); + rmm::device_async_resource_ref); // Explicit instantiation for int64_t + double template std::unique_ptr> coo_to_csr(legacy::GraphCOOView const& graph, - rmm::mr::device_memory_resource*); + rmm::device_async_resource_ref); // in-place versions: // diff --git a/cpp/src/converters/legacy/COOtoCSR.cuh b/cpp/src/converters/legacy/COOtoCSR.cuh index d2fc9fb2cc2..feae22b53a9 100644 --- a/cpp/src/converters/legacy/COOtoCSR.cuh +++ b/cpp/src/converters/legacy/COOtoCSR.cuh @@ -27,6 +27,7 @@ #include #include +#include #include #include @@ -133,7 +134,7 @@ rmm::device_buffer create_offset(VT* source, VT number_of_vertices, ET number_of_edges, rmm::cuda_stream_view stream_view, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { // Offset array needs an extra element at the end to contain the ending offsets // of the last vertex @@ -149,7 +150,7 @@ rmm::device_buffer create_offset(VT* source, template std::unique_ptr> coo_to_csr( - legacy::GraphCOOView const& graph, rmm::mr::device_memory_resource* mr) + legacy::GraphCOOView const& graph, rmm::device_async_resource_ref mr) { rmm::cuda_stream_view stream_view; @@ -195,32 +196,32 @@ void coo_to_csr_inplace(legacy::GraphCOOView& graph, // EIDecl for uint32_t + float extern template std::unique_ptr> coo_to_csr(legacy::GraphCOOView const& graph, - rmm::mr::device_memory_resource*); + rmm::device_async_resource_ref); // EIDecl for uint32_t + double extern template std::unique_ptr> coo_to_csr( - legacy::GraphCOOView const& graph, rmm::mr::device_memory_resource*); + legacy::GraphCOOView const& graph, rmm::device_async_resource_ref); // EIDecl for int + float extern template std::unique_ptr> coo_to_csr(legacy::GraphCOOView const& graph, - rmm::mr::device_memory_resource*); + rmm::device_async_resource_ref); // EIDecl for int + double extern template std::unique_ptr> coo_to_csr(legacy::GraphCOOView const& graph, - rmm::mr::device_memory_resource*); + rmm::device_async_resource_ref); // EIDecl for int64_t + float extern template std::unique_ptr> coo_to_csr(legacy::GraphCOOView const& graph, - rmm::mr::device_memory_resource*); + rmm::device_async_resource_ref); // EIDecl for int64_t + double extern template std::unique_ptr> coo_to_csr(legacy::GraphCOOView const& graph, - rmm::mr::device_memory_resource*); + rmm::device_async_resource_ref); // in-place versions: // diff --git a/cpp/src/tree/legacy/mst.cu b/cpp/src/tree/legacy/mst.cu index a7fdd45c4eb..a10d75680fd 100644 --- a/cpp/src/tree/legacy/mst.cu +++ 
b/cpp/src/tree/legacy/mst.cu @@ -21,6 +21,7 @@ #include #include +#include #include @@ -36,7 +37,7 @@ template std::unique_ptr> mst_impl( raft::handle_t const& handle, legacy::GraphCSRView const& graph, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { auto stream = handle.get_stream(); @@ -66,7 +67,7 @@ template std::unique_ptr> minimum_spanning_tree( raft::handle_t const& handle, legacy::GraphCSRView const& graph, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { return detail::mst_impl(handle, graph, mr); } @@ -74,9 +75,9 @@ std::unique_ptr> minimum_spanning_t template std::unique_ptr> minimum_spanning_tree( raft::handle_t const& handle, legacy::GraphCSRView const& graph, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); template std::unique_ptr> minimum_spanning_tree(raft::handle_t const& handle, legacy::GraphCSRView const& graph, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); } // namespace cugraph diff --git a/cpp/tests/utilities/base_fixture.hpp b/cpp/tests/utilities/base_fixture.hpp index 5c984d7979b..cb302674a25 100644 --- a/cpp/tests/utilities/base_fixture.hpp +++ b/cpp/tests/utilities/base_fixture.hpp @@ -29,6 +29,7 @@ #include #include #include +#include #include @@ -52,14 +53,14 @@ namespace test { * ``` **/ class BaseFixture : public ::testing::Test { - rmm::mr::device_memory_resource* _mr{rmm::mr::get_current_device_resource()}; + rmm::device_async_resource_ref _mr{rmm::mr::get_current_device_resource()}; public: /** * @brief Returns pointer to `device_memory_resource` that should be used for all tests inheriting *from this fixture **/ - rmm::mr::device_memory_resource* mr() { return _mr; } + rmm::device_async_resource_ref mr() { return _mr; } }; /// MR factory functions
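Finally, since `mr()` in the test fixture now returns a `device_async_resource_ref` by value rather than a raw pointer, tests can pass it straight into RMM containers. A hedged sketch of a derived test, assuming the fixture lives in `cugraph::test` as the namespace in the diff suggests (the test name and include form are illustrative):

```cpp
#include "utilities/base_fixture.hpp"

#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_uvector.hpp>

#include <gtest/gtest.h>

#include <cstddef>

class ResourceRefTest : public cugraph::test::BaseFixture {};

TEST_F(ResourceRefTest, AllocatesThroughFixtureResource)
{
  rmm::cuda_stream_view stream{};  // default stream is enough for this sketch

  // mr() returns a cheap-to-copy resource ref; device_uvector accepts it directly.
  rmm::device_uvector<int> values(1024, stream, mr());
  EXPECT_EQ(values.size(), std::size_t{1024});
}
```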