/Users/andrewlamb/Software/datafusion/datafusion/common/src/utils/proxy.rs
Line | Count | Source (jump to first uncovered line) |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | //! [`VecAllocExt`] and [`RawTableAllocExt`] to help track memory allocations |
19 | | |
20 | | use hashbrown::raw::{Bucket, RawTable}; |
21 | | |
22 | | /// Extension trait for [`Vec`] to account for allocations. |
23 | | pub trait VecAllocExt { |
24 | | /// Item type. |
25 | | type T; |
26 | | |
27 | | /// [Push](Vec::push) new element to vector and increase |
28 | | /// `accounting` by any newly allocated bytes. |
29 | | /// |
30 | | /// Note that the accounting counts capacity (allocated slots), not length. |
31 | | /// |
32 | | /// # Example: |
33 | | /// ``` |
34 | | /// # use datafusion_common::utils::proxy::VecAllocExt; |
35 | | /// // use allocated to incrementally track how much memory is allocated in the vec |
36 | | /// let mut allocated = 0; |
37 | | /// let mut vec = Vec::new(); |
38 | | /// // Push data into the vec and the accounting will be updated to reflect |
39 | | /// // memory allocation |
40 | | /// vec.push_accounted(1, &mut allocated); |
41 | | /// assert_eq!(allocated, 16); // 16 bytes: space for 4 i32s |
42 | | /// vec.push_accounted(1, &mut allocated); |
43 | | /// assert_eq!(allocated, 16); // no new allocation needed |
44 | | /// |
45 | | /// // push more data into the vec |
46 | | /// for _ in 0..10 { vec.push_accounted(1, &mut allocated); } |
47 | | /// assert_eq!(allocated, 64); // 64 bytes: capacity is now 16 i32s (12 pushed so far) |
48 | | /// assert_eq!(vec.allocated_size(), 64); |
49 | | /// ``` |
50 | | /// # Example with other allocations: |
51 | | /// ``` |
52 | | /// # use datafusion_common::utils::proxy::VecAllocExt; |
53 | | /// // You can use the same allocated size to track memory allocated by |
54 | | /// // another source. For example |
55 | | /// let mut allocated = 27; |
56 | | /// let mut vec = Vec::new(); |
57 | | /// vec.push_accounted(1, &mut allocated); // allocates 16 bytes for vec |
58 | | /// assert_eq!(allocated, 43); // 16 bytes for vec, 27 bytes for other |
59 | | /// ``` |
60 | | fn push_accounted(&mut self, x: Self::T, accounting: &mut usize); |
61 | | |
62 | | /// Return the amount of memory allocated by this Vec to store elements |
63 | | /// (`size_of::<T>() * capacity`). |
64 | | /// |
65 | | /// Note this calculation is not recursive, and does not include any heap |
66 | | /// allocations contained within the Vec's elements. Does not include the |
67 | | /// size of `self` |
68 | | /// |
69 | | /// # Example: |
70 | | /// ``` |
71 | | /// # use datafusion_common::utils::proxy::VecAllocExt; |
72 | | /// let mut vec = Vec::new(); |
73 | | /// // Push data into the vec with plain `push`; `allocated_size` reports |
74 | | /// // the bytes backing the vec's current capacity |
75 | | /// vec.push(1); |
76 | | /// assert_eq!(vec.allocated_size(), 16); // 16 bytes: space for 4 i32s |
77 | | /// vec.push(1); |
78 | | /// assert_eq!(vec.allocated_size(), 16); // no new allocation needed |
79 | | /// |
80 | | /// // push more data into the vec |
81 | | /// for _ in 0..10 { vec.push(1); } |
82 | | /// assert_eq!(vec.allocated_size(), 64); // 64 bytes: space for 16 i32s now |
83 | | /// ``` |
84 | | fn allocated_size(&self) -> usize; |
85 | | } |
86 | | |
87 | | impl<T> VecAllocExt for Vec<T> { |
88 | | type T = T; |
89 | | |
90 | 0 | fn push_accounted(&mut self, x: Self::T, accounting: &mut usize) { |
91 | 0 | let prev_capacty = self.capacity(); |
92 | 0 | self.push(x); |
93 | 0 | let new_capacity = self.capacity(); |
94 | 0 | if new_capacity > prev_capacty { |
95 | 0 | // capacity grew, so `push` allocated more: charge only the added slots |
96 | 0 | let bump_size = (new_capacity - prev_capacty) * std::mem::size_of::<T>(); |
97 | 0 | // Note: the multiplication should never overflow because `push` would |
98 | 0 | // have panicked first, but the `checked_add` could potentially |
99 | 0 | // overflow since `accounting` may also be tracking other allocations, and |
100 | 0 | // so could be greater than what is stored in the Vec |
101 | 0 | *accounting = (*accounting).checked_add(bump_size).expect("overflow"); |
102 | 0 | } |
103 | 0 | } |
104 | 834 | fn allocated_size(&self) -> usize { |
105 | 834 | std::mem::size_of::<T>() * self.capacity() |
106 | 834 | } |
107 | | } |
108 | | |
109 | | /// Extension trait for hashbrown's [`RawTable`] to account for allocations. |
110 | | pub trait RawTableAllocExt { |
111 | | /// Item type. |
112 | | type T; |
113 | | |
114 | | /// [Insert](RawTable::insert) new element into table and increase |
115 | | /// `accounting` by any newly allocated bytes. |
116 | | /// |
117 | | /// Returns the bucket where the element was inserted. |
118 | | /// Note that allocation counts capacity, not size. |
119 | | /// |
120 | | /// # Example: |
121 | | /// ``` |
122 | | /// # use datafusion_common::utils::proxy::RawTableAllocExt; |
123 | | /// # use hashbrown::raw::RawTable; |
124 | | /// let mut table = RawTable::new(); |
125 | | /// let mut allocated = 0; |
126 | | /// let hash_fn = |x: &u32| (*x as u64) % 1000; |
127 | | /// // `hash_fn` is a stand-in hasher: it maps the value 1 to the hash 1 |
128 | | /// table.insert_accounted(1, hash_fn, &mut allocated); |
129 | | /// assert_eq!(allocated, 64); // 16 elements reserved * 4 bytes per u32 |
130 | | /// |
131 | | /// // insert more values |
132 | | /// for i in 0..100 { table.insert_accounted(i, hash_fn, &mut allocated); } |
133 | | /// assert_eq!(allocated, 400); |
134 | | /// ``` |
135 | | fn insert_accounted( |
136 | | &mut self, |
137 | | x: Self::T, |
138 | | hasher: impl Fn(&Self::T) -> u64, |
139 | | accounting: &mut usize, |
140 | | ) -> Bucket<Self::T>; |
141 | | } |
142 | | |
143 | | impl<T> RawTableAllocExt for RawTable<T> { |
144 | | type T = T; |
145 | | |
146 | 165 | fn insert_accounted( |
147 | 165 | &mut self, |
148 | 165 | x: Self::T, |
149 | 165 | hasher: impl Fn(&Self::T) -> u64, |
150 | 165 | accounting: &mut usize, |
151 | 165 | ) -> Bucket<Self::T> { |
152 | 165 | let hash = hasher(&x); |
153 | 165 | |
154 | 165 | match self.try_insert_no_grow(hash, x) { |
155 | 151 | Ok(bucket) => bucket, |
156 | 14 | Err(x) => { |
157 | 14 | // no free slot: charge `accounting` for `size_of::<T>()` per element about to be reserved, then grow |
158 | 14 | |
159 | 14 | let bump_elements = self.capacity().max(16); |
160 | 14 | let bump_size = bump_elements * std::mem::size_of::<T>(); |
161 | 14 | *accounting = (*accounting).checked_add(bump_size).expect("overflow"); |
162 | 14 | |
163 | 14 | self.reserve(bump_elements, hasher); |
164 | 14 | |
165 | 14 | // still need to insert the element since the first try failed |
166 | 14 | // Note: cannot use `.expect` here because `T` may not implement `Debug` |
167 | 14 | match self.try_insert_no_grow(hash, x) { |
168 | 14 | Ok(bucket) => bucket, |
169 | 0 | Err(_) => panic!("just grew the container"), |
170 | | } |
171 | | } |
172 | | } |
173 | 165 | } |
174 | | } |