/Users/andrewlamb/Software/datafusion/datafusion/execution/src/runtime_env.rs
Line | Count | Source (jump to first uncovered line) |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | //! Execution [`RuntimeEnv`] environment that manages access to object |
19 | | //! store, memory manager, disk manager. |
20 | | |
21 | | use crate::{ |
22 | | disk_manager::{DiskManager, DiskManagerConfig}, |
23 | | memory_pool::{ |
24 | | GreedyMemoryPool, MemoryPool, TrackConsumersPool, UnboundedMemoryPool, |
25 | | }, |
26 | | object_store::{DefaultObjectStoreRegistry, ObjectStoreRegistry}, |
27 | | }; |
28 | | |
29 | | use crate::cache::cache_manager::{CacheManager, CacheManagerConfig}; |
30 | | use datafusion_common::{DataFusionError, Result}; |
31 | | use object_store::ObjectStore; |
32 | | use std::path::PathBuf; |
33 | | use std::sync::Arc; |
34 | | use std::{ |
35 | | fmt::{Debug, Formatter}, |
36 | | num::NonZeroUsize, |
37 | | }; |
38 | | use url::Url; |
39 | | |
40 | | #[derive(Clone)] |
41 | | /// Execution runtime environment that manages system resources such |
42 | | /// as memory, disk, cache and storage. |
43 | | /// |
44 | | /// A [`RuntimeEnv`] is created from a [`RuntimeEnvBuilder`] and has the |
45 | | /// following resource management functionality: |
46 | | /// |
47 | | /// * [`MemoryPool`]: Manage memory |
48 | | /// * [`DiskManager`]: Manage temporary files on local disk |
49 | | /// * [`CacheManager`]: Manage temporary cache data during the session lifetime |
50 | | /// * [`ObjectStoreRegistry`]: Manage mapping URLs to object store instances |
51 | | pub struct RuntimeEnv { |
52 | | /// Runtime memory management |
53 | | pub memory_pool: Arc<dyn MemoryPool>, |
54 | | /// Manage temporary files during query execution |
55 | | pub disk_manager: Arc<DiskManager>, |
56 | | /// Manage temporary cache during query execution |
57 | | pub cache_manager: Arc<CacheManager>, |
58 | | /// Object Store Registry |
59 | | pub object_store_registry: Arc<dyn ObjectStoreRegistry>, |
60 | | } |
61 | | |
62 | | impl Debug for RuntimeEnv { |
63 | 0 | fn fmt(&self, f: &mut Formatter) -> std::fmt::Result { |
64 | 0 | write!(f, "RuntimeEnv") |
65 | 0 | } |
66 | | } |
67 | | |
68 | | impl RuntimeEnv { |
69 | | #[deprecated(note = "please use `try_new` instead")] |
70 | 0 | pub fn new(config: RuntimeConfig) -> Result<Self> { |
71 | 0 | Self::try_new(config) |
72 | 0 | } |
73 | | /// Create env based on configuration |
74 | 0 | pub fn try_new(config: RuntimeConfig) -> Result<Self> { |
75 | 0 | let RuntimeConfig { |
76 | 0 | memory_pool, |
77 | 0 | disk_manager, |
78 | 0 | cache_manager, |
79 | 0 | object_store_registry, |
80 | 0 | } = config; |
81 | 0 |
|
82 | 0 | let memory_pool = |
83 | 0 | memory_pool.unwrap_or_else(|| Arc::new(UnboundedMemoryPool::default())); |
84 | 0 |
|
85 | 0 | Ok(Self { |
86 | 0 | memory_pool, |
87 | 0 | disk_manager: DiskManager::try_new(disk_manager)?, |
88 | 0 | cache_manager: CacheManager::try_new(&cache_manager)?, |
89 | 0 | object_store_registry, |
90 | | }) |
91 | 0 | } |
92 | | |
93 | | /// Registers a custom `ObjectStore` to be used with a specific url. |
94 | | /// This allows DataFusion to create external tables from urls that do not have |
95 | | /// built in support such as `hdfs://namenode:port/...`. |
96 | | /// |
97 | | /// Returns the [`ObjectStore`] previously registered for this |
98 | | /// scheme, if any. |
99 | | /// |
100 | | /// See [`ObjectStoreRegistry`] for more details |
101 | | /// |
102 | | /// # Example: Register local file system object store |
103 | | /// ``` |
104 | | /// # use std::sync::Arc; |
105 | | /// # use url::Url; |
106 | | /// # use datafusion_execution::runtime_env::RuntimeEnv; |
107 | | /// # let runtime_env = RuntimeEnv::try_new(Default::default()).unwrap(); |
108 | | /// let url = Url::try_from("file://").unwrap(); |
109 | | /// let object_store = object_store::local::LocalFileSystem::new(); |
110 | | /// // register the object store with the runtime environment |
111 | | /// runtime_env.register_object_store(&url, Arc::new(object_store)); |
112 | | /// ``` |
113 | | /// |
114 | | /// # Example: Register an object store for a remote (HTTP) url |
115 | | /// |
116 | | /// To register reading from urls such as <https://github.com> |
117 | | /// |
118 | | /// ``` |
119 | | /// # use std::sync::Arc; |
120 | | /// # use url::Url; |
121 | | /// # use datafusion_execution::runtime_env::RuntimeEnv; |
122 | | /// # let runtime_env = RuntimeEnv::try_new(Default::default()).unwrap(); |
123 | | /// # // use local store for example as http feature is not enabled |
124 | | /// # let http_store = object_store::local::LocalFileSystem::new(); |
125 | | /// // create a new object store via object_store::http::HttpBuilder; |
126 | | /// let base_url = Url::parse("https://github.com").unwrap(); |
127 | | /// // let http_store = HttpBuilder::new() |
128 | | /// // .with_url(base_url.clone()) |
129 | | /// // .build() |
130 | | /// // .unwrap(); |
131 | | /// // register the object store with the runtime environment |
132 | | /// runtime_env.register_object_store(&base_url, Arc::new(http_store)); |
133 | | /// ``` |
134 | 0 | pub fn register_object_store( |
135 | 0 | &self, |
136 | 0 | url: &Url, |
137 | 0 | object_store: Arc<dyn ObjectStore>, |
138 | 0 | ) -> Option<Arc<dyn ObjectStore>> { |
139 | 0 | self.object_store_registry.register_store(url, object_store) |
140 | 0 | } |
141 | | |
142 | | /// Retrieves an `ObjectStore` instance for a url by consulting the |
143 | | /// registry. See [`ObjectStoreRegistry::get_store`] for more |
144 | | /// details. |
145 | 0 | pub fn object_store(&self, url: impl AsRef<Url>) -> Result<Arc<dyn ObjectStore>> { |
146 | 0 | self.object_store_registry |
147 | 0 | .get_store(url.as_ref()) |
148 | 0 | .map_err(DataFusionError::from) |
149 | 0 | } |
150 | | } |
151 | | |
152 | | impl Default for RuntimeEnv { |
153 | 0 | fn default() -> Self { |
154 | 0 | RuntimeEnvBuilder::new().build().unwrap() |
155 | 0 | } |
156 | | } |
157 | | |
158 | | /// Please see: <https://github.com/apache/datafusion/issues/12156> |
159 | | /// This is a type alias for backwards compatibility. |
160 | | pub type RuntimeConfig = RuntimeEnvBuilder; |
161 | | |
162 | | #[derive(Clone)] |
163 | | /// Execution runtime configuration |
164 | | pub struct RuntimeEnvBuilder { |
165 | | /// DiskManager to manage temporary disk file usage |
166 | | pub disk_manager: DiskManagerConfig, |
167 | | /// [`MemoryPool`] from which to allocate memory |
168 | | /// |
169 | | /// Defaults to using an [`UnboundedMemoryPool`] if `None` |
170 | | pub memory_pool: Option<Arc<dyn MemoryPool>>, |
171 | | /// CacheManager to manage cache data |
172 | | pub cache_manager: CacheManagerConfig, |
173 | | /// ObjectStoreRegistry to get object store based on url |
174 | | pub object_store_registry: Arc<dyn ObjectStoreRegistry>, |
175 | | } |
176 | | |
177 | | impl Default for RuntimeEnvBuilder { |
178 | 14 | fn default() -> Self { |
179 | 14 | Self::new() |
180 | 14 | } |
181 | | } |
182 | | |
183 | | impl RuntimeEnvBuilder { |
184 | | /// New with default values |
185 | 937 | pub fn new() -> Self { |
186 | 937 | Self { |
187 | 937 | disk_manager: Default::default(), |
188 | 937 | memory_pool: Default::default(), |
189 | 937 | cache_manager: Default::default(), |
190 | 937 | object_store_registry: Arc::new(DefaultObjectStoreRegistry::default()), |
191 | 937 | } |
192 | 937 | } |
193 | | |
194 | | /// Customize disk manager |
195 | 4 | pub fn with_disk_manager(mut self, disk_manager: DiskManagerConfig) -> Self { |
196 | 4 | self.disk_manager = disk_manager; |
197 | 4 | self |
198 | 4 | } |
199 | | |
200 | | /// Customize memory policy |
201 | 46 | pub fn with_memory_pool(mut self, memory_pool: Arc<dyn MemoryPool>) -> Self { |
202 | 46 | self.memory_pool = Some(memory_pool); |
203 | 46 | self |
204 | 46 | } |
205 | | |
206 | | /// Customize cache policy |
207 | 0 | pub fn with_cache_manager(mut self, cache_manager: CacheManagerConfig) -> Self { |
208 | 0 | self.cache_manager = cache_manager; |
209 | 0 | self |
210 | 0 | } |
211 | | |
212 | | /// Customize object store registry |
213 | 0 | pub fn with_object_store_registry( |
214 | 0 | mut self, |
215 | 0 | object_store_registry: Arc<dyn ObjectStoreRegistry>, |
216 | 0 | ) -> Self { |
217 | 0 | self.object_store_registry = object_store_registry; |
218 | 0 | self |
219 | 0 | } |
220 | | |
221 | | /// Specify the total memory to use while running the DataFusion |
222 | | /// plan to `max_memory * memory_fraction` in bytes. |
223 | | /// |
224 | | /// This uses a [`GreedyMemoryPool`] wrapped in a [`TrackConsumersPool`] |
225 | | /// |
226 | | /// Note DataFusion does not yet respect this limit in all cases. |
227 | 34 | pub fn with_memory_limit(self, max_memory: usize, memory_fraction: f64) -> Self { |
228 | 34 | let pool_size = (max_memory as f64 * memory_fraction) as usize; |
229 | 34 | self.with_memory_pool(Arc::new(TrackConsumersPool::new( |
230 | 34 | GreedyMemoryPool::new(pool_size), |
231 | 34 | NonZeroUsize::new(5).unwrap(), |
232 | 34 | ))) |
233 | 34 | } |
234 | | |
235 | | /// Use the specified path to create any needed temporary files |
236 | 0 | pub fn with_temp_file_path(self, path: impl Into<PathBuf>) -> Self { |
237 | 0 | self.with_disk_manager(DiskManagerConfig::new_specified(vec![path.into()])) |
238 | 0 | } |
239 | | |
240 | | /// Build a RuntimeEnv |
241 | 937 | pub fn build(self) -> Result<RuntimeEnv> { |
242 | 937 | let memory_pool = self |
243 | 937 | .memory_pool |
244 | 937 | .unwrap_or_else(|| Arc::new(UnboundedMemoryPool::default())891 ); |
245 | 937 | |
246 | 937 | Ok(RuntimeEnv { |
247 | 937 | memory_pool, |
248 | 937 | disk_manager: DiskManager::try_new(self.disk_manager)?0 , |
249 | 937 | cache_manager: CacheManager::try_new(&self.cache_manager)?0 , |
250 | 937 | object_store_registry: self.object_store_registry, |
251 | | }) |
252 | 937 | } |
253 | | |
254 | | /// Convenience method to create a new `Arc<RuntimeEnv>` |
255 | 937 | pub fn build_arc(self) -> Result<Arc<RuntimeEnv>> { |
256 | 937 | self.build().map(Arc::new) |
257 | 937 | } |
258 | | } |