Coverage Report

Created: 2024-10-13 08:39

/Users/andrewlamb/Software/datafusion/datafusion/execution/src/runtime_env.rs
Line
Count
Source (jump to first uncovered line)
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
//! Execution [`RuntimeEnv`] environment that manages access to object
19
//! store, memory manager, disk manager.
20
21
use crate::{
22
    disk_manager::{DiskManager, DiskManagerConfig},
23
    memory_pool::{
24
        GreedyMemoryPool, MemoryPool, TrackConsumersPool, UnboundedMemoryPool,
25
    },
26
    object_store::{DefaultObjectStoreRegistry, ObjectStoreRegistry},
27
};
28
29
use crate::cache::cache_manager::{CacheManager, CacheManagerConfig};
30
use datafusion_common::{DataFusionError, Result};
31
use object_store::ObjectStore;
32
use std::path::PathBuf;
33
use std::sync::Arc;
34
use std::{
35
    fmt::{Debug, Formatter},
36
    num::NonZeroUsize,
37
};
38
use url::Url;
39
40
#[derive(Clone)]
41
/// Execution runtime environment that manages system resources such
42
/// as memory, disk, cache and storage.
43
///
44
/// A [`RuntimeEnv`] is created from a [`RuntimeEnvBuilder`] and has the
45
/// following resource management functionality:
46
///
47
/// * [`MemoryPool`]: Manage memory
48
/// * [`DiskManager`]: Manage temporary files on local disk
49
/// * [`CacheManager`]: Manage temporary cache data during the session lifetime
50
/// * [`ObjectStoreRegistry`]: Manage mapping URLs to object store instances
51
pub struct RuntimeEnv {
52
    /// Runtime memory management
53
    pub memory_pool: Arc<dyn MemoryPool>,
54
    /// Manage temporary files during query execution
55
    pub disk_manager: Arc<DiskManager>,
56
    /// Manage temporary cache during query execution
57
    pub cache_manager: Arc<CacheManager>,
58
    /// Object Store Registry
59
    pub object_store_registry: Arc<dyn ObjectStoreRegistry>,
60
}
61
62
impl Debug for RuntimeEnv {
63
0
    fn fmt(&self, f: &mut Formatter) -> std::fmt::Result {
64
0
        write!(f, "RuntimeEnv")
65
0
    }
66
}
67
68
impl RuntimeEnv {
69
    #[deprecated(note = "please use `try_new` instead")]
70
0
    pub fn new(config: RuntimeConfig) -> Result<Self> {
71
0
        Self::try_new(config)
72
0
    }
73
    /// Create env based on configuration
74
0
    pub fn try_new(config: RuntimeConfig) -> Result<Self> {
75
0
        let RuntimeConfig {
76
0
            memory_pool,
77
0
            disk_manager,
78
0
            cache_manager,
79
0
            object_store_registry,
80
0
        } = config;
81
0
82
0
        let memory_pool =
83
0
            memory_pool.unwrap_or_else(|| Arc::new(UnboundedMemoryPool::default()));
84
0
85
0
        Ok(Self {
86
0
            memory_pool,
87
0
            disk_manager: DiskManager::try_new(disk_manager)?,
88
0
            cache_manager: CacheManager::try_new(&cache_manager)?,
89
0
            object_store_registry,
90
        })
91
0
    }
92
93
    /// Registers a custom `ObjectStore` to be used with a specific url.
94
    /// This allows DataFusion to create external tables from urls that do not have
95
    /// built in support such as `hdfs://namenode:port/...`.
96
    ///
97
    /// Returns the [`ObjectStore`] previously registered for this
98
    /// scheme, if any.
99
    ///
100
    /// See [`ObjectStoreRegistry`] for more details
101
    ///
102
    /// # Example: Register local file system object store
103
    /// ```
104
    /// # use std::sync::Arc;
105
    /// # use url::Url;
106
    /// # use datafusion_execution::runtime_env::RuntimeEnv;
107
    /// # let runtime_env = RuntimeEnv::try_new(Default::default()).unwrap();
108
    /// let url = Url::try_from("file://").unwrap();
109
    /// let object_store = object_store::local::LocalFileSystem::new();
110
    /// // register the object store with the runtime environment
111
    /// runtime_env.register_object_store(&url, Arc::new(object_store));
112
    /// ```
113
    ///
114
    /// # Example: Register an object store for a remote (HTTP) URL
115
    ///
116
    /// To register reading from urls such as <https://github.com>:
117
    ///
118
    /// ```
119
    /// # use std::sync::Arc;
120
    /// # use url::Url;
121
    /// # use datafusion_execution::runtime_env::RuntimeEnv;
122
    /// # let runtime_env = RuntimeEnv::try_new(Default::default()).unwrap();
123
    /// # // use local store for example as http feature is not enabled
124
    /// # let http_store = object_store::local::LocalFileSystem::new();
125
    /// // create a new object store via object_store::http::HttpBuilder;
126
    /// let base_url = Url::parse("https://github.com").unwrap();
127
    /// // let http_store = HttpBuilder::new()
128
    /// //    .with_url(base_url.clone())
129
    /// //    .build()
130
    /// //    .unwrap();
131
    /// // register the object store with the runtime environment
132
    /// runtime_env.register_object_store(&base_url, Arc::new(http_store));
133
    /// ```
134
0
    pub fn register_object_store(
135
0
        &self,
136
0
        url: &Url,
137
0
        object_store: Arc<dyn ObjectStore>,
138
0
    ) -> Option<Arc<dyn ObjectStore>> {
139
0
        self.object_store_registry.register_store(url, object_store)
140
0
    }
141
142
    /// Retrieves an `ObjectStore` instance for a url by consulting the
143
    /// registry. See [`ObjectStoreRegistry::get_store`] for more
144
    /// details.
145
0
    pub fn object_store(&self, url: impl AsRef<Url>) -> Result<Arc<dyn ObjectStore>> {
146
0
        self.object_store_registry
147
0
            .get_store(url.as_ref())
148
0
            .map_err(DataFusionError::from)
149
0
    }
150
}
151
152
impl Default for RuntimeEnv {
153
0
    fn default() -> Self {
154
0
        RuntimeEnvBuilder::new().build().unwrap()
155
0
    }
156
}
157
158
/// Please see: <https://github.com/apache/datafusion/issues/12156>
159
/// This is a type alias for backwards compatibility.
160
pub type RuntimeConfig = RuntimeEnvBuilder;
161
162
#[derive(Clone)]
163
/// Execution runtime configuration
164
pub struct RuntimeEnvBuilder {
165
    /// DiskManager to manage temporary disk file usage
166
    pub disk_manager: DiskManagerConfig,
167
    /// [`MemoryPool`] from which to allocate memory
168
    ///
169
    /// Defaults to using an [`UnboundedMemoryPool`] if `None`
170
    pub memory_pool: Option<Arc<dyn MemoryPool>>,
171
    /// CacheManager to manage cache data
172
    pub cache_manager: CacheManagerConfig,
173
    /// ObjectStoreRegistry to get object store based on url
174
    pub object_store_registry: Arc<dyn ObjectStoreRegistry>,
175
}
176
177
impl Default for RuntimeEnvBuilder {
178
14
    fn default() -> Self {
179
14
        Self::new()
180
14
    }
181
}
182
183
impl RuntimeEnvBuilder {
184
    /// New with default values
185
937
    pub fn new() -> Self {
186
937
        Self {
187
937
            disk_manager: Default::default(),
188
937
            memory_pool: Default::default(),
189
937
            cache_manager: Default::default(),
190
937
            object_store_registry: Arc::new(DefaultObjectStoreRegistry::default()),
191
937
        }
192
937
    }
193
194
    /// Customize disk manager
195
4
    pub fn with_disk_manager(mut self, disk_manager: DiskManagerConfig) -> Self {
196
4
        self.disk_manager = disk_manager;
197
4
        self
198
4
    }
199
200
    /// Customize memory policy
201
46
    pub fn with_memory_pool(mut self, memory_pool: Arc<dyn MemoryPool>) -> Self {
202
46
        self.memory_pool = Some(memory_pool);
203
46
        self
204
46
    }
205
206
    /// Customize cache policy
207
0
    pub fn with_cache_manager(mut self, cache_manager: CacheManagerConfig) -> Self {
208
0
        self.cache_manager = cache_manager;
209
0
        self
210
0
    }
211
212
    /// Customize object store registry
213
0
    pub fn with_object_store_registry(
214
0
        mut self,
215
0
        object_store_registry: Arc<dyn ObjectStoreRegistry>,
216
0
    ) -> Self {
217
0
        self.object_store_registry = object_store_registry;
218
0
        self
219
0
    }
220
221
    /// Specify the total memory to use while running the DataFusion
222
    /// plan to `max_memory * memory_fraction` in bytes.
223
    ///
224
    /// This defaults to using a [`GreedyMemoryPool`] wrapped in a [`TrackConsumersPool`]
225
    ///
226
    /// Note DataFusion does not yet respect this limit in all cases.
227
34
    pub fn with_memory_limit(self, max_memory: usize, memory_fraction: f64) -> Self {
228
34
        let pool_size = (max_memory as f64 * memory_fraction) as usize;
229
34
        self.with_memory_pool(Arc::new(TrackConsumersPool::new(
230
34
            GreedyMemoryPool::new(pool_size),
231
34
            NonZeroUsize::new(5).unwrap(),
232
34
        )))
233
34
    }
234
235
    /// Use the specified path to create any needed temporary files
236
0
    pub fn with_temp_file_path(self, path: impl Into<PathBuf>) -> Self {
237
0
        self.with_disk_manager(DiskManagerConfig::new_specified(vec![path.into()]))
238
0
    }
239
240
    /// Build a RuntimeEnv
241
937
    pub fn build(self) -> Result<RuntimeEnv> {
242
937
        let memory_pool = self
243
937
            .memory_pool
244
937
            .unwrap_or_else(|| 
Arc::new(UnboundedMemoryPool::default())891
);
245
937
246
937
        Ok(RuntimeEnv {
247
937
            memory_pool,
248
937
            disk_manager: DiskManager::try_new(self.disk_manager)
?0
,
249
937
            cache_manager: CacheManager::try_new(&self.cache_manager)
?0
,
250
937
            object_store_registry: self.object_store_registry,
251
        })
252
937
    }
253
254
    /// Convenience method to create a new `Arc<RuntimeEnv>`
255
937
    pub fn build_arc(self) -> Result<Arc<RuntimeEnv>> {
256
937
        self.build().map(Arc::new)
257
937
    }
258
}