Coverage Report

Created: 2024-10-13 08:39

/Users/andrewlamb/Software/datafusion/datafusion/common/src/functional_dependencies.rs
Line
Count
Source (jump to first uncovered line)
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
//! FunctionalDependencies keeps track of functional dependencies
19
//! inside DFSchema.
20
21
use std::collections::HashSet;
22
use std::fmt::{Display, Formatter};
23
use std::ops::Deref;
24
use std::vec::IntoIter;
25
26
use crate::error::_plan_err;
27
use crate::utils::{merge_and_order_indices, set_difference};
28
use crate::{DFSchema, DFSchemaRef, DataFusionError, JoinType, Result};
29
30
use sqlparser::ast::TableConstraint;
31
32
/// A table-level constraint, expressed through column indices.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Hash)]
pub enum Constraint {
    /// Composite primary key: the columns at the given indices are jointly
    /// unique and not nullable.
    PrimaryKey(Vec<usize>),
    /// Composite unique key made up of the columns at the given indices.
    Unique(Vec<usize>),
}
41
42
/// This object encapsulates a list of functional constraints:
43
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Hash)]
44
pub struct Constraints {
45
    inner: Vec<Constraint>,
46
}
47
48
impl Constraints {
49
    /// Create empty constraints
50
0
    pub fn empty() -> Self {
51
0
        Constraints::new_unverified(vec![])
52
0
    }
53
54
    /// Create a new `Constraints` object from the given `constraints`.
55
    /// Users should use the `empty` or `new_from_table_constraints` functions
56
    /// for constructing `Constraints`. This constructor is for internal
57
    /// purposes only and does not check whether the argument is valid. The user
58
    /// is responsible for supplying a valid vector of `Constraint` objects.
59
0
    pub fn new_unverified(constraints: Vec<Constraint>) -> Self {
60
0
        Self { inner: constraints }
61
0
    }
62
63
    /// Convert each `TableConstraint` to corresponding `Constraint`
64
0
    pub fn new_from_table_constraints(
65
0
        constraints: &[TableConstraint],
66
0
        df_schema: &DFSchemaRef,
67
0
    ) -> Result<Self> {
68
0
        let constraints = constraints
69
0
            .iter()
70
0
            .map(|c: &TableConstraint| match c {
71
0
                TableConstraint::Unique { name, columns, .. } => {
72
0
                    let field_names = df_schema.field_names();
73
                    // Get unique constraint indices in the schema:
74
0
                    let indices = columns
75
0
                        .iter()
76
0
                        .map(|u| {
77
0
                            let idx = field_names
78
0
                                .iter()
79
0
                                .position(|item| *item == u.value)
80
0
                                .ok_or_else(|| {
81
0
                                    let name = name
82
0
                                        .as_ref()
83
0
                                        .map(|name| format!("with name '{name}' "))
84
0
                                        .unwrap_or("".to_string());
85
0
                                    DataFusionError::Execution(
86
0
                                        format!("Column for unique constraint {}not found in schema: {}", name,u.value)
87
0
                                    )
88
0
                                })?;
89
0
                            Ok(idx)
90
0
                        })
91
0
                        .collect::<Result<Vec<_>>>()?;
92
0
                    Ok(Constraint::Unique(indices))
93
                }
94
0
                TableConstraint::PrimaryKey { columns, .. } => {
95
0
                    let field_names = df_schema.field_names();
96
                    // Get primary key indices in the schema:
97
0
                    let indices = columns
98
0
                        .iter()
99
0
                        .map(|pk| {
100
0
                            let idx = field_names
101
0
                                .iter()
102
0
                                .position(|item| *item == pk.value)
103
0
                                .ok_or_else(|| {
104
0
                                    DataFusionError::Execution(format!(
105
0
                                        "Column for primary key not found in schema: {}",
106
0
                                        pk.value
107
0
                                    ))
108
0
                                })?;
109
0
                            Ok(idx)
110
0
                        })
111
0
                        .collect::<Result<Vec<_>>>()?;
112
0
                    Ok(Constraint::PrimaryKey(indices))
113
                }
114
                TableConstraint::ForeignKey { .. } => {
115
0
                    _plan_err!("Foreign key constraints are not currently supported")
116
                }
117
                TableConstraint::Check { .. } => {
118
0
                    _plan_err!("Check constraints are not currently supported")
119
                }
120
                TableConstraint::Index { .. } => {
121
0
                    _plan_err!("Indexes are not currently supported")
122
                }
123
                TableConstraint::FulltextOrSpatial { .. } => {
124
0
                    _plan_err!("Indexes are not currently supported")
125
                }
126
0
            })
127
0
            .collect::<Result<Vec<_>>>()?;
128
0
        Ok(Constraints::new_unverified(constraints))
129
0
    }
130
131
    /// Check whether constraints is empty
132
0
    pub fn is_empty(&self) -> bool {
133
0
        self.inner.is_empty()
134
0
    }
135
}
136
137
impl IntoIterator for Constraints {
138
    type Item = Constraint;
139
    type IntoIter = IntoIter<Constraint>;
140
141
0
    fn into_iter(self) -> Self::IntoIter {
142
0
        self.inner.into_iter()
143
0
    }
144
}
145
146
impl Display for Constraints {
147
0
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
148
0
        let pk: Vec<String> = self.inner.iter().map(|c| format!("{:?}", c)).collect();
149
0
        let pk = pk.join(", ");
150
0
        if !pk.is_empty() {
151
0
            write!(f, " constraints=[{pk}]")
152
        } else {
153
0
            write!(f, "")
154
        }
155
0
    }
156
}
157
158
impl Deref for Constraints {
159
    type Target = [Constraint];
160
161
0
    fn deref(&self) -> &Self::Target {
162
0
        self.inner.as_slice()
163
0
    }
164
}
165
166
/// This object defines a functional dependence in the schema. A functional
167
/// dependence defines a relationship between determinant keys and dependent
168
/// columns. A determinant key is a column, or a set of columns, whose value
169
/// uniquely determines values of some other (dependent) columns. If two rows
170
/// have the same determinant key, dependent columns in these rows are
171
/// necessarily the same. If the determinant key is unique, the set of
172
/// dependent columns is equal to the entire schema and the determinant key can
173
/// serve as a primary key. Note that a primary key may "downgrade" into a
174
/// determinant key due to an operation such as a join, and this object is
175
/// used to track dependence relationships in such cases. For more information
176
/// on functional dependencies, see:
177
/// <https://www.scaler.com/topics/dbms/functional-dependency-in-dbms/>
178
#[derive(Debug, Clone, PartialEq, Eq)]
179
pub struct FunctionalDependence {
180
    // Column indices of the (possibly composite) determinant key:
181
    pub source_indices: Vec<usize>,
182
    // Column indices of dependent column(s):
183
    pub target_indices: Vec<usize>,
184
    /// Flag indicating whether one of the `source_indices` can receive NULL values.
185
    /// For a data source, if the constraint in question is `Constraint::Unique`,
186
    /// this flag is `true`. If the constraint in question is `Constraint::PrimaryKey`,
187
    /// this flag is `false`.
188
    /// Note that as the schema changes between different stages in a plan,
189
    /// such as after LEFT JOIN or RIGHT JOIN operations, this property may
190
    /// change.
191
    pub nullable: bool,
192
    // The functional dependency mode:
193
    pub mode: Dependency,
194
}
195
196
/// Describes functional dependency mode.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Dependency {
    /// A determinant key may occur only once.
    Single,
    /// A determinant key may occur multiple times (in multiple rows).
    Multi,
}
202
203
impl FunctionalDependence {
204
    // Creates a new functional dependence.
205
0
    pub fn new(
206
0
        source_indices: Vec<usize>,
207
0
        target_indices: Vec<usize>,
208
0
        nullable: bool,
209
0
    ) -> Self {
210
0
        Self {
211
0
            source_indices,
212
0
            target_indices,
213
0
            nullable,
214
0
            // Start with the least restrictive mode by default:
215
0
            mode: Dependency::Multi,
216
0
        }
217
0
    }
218
219
0
    pub fn with_mode(mut self, mode: Dependency) -> Self {
220
0
        self.mode = mode;
221
0
        self
222
0
    }
223
}
224
225
/// This object encapsulates all functional dependencies in a given relation.
226
#[derive(Debug, Clone, PartialEq, Eq)]
227
pub struct FunctionalDependencies {
228
    deps: Vec<FunctionalDependence>,
229
}
230
231
impl FunctionalDependencies {
232
    /// Creates an empty `FunctionalDependencies` object.
233
0
    pub fn empty() -> Self {
234
0
        Self { deps: vec![] }
235
0
    }
236
237
    /// Creates a new `FunctionalDependencies` object from a vector of
238
    /// `FunctionalDependence` objects.
239
0
    pub fn new(dependencies: Vec<FunctionalDependence>) -> Self {
240
0
        Self { deps: dependencies }
241
0
    }
242
243
    /// Creates a new `FunctionalDependencies` object from the given constraints.
244
0
    pub fn new_from_constraints(
245
0
        constraints: Option<&Constraints>,
246
0
        n_field: usize,
247
0
    ) -> Self {
248
0
        if let Some(Constraints { inner: constraints }) = constraints {
249
            // Construct dependency objects based on each individual constraint:
250
0
            let dependencies = constraints
251
0
                .iter()
252
0
                .map(|constraint| {
253
                    // All the field indices are associated with the whole table
254
                    // since we are dealing with table level constraints:
255
0
                    let dependency = match constraint {
256
0
                        Constraint::PrimaryKey(indices) => FunctionalDependence::new(
257
0
                            indices.to_vec(),
258
0
                            (0..n_field).collect::<Vec<_>>(),
259
0
                            false,
260
0
                        ),
261
0
                        Constraint::Unique(indices) => FunctionalDependence::new(
262
0
                            indices.to_vec(),
263
0
                            (0..n_field).collect::<Vec<_>>(),
264
0
                            true,
265
0
                        ),
266
                    };
267
                    // As primary keys are guaranteed to be unique, set the
268
                    // functional dependency mode to `Dependency::Single`:
269
0
                    dependency.with_mode(Dependency::Single)
270
0
                })
271
0
                .collect::<Vec<_>>();
272
0
            Self::new(dependencies)
273
        } else {
274
            // There is no constraint, return an empty object:
275
0
            Self::empty()
276
        }
277
0
    }
278
279
0
    pub fn with_dependency(mut self, mode: Dependency) -> Self {
280
0
        self.deps.iter_mut().for_each(|item| item.mode = mode);
281
0
        self
282
0
    }
283
284
    /// Merges the given functional dependencies with these.
285
0
    pub fn extend(&mut self, other: FunctionalDependencies) {
286
0
        self.deps.extend(other.deps);
287
0
    }
288
289
    /// Sanity checks if functional dependencies are valid. For example, if
290
    /// there are 10 fields, we cannot receive any index further than 9.
291
0
    pub fn is_valid(&self, n_field: usize) -> bool {
292
0
        self.deps.iter().all(
293
0
            |FunctionalDependence {
294
                 source_indices,
295
                 target_indices,
296
                 ..
297
0
             }| {
298
0
                source_indices
299
0
                    .iter()
300
0
                    .max()
301
0
                    .map(|&max_index| max_index < n_field)
302
0
                    .unwrap_or(true)
303
0
                    && target_indices
304
0
                        .iter()
305
0
                        .max()
306
0
                        .map(|&max_index| max_index < n_field)
307
0
                        .unwrap_or(true)
308
0
            },
309
0
        )
310
0
    }
311
312
    /// Adds the `offset` value to `source_indices` and `target_indices` for
313
    /// each functional dependency.
314
0
    pub fn add_offset(&mut self, offset: usize) {
315
0
        self.deps.iter_mut().for_each(
316
0
            |FunctionalDependence {
317
                 source_indices,
318
                 target_indices,
319
                 ..
320
0
             }| {
321
0
                *source_indices = add_offset_to_vec(source_indices, offset);
322
0
                *target_indices = add_offset_to_vec(target_indices, offset);
323
0
            },
324
0
        )
325
0
    }
326
327
    /// Updates `source_indices` and `target_indices` of each functional
328
    /// dependence using the index mapping given in `proj_indices`.
329
    ///
330
    /// Assume that `proj_indices` is \[2, 5, 8\] and we have a functional
331
    /// dependence \[5\] (`source_indices`) -> \[5, 8\] (`target_indices`).
332
    /// In the updated schema, fields at indices \[2, 5, 8\] will transform
333
    /// to \[0, 1, 2\]. Therefore, the resulting functional dependence will
334
    /// be \[1\] -> \[1, 2\].
335
0
    pub fn project_functional_dependencies(
336
0
        &self,
337
0
        proj_indices: &[usize],
338
0
        // The argument `n_out` denotes the schema field length, which is needed
339
0
        // to correctly associate a `Single`-mode dependence with the whole table.
340
0
        n_out: usize,
341
0
    ) -> FunctionalDependencies {
342
0
        let mut projected_func_dependencies = vec![];
343
        for FunctionalDependence {
344
0
            source_indices,
345
0
            target_indices,
346
0
            nullable,
347
0
            mode,
348
0
        } in &self.deps
349
        {
350
0
            let new_source_indices =
351
0
                update_elements_with_matching_indices(source_indices, proj_indices);
352
0
            let new_target_indices = if *mode == Dependency::Single {
353
                // Associate with all of the fields in the schema:
354
0
                (0..n_out).collect()
355
            } else {
356
                // Update associations according to projection:
357
0
                update_elements_with_matching_indices(target_indices, proj_indices)
358
            };
359
            // All of the composite indices should still be valid after projection;
360
            // otherwise, functional dependency cannot be propagated.
361
0
            if new_source_indices.len() == source_indices.len() {
362
0
                let new_func_dependence = FunctionalDependence::new(
363
0
                    new_source_indices,
364
0
                    new_target_indices,
365
0
                    *nullable,
366
0
                )
367
0
                .with_mode(*mode);
368
0
                projected_func_dependencies.push(new_func_dependence);
369
0
            }
370
        }
371
0
        FunctionalDependencies::new(projected_func_dependencies)
372
0
    }
373
374
    /// This function joins this set of functional dependencies with the `other`
375
    /// according to the given `join_type`.
376
0
    pub fn join(
377
0
        &self,
378
0
        other: &FunctionalDependencies,
379
0
        join_type: &JoinType,
380
0
        left_cols_len: usize,
381
0
    ) -> FunctionalDependencies {
382
0
        // Get mutable copies of left and right side dependencies:
383
0
        let mut right_func_dependencies = other.clone();
384
0
        let mut left_func_dependencies = self.clone();
385
0
386
0
        match join_type {
387
            JoinType::Inner | JoinType::Left | JoinType::Right => {
388
                // Add offset to right schema:
389
0
                right_func_dependencies.add_offset(left_cols_len);
390
0
391
0
                // Result may have multiple values, update the dependency mode:
392
0
                left_func_dependencies =
393
0
                    left_func_dependencies.with_dependency(Dependency::Multi);
394
0
                right_func_dependencies =
395
0
                    right_func_dependencies.with_dependency(Dependency::Multi);
396
0
397
0
                if *join_type == JoinType::Left {
398
0
                    // Downgrade the right side, since it may have additional NULL values:
399
0
                    right_func_dependencies.downgrade_dependencies();
400
0
                } else if *join_type == JoinType::Right {
401
0
                    // Downgrade the left side, since it may have additional NULL values:
402
0
                    left_func_dependencies.downgrade_dependencies();
403
0
                }
404
                // Combine left and right functional dependencies:
405
0
                left_func_dependencies.extend(right_func_dependencies);
406
0
                left_func_dependencies
407
            }
408
            JoinType::LeftSemi | JoinType::LeftAnti => {
409
                // These joins preserve functional dependencies of the left side:
410
0
                left_func_dependencies
411
            }
412
            JoinType::RightSemi | JoinType::RightAnti => {
413
                // These joins preserve functional dependencies of the right side:
414
0
                right_func_dependencies
415
            }
416
            JoinType::Full => {
417
                // All of the functional dependencies are lost in a FULL join:
418
0
                FunctionalDependencies::empty()
419
            }
420
        }
421
0
    }
422
423
    /// This function downgrades a functional dependency when nullability becomes
424
    /// a possibility:
425
    /// - If the dependency in question is UNIQUE (i.e. nullable), a new null value
426
    ///   invalidates the dependency.
427
    /// - If the dependency in question is PRIMARY KEY (i.e. not nullable), a new
428
    ///   null value turns it into UNIQUE mode.
429
0
    fn downgrade_dependencies(&mut self) {
430
0
        // Delete nullable dependencies, since they are no longer valid:
431
0
        self.deps.retain(|item| !item.nullable);
432
0
        self.deps.iter_mut().for_each(|item| item.nullable = true);
433
0
    }
434
435
    /// This function ensures that functional dependencies involving uniquely
436
    /// occurring determinant keys cover their entire table in terms of
437
    /// dependent columns.
438
0
    pub fn extend_target_indices(&mut self, n_out: usize) {
439
0
        self.deps.iter_mut().for_each(
440
0
            |FunctionalDependence {
441
                 mode,
442
                 target_indices,
443
                 ..
444
0
             }| {
445
0
                // If unique, cover the whole table:
446
0
                if *mode == Dependency::Single {
447
0
                    *target_indices = (0..n_out).collect::<Vec<_>>();
448
0
                }
449
0
            },
450
0
        )
451
0
    }
452
}
453
454
impl Deref for FunctionalDependencies {
455
    type Target = [FunctionalDependence];
456
457
0
    fn deref(&self) -> &Self::Target {
458
0
        self.deps.as_slice()
459
0
    }
460
}
461
462
/// Calculates functional dependencies for aggregate output, when there is a GROUP BY expression.
463
0
pub fn aggregate_functional_dependencies(
464
0
    aggr_input_schema: &DFSchema,
465
0
    group_by_expr_names: &[String],
466
0
    aggr_schema: &DFSchema,
467
0
) -> FunctionalDependencies {
468
0
    let mut aggregate_func_dependencies = vec![];
469
0
    let aggr_input_fields = aggr_input_schema.field_names();
470
0
    let aggr_fields = aggr_schema.fields();
471
0
    // Association covers the whole table:
472
0
    let target_indices = (0..aggr_schema.fields().len()).collect::<Vec<_>>();
473
0
    // Get functional dependencies of the schema:
474
0
    let func_dependencies = aggr_input_schema.functional_dependencies();
475
    for FunctionalDependence {
476
0
        source_indices,
477
0
        nullable,
478
0
        mode,
479
        ..
480
0
    } in &func_dependencies.deps
481
    {
482
        // Keep source indices in a `HashSet` to prevent duplicate entries:
483
0
        let mut new_source_indices = vec![];
484
0
        let mut new_source_field_names = vec![];
485
0
        let source_field_names = source_indices
486
0
            .iter()
487
0
            .map(|&idx| &aggr_input_fields[idx])
488
0
            .collect::<Vec<_>>();
489
490
0
        for (idx, group_by_expr_name) in group_by_expr_names.iter().enumerate() {
491
            // When one of the input determinant expressions matches with
492
            // the GROUP BY expression, add the index of the GROUP BY
493
            // expression as a new determinant key:
494
0
            if source_field_names.contains(&group_by_expr_name) {
495
0
                new_source_indices.push(idx);
496
0
                new_source_field_names.push(group_by_expr_name.clone());
497
0
            }
498
        }
499
0
        let existing_target_indices =
500
0
            get_target_functional_dependencies(aggr_input_schema, group_by_expr_names);
501
0
        let new_target_indices = get_target_functional_dependencies(
502
0
            aggr_input_schema,
503
0
            &new_source_field_names,
504
0
        );
505
0
        let mode = if existing_target_indices == new_target_indices
506
0
            && new_target_indices.is_some()
507
        {
508
            // If dependency covers all GROUP BY expressions, mode will be `Single`:
509
0
            Dependency::Single
510
        } else {
511
            // Otherwise, existing mode is preserved:
512
0
            *mode
513
        };
514
        // All of the composite indices occur in the GROUP BY expression:
515
0
        if new_source_indices.len() == source_indices.len() {
516
0
            aggregate_func_dependencies.push(
517
0
                FunctionalDependence::new(
518
0
                    new_source_indices,
519
0
                    target_indices.clone(),
520
0
                    *nullable,
521
0
                )
522
0
                .with_mode(mode),
523
0
            );
524
0
        }
525
    }
526
527
    // When we have a GROUP BY key, we can guarantee uniqueness after
528
    // aggregation:
529
0
    if !group_by_expr_names.is_empty() {
530
0
        let count = group_by_expr_names.len();
531
0
        let source_indices = (0..count).collect::<Vec<_>>();
532
0
        let nullable = source_indices
533
0
            .iter()
534
0
            .any(|idx| aggr_fields[*idx].is_nullable());
535
0
        // If GROUP BY expressions do not already act as a determinant:
536
0
        if !aggregate_func_dependencies.iter().any(|item| {
537
0
            // If `item.source_indices` is a subset of GROUP BY expressions, we shouldn't add
538
0
            // them since `item.source_indices` defines this relation already.
539
0
540
0
            // The following simple comparison is working well because
541
0
            // GROUP BY expressions come here as a prefix.
542
0
            item.source_indices.iter().all(|idx| idx < &count)
543
0
        }) {
544
0
            // Add a new functional dependency associated with the whole table:
545
0
            // Use nullable property of the GROUP BY expression:
546
0
            aggregate_func_dependencies.push(
547
0
                // Use nullable property of the GROUP BY expression:
548
0
                FunctionalDependence::new(source_indices, target_indices, nullable)
549
0
                    .with_mode(Dependency::Single),
550
0
            );
551
0
        }
552
0
    }
553
0
    FunctionalDependencies::new(aggregate_func_dependencies)
554
0
}
555
556
/// Returns target indices, for the determinant keys that are inside
557
/// group by expressions.
558
0
pub fn get_target_functional_dependencies(
559
0
    schema: &DFSchema,
560
0
    group_by_expr_names: &[String],
561
0
) -> Option<Vec<usize>> {
562
0
    let mut combined_target_indices = HashSet::new();
563
0
    let dependencies = schema.functional_dependencies();
564
0
    let field_names = schema.field_names();
565
    for FunctionalDependence {
566
0
        source_indices,
567
0
        target_indices,
568
        ..
569
0
    } in &dependencies.deps
570
    {
571
0
        let source_key_names = source_indices
572
0
            .iter()
573
0
            .map(|id_key_idx| &field_names[*id_key_idx])
574
0
            .collect::<Vec<_>>();
575
0
        // If the GROUP BY expression contains a determinant key, we can use
576
0
        // the associated fields after aggregation even if they are not part
577
0
        // of the GROUP BY expression.
578
0
        if source_key_names
579
0
            .iter()
580
0
            .all(|source_key_name| group_by_expr_names.contains(source_key_name))
581
0
        {
582
0
            combined_target_indices.extend(target_indices.iter());
583
0
        }
584
    }
585
0
    (!combined_target_indices.is_empty()).then_some({
586
0
        let mut result = combined_target_indices.into_iter().collect::<Vec<_>>();
587
0
        result.sort();
588
0
        result
589
0
    })
590
0
}
591
592
/// Returns indices for the minimal subset of GROUP BY expressions that are
593
/// functionally equivalent to the original set of GROUP BY expressions.
594
0
pub fn get_required_group_by_exprs_indices(
595
0
    schema: &DFSchema,
596
0
    group_by_expr_names: &[String],
597
0
) -> Option<Vec<usize>> {
598
0
    let dependencies = schema.functional_dependencies();
599
0
    let field_names = schema.field_names();
600
0
    let mut groupby_expr_indices = group_by_expr_names
601
0
        .iter()
602
0
        .map(|group_by_expr_name| {
603
0
            field_names
604
0
                .iter()
605
0
                .position(|field_name| field_name == group_by_expr_name)
606
0
        })
607
0
        .collect::<Option<Vec<_>>>()?;
608
609
0
    groupby_expr_indices.sort();
610
    for FunctionalDependence {
611
0
        source_indices,
612
0
        target_indices,
613
        ..
614
0
    } in &dependencies.deps
615
    {
616
0
        if source_indices
617
0
            .iter()
618
0
            .all(|source_idx| groupby_expr_indices.contains(source_idx))
619
0
        {
620
0
            // If all source indices are among GROUP BY expression indices, we
621
0
            // can remove target indices from GROUP BY expression indices and
622
0
            // use source indices instead.
623
0
            groupby_expr_indices = set_difference(&groupby_expr_indices, target_indices);
624
0
            groupby_expr_indices =
625
0
                merge_and_order_indices(groupby_expr_indices, source_indices);
626
0
        }
627
    }
628
0
    groupby_expr_indices
629
0
        .iter()
630
0
        .map(|idx| {
631
0
            group_by_expr_names
632
0
                .iter()
633
0
                .position(|name| &field_names[*idx] == name)
634
0
        })
635
0
        .collect()
636
0
}
637
638
/// Replaces every value of `entries` with the position of its first
/// occurrence inside `proj_indices`. Values that do not occur in
/// `proj_indices` are left out of the result.
fn update_elements_with_matching_indices(
    entries: &[usize],
    proj_indices: &[usize],
) -> Vec<usize> {
    let mut remapped = Vec::new();
    for entry in entries {
        let found = proj_indices.iter().position(|candidate| candidate == entry);
        if let Some(position) = found {
            remapped.push(position);
        }
    }
    remapped
}
649
650
/// Returns a new vector holding each entry of `in_data` increased by `offset`.
fn add_offset_to_vec<T: Copy + std::ops::Add<Output = T>>(
    in_data: &[T],
    offset: T,
) -> Vec<T> {
    let mut shifted = Vec::with_capacity(in_data.len());
    shifted.extend(in_data.iter().map(|value| *value + offset));
    shifted
}
657
658
#[cfg(test)]
mod tests {
    use super::*;

    /// Iterating over `Constraints` yields the constraints in the order given.
    #[test]
    fn constraints_iter() {
        let primary_key = Constraint::PrimaryKey(vec![10]);
        let unique = Constraint::Unique(vec![20]);
        let constraints =
            Constraints::new_unverified(vec![primary_key.clone(), unique.clone()]);
        let visited = constraints.iter().collect::<Vec<_>>();
        assert_eq!(visited, vec![&primary_key, &unique]);
    }

    /// Projecting onto fields \[1, 2\] remaps \[1\] -> \[0, 1, 2\] into
    /// \[0\] -> \[0, 1\].
    #[test]
    fn test_get_updated_id_keys() {
        let dependence = FunctionalDependence::new(vec![1], vec![0, 1, 2], true);
        let projected = FunctionalDependencies::new(vec![dependence])
            .project_functional_dependencies(&[1, 2], 2);
        let expected_dependence = FunctionalDependence::new(vec![0], vec![0, 1], true);
        assert_eq!(
            projected,
            FunctionalDependencies::new(vec![expected_dependence])
        );
    }
}