Coverage Report

Created: 2024-10-13 08:39

/Users/andrewlamb/Software/datafusion/datafusion/common/src/dfschema.rs
Line
Count
Source (jump to first uncovered line)
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
//! DFSchema is an extended schema struct that DataFusion uses to provide support for
19
//! fields with optional relation names.
20
21
use std::collections::{BTreeSet, HashMap, HashSet};
22
use std::fmt::{Display, Formatter};
23
use std::hash::Hash;
24
use std::sync::Arc;
25
26
use crate::error::{DataFusionError, Result, _plan_err, _schema_err};
27
use crate::{
28
    field_not_found, unqualified_field_not_found, Column, FunctionalDependencies,
29
    SchemaError, TableReference,
30
};
31
32
use arrow::compute::can_cast_types;
33
use arrow::datatypes::{DataType, Field, FieldRef, Fields, Schema, SchemaRef};
34
use arrow_schema::SchemaBuilder;
35
36
/// A reference-counted reference to a [DFSchema].
37
pub type DFSchemaRef = Arc<DFSchema>;
38
39
/// DFSchema wraps an Arrow schema and adds relation names.
40
///
41
/// The schema may hold the fields across multiple tables. Some fields may be
42
/// qualified and some unqualified. A qualified field is a field that has a
43
/// relation name associated with it.
44
///
45
/// Unqualified fields must be unique not only amongst themselves, but also must
46
/// have a distinct name from any qualified field names. This allows finding a
47
/// qualified field by name to be possible, so long as there aren't multiple
48
/// qualified fields with the same name.
49
///
50
/// There is an alias to `Arc<DFSchema>` named [DFSchemaRef].
51
///
52
/// # Creating qualified schemas
53
///
54
/// Use [DFSchema::try_from_qualified_schema] to create a qualified schema from
55
/// an Arrow schema.
56
///
57
/// ```rust
58
/// use datafusion_common::{DFSchema, Column};
59
/// use arrow_schema::{DataType, Field, Schema};
60
///
61
/// let arrow_schema = Schema::new(vec![
62
///    Field::new("c1", DataType::Int32, false),
63
/// ]);
64
///
65
/// let df_schema = DFSchema::try_from_qualified_schema("t1", &arrow_schema).unwrap();
66
/// let column = Column::from_qualified_name("t1.c1");
67
/// assert!(df_schema.has_column(&column));
68
///
69
/// // Can also access qualified fields with unqualified name, if it's unambiguous
70
/// let column = Column::from_qualified_name("c1");
71
/// assert!(df_schema.has_column(&column));
72
/// ```
73
///
74
/// # Creating unqualified schemas
75
///
76
/// Create an unqualified schema using TryFrom:
77
///
78
/// ```rust
79
/// use datafusion_common::{DFSchema, Column};
80
/// use arrow_schema::{DataType, Field, Schema};
81
///
82
/// let arrow_schema = Schema::new(vec![
83
///    Field::new("c1", DataType::Int32, false),
84
/// ]);
85
///
86
/// let df_schema = DFSchema::try_from(arrow_schema).unwrap();
87
/// let column = Column::new_unqualified("c1");
88
/// assert!(df_schema.has_column(&column));
89
/// ```
90
///
91
/// # Converting back to Arrow schema
92
///
93
/// Use the `Into` trait to convert `DFSchema` into an Arrow schema:
94
///
95
/// ```rust
96
/// use datafusion_common::DFSchema;
97
/// use arrow_schema::Schema;
98
/// use arrow::datatypes::Field;
99
/// use std::collections::HashMap;
100
///
101
/// let df_schema = DFSchema::from_unqualified_fields(vec![
102
///    Field::new("c1", arrow::datatypes::DataType::Int32, false),
103
/// ].into(),HashMap::new()).unwrap();
104
/// let schema = Schema::from(df_schema);
105
/// assert_eq!(schema.fields().len(), 1);
106
/// ```
107
#[derive(Debug, Clone, PartialEq, Eq)]
108
pub struct DFSchema {
109
    /// Inner Arrow schema reference.
110
    inner: SchemaRef,
111
    /// Optional qualifiers for each column in this schema. In the same order as
112
    /// the `self.inner.fields()`
113
    field_qualifiers: Vec<Option<TableReference>>,
114
    /// Stores functional dependencies in the schema.
115
    functional_dependencies: FunctionalDependencies,
116
}
117
118
impl DFSchema {
119
    /// Creates an empty `DFSchema`
120
0
    pub fn empty() -> Self {
121
0
        Self {
122
0
            inner: Arc::new(Schema::new([])),
123
0
            field_qualifiers: vec![],
124
0
            functional_dependencies: FunctionalDependencies::empty(),
125
0
        }
126
0
    }
127
128
    /// Return a reference to the inner Arrow [`Schema`]
129
    ///
130
    /// Note this does not have the qualifier information
131
0
    pub fn as_arrow(&self) -> &Schema {
132
0
        self.inner.as_ref()
133
0
    }
134
135
    /// Return a reference to the inner Arrow [`SchemaRef`]
136
    ///
137
    /// Note this does not have the qualifier information
138
0
    pub fn inner(&self) -> &SchemaRef {
139
0
        &self.inner
140
0
    }
141
142
    /// Create a `DFSchema` from a list of (optional qualifier, field) pairs and schema metadata
143
0
    pub fn new_with_metadata(
144
0
        qualified_fields: Vec<(Option<TableReference>, Arc<Field>)>,
145
0
        metadata: HashMap<String, String>,
146
0
    ) -> Result<Self> {
147
0
        let (qualifiers, fields): (Vec<Option<TableReference>>, Vec<Arc<Field>>) =
148
0
            qualified_fields.into_iter().unzip();
149
0
150
0
        let schema = Arc::new(Schema::new_with_metadata(fields, metadata));
151
0
152
0
        let dfschema = Self {
153
0
            inner: schema,
154
0
            field_qualifiers: qualifiers,
155
0
            functional_dependencies: FunctionalDependencies::empty(),
156
0
        };
157
0
        dfschema.check_names()?;
158
0
        Ok(dfschema)
159
0
    }
160
161
    /// Create a new `DFSchema` from a list of Arrow [Field]s
162
    #[allow(deprecated)]
163
0
    pub fn from_unqualified_fields(
164
0
        fields: Fields,
165
0
        metadata: HashMap<String, String>,
166
0
    ) -> Result<Self> {
167
0
        Self::from_unqualifed_fields(fields, metadata)
168
0
    }
169
170
    /// Create a new `DFSchema` from a list of Arrow [Field]s
171
    #[deprecated(
172
        since = "40.0.0",
173
        note = "Please use `from_unqualified_fields` instead (this one's name is a typo). This method is subject to be removed soon"
174
    )]
175
0
    pub fn from_unqualifed_fields(
176
0
        fields: Fields,
177
0
        metadata: HashMap<String, String>,
178
0
    ) -> Result<Self> {
179
0
        let field_count = fields.len();
180
0
        let schema = Arc::new(Schema::new_with_metadata(fields, metadata));
181
0
        let dfschema = Self {
182
0
            inner: schema,
183
0
            field_qualifiers: vec![None; field_count],
184
0
            functional_dependencies: FunctionalDependencies::empty(),
185
0
        };
186
0
        dfschema.check_names()?;
187
0
        Ok(dfschema)
188
0
    }
189
190
    /// Create a `DFSchema` from an Arrow schema and a given qualifier
191
    ///
192
    /// To create a schema from an Arrow schema without a qualifier, use
193
    /// `DFSchema::try_from`.
194
0
    pub fn try_from_qualified_schema(
195
0
        qualifier: impl Into<TableReference>,
196
0
        schema: &Schema,
197
0
    ) -> Result<Self> {
198
0
        let qualifier = qualifier.into();
199
0
        let schema = DFSchema {
200
0
            inner: schema.clone().into(),
201
0
            field_qualifiers: vec![Some(qualifier); schema.fields.len()],
202
0
            functional_dependencies: FunctionalDependencies::empty(),
203
0
        };
204
0
        schema.check_names()?;
205
0
        Ok(schema)
206
0
    }
207
208
    /// Create a `DFSchema` from an Arrow schema and a list of per-field optional qualifiers
209
0
    pub fn from_field_specific_qualified_schema(
210
0
        qualifiers: Vec<Option<TableReference>>,
211
0
        schema: &SchemaRef,
212
0
    ) -> Result<Self> {
213
0
        let dfschema = Self {
214
0
            inner: Arc::clone(schema),
215
0
            field_qualifiers: qualifiers,
216
0
            functional_dependencies: FunctionalDependencies::empty(),
217
0
        };
218
0
        dfschema.check_names()?;
219
0
        Ok(dfschema)
220
0
    }
221
222
    /// Check that no two fields share the same name, returning an error on duplicate or ambiguous names
223
0
    pub fn check_names(&self) -> Result<()> {
224
0
        let mut qualified_names = BTreeSet::new();
225
0
        let mut unqualified_names = BTreeSet::new();
226
227
0
        for (field, qualifier) in self.inner.fields().iter().zip(&self.field_qualifiers) {
228
0
            if let Some(qualifier) = qualifier {
229
0
                if !qualified_names.insert((qualifier, field.name())) {
230
0
                    return _schema_err!(SchemaError::DuplicateQualifiedField {
231
0
                        qualifier: Box::new(qualifier.clone()),
232
0
                        name: field.name().to_string(),
233
0
                    });
234
0
                }
235
0
            } else if !unqualified_names.insert(field.name()) {
236
0
                return _schema_err!(SchemaError::DuplicateUnqualifiedField {
237
0
                    name: field.name().to_string()
238
0
                });
239
0
            }
240
        }
241
242
0
        for (qualifier, name) in qualified_names {
243
0
            if unqualified_names.contains(name) {
244
0
                return _schema_err!(SchemaError::AmbiguousReference {
245
0
                    field: Column::new(Some(qualifier.clone()), name)
246
0
                });
247
0
            }
248
        }
249
0
        Ok(())
250
0
    }
251
252
    /// Assigns functional dependencies.
253
0
    pub fn with_functional_dependencies(
254
0
        mut self,
255
0
        functional_dependencies: FunctionalDependencies,
256
0
    ) -> Result<Self> {
257
0
        if functional_dependencies.is_valid(self.inner.fields.len()) {
258
0
            self.functional_dependencies = functional_dependencies;
259
0
            Ok(self)
260
        } else {
261
0
            _plan_err!(
262
0
                "Invalid functional dependency: {:?}",
263
0
                functional_dependencies
264
0
            )
265
        }
266
0
    }
267
268
    /// Create a new schema that contains the fields from this schema followed by the fields
269
    /// from the supplied schema. An error will be returned if there are duplicate field names.
270
0
    pub fn join(&self, schema: &DFSchema) -> Result<Self> {
271
0
        let mut schema_builder = SchemaBuilder::new();
272
0
        schema_builder.extend(self.inner.fields().iter().cloned());
273
0
        schema_builder.extend(schema.fields().iter().cloned());
274
0
        let new_schema = schema_builder.finish();
275
0
276
0
        let mut new_metadata = self.inner.metadata.clone();
277
0
        new_metadata.extend(schema.inner.metadata.clone());
278
0
        let new_schema_with_metadata = new_schema.with_metadata(new_metadata);
279
0
280
0
        let mut new_qualifiers = self.field_qualifiers.clone();
281
0
        new_qualifiers.extend_from_slice(schema.field_qualifiers.as_slice());
282
0
283
0
        let new_self = Self {
284
0
            inner: Arc::new(new_schema_with_metadata),
285
0
            field_qualifiers: new_qualifiers,
286
0
            functional_dependencies: FunctionalDependencies::empty(),
287
0
        };
288
0
        new_self.check_names()?;
289
0
        Ok(new_self)
290
0
    }
291
292
    /// Modify this schema by appending the fields from the supplied schema, ignoring any
293
    /// duplicate fields.
294
0
    pub fn merge(&mut self, other_schema: &DFSchema) {
295
0
        if other_schema.inner.fields.is_empty() {
296
0
            return;
297
0
        }
298
0
299
0
        let self_fields: HashSet<(Option<&TableReference>, &FieldRef)> =
300
0
            self.iter().collect();
301
0
        let self_unqualified_names: HashSet<&str> = self
302
0
            .inner
303
0
            .fields
304
0
            .iter()
305
0
            .map(|field| field.name().as_str())
306
0
            .collect();
307
0
308
0
        let mut schema_builder = SchemaBuilder::from(self.inner.fields.clone());
309
0
        let mut qualifiers = Vec::new();
310
0
        for (qualifier, field) in other_schema.iter() {
311
            // skip duplicate columns
312
0
            let duplicated_field = match qualifier {
313
0
                Some(q) => self_fields.contains(&(Some(q), field)),
314
                // for unqualified columns, check as unqualified name
315
0
                None => self_unqualified_names.contains(field.name().as_str()),
316
            };
317
0
            if !duplicated_field {
318
0
                // self.inner.fields.push(field.clone());
319
0
                schema_builder.push(Arc::clone(field));
320
0
                qualifiers.push(qualifier.cloned());
321
0
            }
322
        }
323
0
        let mut metadata = self.inner.metadata.clone();
324
0
        metadata.extend(other_schema.inner.metadata.clone());
325
0
326
0
        let finished = schema_builder.finish();
327
0
        let finished_with_metadata = finished.with_metadata(metadata);
328
0
        self.inner = finished_with_metadata.into();
329
0
        self.field_qualifiers.extend(qualifiers);
330
0
    }
331
332
    /// Get a list of fields
333
0
    pub fn fields(&self) -> &Fields {
334
0
        &self.inner.fields
335
0
    }
336
337
    /// Returns an immutable reference of a specific `Field` instance selected using an
338
    /// offset within the internal `fields` vector
339
0
    pub fn field(&self, i: usize) -> &Field {
340
0
        &self.inner.fields[i]
341
0
    }
342
343
    /// Returns an immutable reference of a specific `Field` instance selected using an
344
    /// offset within the internal `fields` vector and its qualifier
345
0
    pub fn qualified_field(&self, i: usize) -> (Option<&TableReference>, &Field) {
346
0
        (self.field_qualifiers[i].as_ref(), self.field(i))
347
0
    }
348
349
0
    /// Find the index of the first field matching the given optional
    /// qualifier and name, returning `None` if no field matches.
    ///
    /// When `qualifier` is `Some`, only qualified fields are considered and
    /// both the qualifier (compared with `resolved_eq`) and the name must
    /// match. When `qualifier` is `None`, fields are matched by name alone,
    /// whether or not they carry a qualifier.
    pub fn index_of_column_by_name(
350
0
        &self,
351
0
        qualifier: Option<&TableReference>,
352
0
        name: &str,
353
0
    ) -> Option<usize> {
354
0
        let mut matches = self
355
0
            .iter()
356
0
            .enumerate()
357
0
            .filter(|(_, (q, f))| match (qualifier, q) {
358
                // field to lookup is qualified.
359
                // current field is qualified and not shared between relations, compare both
360
                // qualifier and name.
361
0
                (Some(q), Some(field_q)) => q.resolved_eq(field_q) && f.name() == name,
362
                // field to lookup is qualified but current field is unqualified.
363
0
                (Some(_), None) => false,
364
                // field to lookup is unqualified, no need to compare qualifier
365
0
                (None, Some(_)) | (None, None) => f.name() == name,
366
0
            })
367
0
            .map(|(idx, _)| idx);
368
0
        // Only the first matching index is returned; any later matches are ignored.
        matches.next()
369
0
    }
370
371
    /// Find the index of the column with the given qualifier and name,
372
    /// returning `None` if not found
373
    ///
374
    /// See [Self::index_of_column] for a version that returns an error if the
375
    /// column is not found
376
0
    pub fn maybe_index_of_column(&self, col: &Column) -> Option<usize> {
377
0
        self.index_of_column_by_name(col.relation.as_ref(), &col.name)
378
0
    }
379
380
    /// Find the index of the column with the given qualifier and name,
381
    /// returning `Err` if not found
382
    ///
383
    /// See [Self::maybe_index_of_column] for a version that returns `None` if
384
    /// the column is not found
385
0
    pub fn index_of_column(&self, col: &Column) -> Result<usize> {
386
0
        self.maybe_index_of_column(col)
387
0
            .ok_or_else(|| field_not_found(col.relation.clone(), &col.name, self))
388
0
    }
389
390
    /// Check if the column is in the current schema
391
0
    pub fn is_column_from_schema(&self, col: &Column) -> bool {
392
0
        self.index_of_column_by_name(col.relation.as_ref(), &col.name)
393
0
            .is_some()
394
0
    }
395
396
    /// Find the field with the given name
397
0
    pub fn field_with_name(
398
0
        &self,
399
0
        qualifier: Option<&TableReference>,
400
0
        name: &str,
401
0
    ) -> Result<&Field> {
402
0
        if let Some(qualifier) = qualifier {
403
0
            self.field_with_qualified_name(qualifier, name)
404
        } else {
405
0
            self.field_with_unqualified_name(name)
406
        }
407
0
    }
408
409
    /// Check whether the column reference is ambiguous
410
0
    pub fn check_ambiguous_name(
411
0
        &self,
412
0
        qualifier: Option<&TableReference>,
413
0
        name: &str,
414
0
    ) -> Result<()> {
415
0
        let count = self
416
0
            .iter()
417
0
            .filter(|(field_q, f)| match (field_q, qualifier) {
418
0
                (Some(q1), Some(q2)) => q1.resolved_eq(q2) && f.name() == name,
419
0
                (None, None) => f.name() == name,
420
0
                _ => false,
421
0
            })
422
0
            .take(2)
423
0
            .count();
424
0
        if count > 1 {
425
0
            _schema_err!(SchemaError::AmbiguousReference {
426
0
                field: Column {
427
0
                    relation: None,
428
0
                    name: name.to_string(),
429
0
                },
430
0
            })
431
        } else {
432
0
            Ok(())
433
        }
434
0
    }
435
436
    /// Find the qualified field with the given name
437
0
    pub fn qualified_field_with_name(
438
0
        &self,
439
0
        qualifier: Option<&TableReference>,
440
0
        name: &str,
441
0
    ) -> Result<(Option<&TableReference>, &Field)> {
442
0
        if let Some(qualifier) = qualifier {
443
0
            let idx = self
444
0
                .index_of_column_by_name(Some(qualifier), name)
445
0
                .ok_or_else(|| field_not_found(Some(qualifier.clone()), name, self))?;
446
0
            Ok((self.field_qualifiers[idx].as_ref(), self.field(idx)))
447
        } else {
448
0
            self.qualified_field_with_unqualified_name(name)
449
        }
450
0
    }
451
452
    /// Find all fields having the given qualifier
453
0
    pub fn fields_with_qualified(&self, qualifier: &TableReference) -> Vec<&Field> {
454
0
        self.iter()
455
0
            .filter(|(q, _)| q.map(|q| q.eq(qualifier)).unwrap_or(false))
456
0
            .map(|(_, f)| f.as_ref())
457
0
            .collect()
458
0
    }
459
460
    /// Find the indices of all fields having the given qualifier
461
0
    pub fn fields_indices_with_qualified(
462
0
        &self,
463
0
        qualifier: &TableReference,
464
0
    ) -> Vec<usize> {
465
0
        self.iter()
466
0
            .enumerate()
467
0
            .filter_map(|(idx, (q, _))| q.and_then(|q| q.eq(qualifier).then_some(idx)))
468
0
            .collect()
469
0
    }
470
471
    /// Find all fields that match the given name
472
0
    pub fn fields_with_unqualified_name(&self, name: &str) -> Vec<&Field> {
473
0
        self.fields()
474
0
            .iter()
475
0
            .filter(|field| field.name() == name)
476
0
            .map(|f| f.as_ref())
477
0
            .collect()
478
0
    }
479
480
    /// Find all fields that match the given name and return them with their qualifier
481
0
    pub fn qualified_fields_with_unqualified_name(
482
0
        &self,
483
0
        name: &str,
484
0
    ) -> Vec<(Option<&TableReference>, &Field)> {
485
0
        self.iter()
486
0
            .filter(|(_, field)| field.name() == name)
487
0
            .map(|(qualifier, field)| (qualifier, field.as_ref()))
488
0
            .collect()
489
0
    }
490
491
    /// Find all fields that match the given name and convert to column
492
0
    pub fn columns_with_unqualified_name(&self, name: &str) -> Vec<Column> {
493
0
        self.iter()
494
0
            .filter(|(_, field)| field.name() == name)
495
0
            .map(|(qualifier, field)| Column::new(qualifier.cloned(), field.name()))
496
0
            .collect()
497
0
    }
498
499
    /// Return all `Column`s for the schema
500
0
    pub fn columns(&self) -> Vec<Column> {
501
0
        self.iter()
502
0
            .map(|(qualifier, field)| {
503
0
                Column::new(qualifier.cloned(), field.name().clone())
504
0
            })
505
0
            .collect()
506
0
    }
507
508
    /// Find the qualified field with the given unqualified name
509
0
    pub fn qualified_field_with_unqualified_name(
510
0
        &self,
511
0
        name: &str,
512
0
    ) -> Result<(Option<&TableReference>, &Field)> {
513
0
        let matches = self.qualified_fields_with_unqualified_name(name);
514
0
        match matches.len() {
515
0
            0 => Err(unqualified_field_not_found(name, self)),
516
0
            1 => Ok((matches[0].0, (matches[0].1))),
517
            _ => {
518
                // When `matches` size > 1, it doesn't necessarily mean an `ambiguous name` problem.
519
                // The name may have been generated from an Alias/..., in which case it has no qualifier.
520
                // For example:
521
                //             Join on id = b.id
522
                // Project a.id as id   TableScan b id
523
                // In this case, there isn't an `ambiguous name` problem. When `matches` contains just
524
                // one field without qualifier, we should return it.
525
0
                let fields_without_qualifier = matches
526
0
                    .iter()
527
0
                    .filter(|(q, _)| q.is_none())
528
0
                    .collect::<Vec<_>>();
529
0
                if fields_without_qualifier.len() == 1 {
530
0
                    Ok((fields_without_qualifier[0].0, fields_without_qualifier[0].1))
531
                } else {
532
0
                    _schema_err!(SchemaError::AmbiguousReference {
533
0
                        field: Column {
534
0
                            relation: None,
535
0
                            name: name.to_string(),
536
0
                        },
537
0
                    })
538
                }
539
            }
540
        }
541
0
    }
542
543
    /// Find the field with the given name
544
0
    pub fn field_with_unqualified_name(&self, name: &str) -> Result<&Field> {
545
0
        self.qualified_field_with_unqualified_name(name)
546
0
            .map(|(_, field)| field)
547
0
    }
548
549
    /// Find the field with the given qualified name
550
0
    pub fn field_with_qualified_name(
551
0
        &self,
552
0
        qualifier: &TableReference,
553
0
        name: &str,
554
0
    ) -> Result<&Field> {
555
0
        let idx = self
556
0
            .index_of_column_by_name(Some(qualifier), name)
557
0
            .ok_or_else(|| field_not_found(Some(qualifier.clone()), name, self))?;
558
559
0
        Ok(self.field(idx))
560
0
    }
561
562
    /// Find the field for the given column, which may be qualified or unqualified
563
0
    pub fn field_from_column(&self, column: &Column) -> Result<&Field> {
564
0
        match &column.relation {
565
0
            Some(r) => self.field_with_qualified_name(r, &column.name),
566
0
            None => self.field_with_unqualified_name(&column.name),
567
        }
568
0
    }
569
570
    /// Find the field for the given column, returned together with its optional qualifier
571
0
    pub fn qualified_field_from_column(
572
0
        &self,
573
0
        column: &Column,
574
0
    ) -> Result<(Option<&TableReference>, &Field)> {
575
0
        self.qualified_field_with_name(column.relation.as_ref(), &column.name)
576
0
    }
577
578
    /// Find if the field exists with the given name
579
0
    pub fn has_column_with_unqualified_name(&self, name: &str) -> bool {
580
0
        self.fields().iter().any(|field| field.name() == name)
581
0
    }
582
583
    /// Find if the field exists with the given qualified name
584
0
    pub fn has_column_with_qualified_name(
585
0
        &self,
586
0
        qualifier: &TableReference,
587
0
        name: &str,
588
0
    ) -> bool {
589
0
        self.iter()
590
0
            .any(|(q, f)| q.map(|q| q.eq(qualifier)).unwrap_or(false) && f.name() == name)
591
0
    }
592
593
    /// Find if the field exists with the given qualified column
594
0
    pub fn has_column(&self, column: &Column) -> bool {
595
0
        match &column.relation {
596
0
            Some(r) => self.has_column_with_qualified_name(r, &column.name),
597
0
            None => self.has_column_with_unqualified_name(&column.name),
598
        }
599
0
    }
600
601
    /// Check to see if unqualified field names match the field names in the Arrow schema
602
0
    pub fn matches_arrow_schema(&self, arrow_schema: &Schema) -> bool {
603
0
        self.inner
604
0
            .fields
605
0
            .iter()
606
0
            .zip(arrow_schema.fields().iter())
607
0
            .all(|(dffield, arrowfield)| dffield.name() == arrowfield.name())
608
0
    }
609
610
    /// Check to see if fields in 2 Arrow schemas are compatible
611
0
    pub fn check_arrow_schema_type_compatible(
612
0
        &self,
613
0
        arrow_schema: &Schema,
614
0
    ) -> Result<()> {
615
0
        let self_arrow_schema: Schema = self.into();
616
0
        self_arrow_schema
617
0
            .fields()
618
0
            .iter()
619
0
            .zip(arrow_schema.fields().iter())
620
0
            .try_for_each(|(l_field, r_field)| {
621
0
                if !can_cast_types(r_field.data_type(), l_field.data_type()) {
622
0
                    _plan_err!("Column {} (type: {}) is not compatible with column {} (type: {})",
623
0
                                r_field.name(),
624
0
                                r_field.data_type(),
625
0
                                l_field.name(),
626
0
                                l_field.data_type())
627
                } else {
628
0
                    Ok(())
629
                }
630
0
            })
631
0
    }
632
633
    /// Returns true if the two schemas have the same qualified named
634
    /// fields with logically equivalent data types. Returns false otherwise.
635
    ///
636
    /// Use [`DFSchema::equivalent_names_and_types`] for stricter semantic type
637
    /// equivalence checking.
638
0
    pub fn logically_equivalent_names_and_types(&self, other: &Self) -> bool {
639
0
        if self.fields().len() != other.fields().len() {
640
0
            return false;
641
0
        }
642
0
        let self_fields = self.iter();
643
0
        let other_fields = other.iter();
644
0
        self_fields.zip(other_fields).all(|((q1, f1), (q2, f2))| {
645
0
            q1 == q2
646
0
                && f1.name() == f2.name()
647
0
                && Self::datatype_is_logically_equal(f1.data_type(), f2.data_type())
648
0
        })
649
0
    }
650
651
    /// Returns true if the two schemas have the same qualified named
652
    /// fields with the same data types. Returns false otherwise.
653
    ///
654
    /// This is a specialized version of Eq that ignores differences
655
    /// in nullability and metadata.
656
    ///
657
    /// Use [`DFSchema::logically_equivalent_names_and_types`] for a weaker
658
    /// logical type checking, which for example would consider a dictionary
659
    /// encoded UTF8 array to be equivalent to a plain UTF8 array.
660
0
    pub fn equivalent_names_and_types(&self, other: &Self) -> bool {
661
0
        if self.fields().len() != other.fields().len() {
662
0
            return false;
663
0
        }
664
0
        let self_fields = self.iter();
665
0
        let other_fields = other.iter();
666
0
        self_fields.zip(other_fields).all(|((q1, f1), (q2, f2))| {
667
0
            q1 == q2
668
0
                && f1.name() == f2.name()
669
0
                && Self::datatype_is_semantically_equal(f1.data_type(), f2.data_type())
670
0
        })
671
0
    }
672
673
    /// Checks if two [`DataType`]s are logically equal. This is a notably weaker constraint
674
    /// than datatype_is_semantically_equal in that a Dictionary<K,V> type is logically
675
    /// equal to a plain V type, but not semantically equal. Dictionary<K1, V1> is also
676
    /// logically equal to Dictionary<K2, V1>.
677
0
    pub fn datatype_is_logically_equal(dt1: &DataType, dt2: &DataType) -> bool {
678
0
        // check nested fields
679
0
        match (dt1, dt2) {
680
0
            (DataType::Dictionary(_, v1), DataType::Dictionary(_, v2)) => {
681
0
                v1.as_ref() == v2.as_ref()
682
            }
683
0
            (DataType::Dictionary(_, v1), othertype) => v1.as_ref() == othertype,
684
0
            (othertype, DataType::Dictionary(_, v1)) => v1.as_ref() == othertype,
685
0
            (DataType::List(f1), DataType::List(f2))
686
0
            | (DataType::LargeList(f1), DataType::LargeList(f2))
687
0
            | (DataType::FixedSizeList(f1, _), DataType::FixedSizeList(f2, _))
688
0
            | (DataType::Map(f1, _), DataType::Map(f2, _)) => {
689
0
                Self::field_is_logically_equal(f1, f2)
690
            }
691
0
            (DataType::Struct(fields1), DataType::Struct(fields2)) => {
692
0
                let iter1 = fields1.iter();
693
0
                let iter2 = fields2.iter();
694
0
                fields1.len() == fields2.len() &&
695
                        // all fields have to be the same
696
0
                    iter1
697
0
                    .zip(iter2)
698
0
                        .all(|(f1, f2)| Self::field_is_logically_equal(f1, f2))
699
            }
700
0
            (DataType::Union(fields1, _), DataType::Union(fields2, _)) => {
701
0
                let iter1 = fields1.iter();
702
0
                let iter2 = fields2.iter();
703
0
                fields1.len() == fields2.len() &&
704
                    // all fields have to be the same
705
0
                    iter1
706
0
                        .zip(iter2)
707
0
                        .all(|((t1, f1), (t2, f2))| t1 == t2 && Self::field_is_logically_equal(f1, f2))
708
            }
709
0
            _ => dt1 == dt2,
710
        }
711
0
    }
712
713
    /// Returns true if two [`DataType`]s are semantically equal (same
714
    /// name and type), ignoring both metadata and nullability.
715
    ///
716
    /// request to upstream: <https://github.com/apache/arrow-rs/issues/3199>
717
0
    fn datatype_is_semantically_equal(dt1: &DataType, dt2: &DataType) -> bool {
718
0
        // check nested fields
719
0
        match (dt1, dt2) {
720
0
            (DataType::Dictionary(k1, v1), DataType::Dictionary(k2, v2)) => {
721
0
                Self::datatype_is_semantically_equal(k1.as_ref(), k2.as_ref())
722
0
                    && Self::datatype_is_semantically_equal(v1.as_ref(), v2.as_ref())
723
            }
724
0
            (DataType::List(f1), DataType::List(f2))
725
0
            | (DataType::LargeList(f1), DataType::LargeList(f2))
726
0
            | (DataType::FixedSizeList(f1, _), DataType::FixedSizeList(f2, _))
727
0
            | (DataType::Map(f1, _), DataType::Map(f2, _)) => {
728
0
                Self::field_is_semantically_equal(f1, f2)
729
            }
730
0
            (DataType::Struct(fields1), DataType::Struct(fields2)) => {
731
0
                let iter1 = fields1.iter();
732
0
                let iter2 = fields2.iter();
733
0
                fields1.len() == fields2.len() &&
734
                        // all fields have to be the same
735
0
                    iter1
736
0
                    .zip(iter2)
737
0
                        .all(|(f1, f2)| Self::field_is_semantically_equal(f1, f2))
738
            }
739
0
            (DataType::Union(fields1, _), DataType::Union(fields2, _)) => {
740
0
                let iter1 = fields1.iter();
741
0
                let iter2 = fields2.iter();
742
0
                fields1.len() == fields2.len() &&
743
                    // all fields have to be the same
744
0
                    iter1
745
0
                        .zip(iter2)
746
0
                        .all(|((t1, f1), (t2, f2))| t1 == t2 && Self::field_is_semantically_equal(f1, f2))
747
            }
748
            (
749
0
                DataType::Decimal128(_l_precision, _l_scale),
750
0
                DataType::Decimal128(_r_precision, _r_scale),
751
0
            ) => true,
752
            (
753
0
                DataType::Decimal256(_l_precision, _l_scale),
754
0
                DataType::Decimal256(_r_precision, _r_scale),
755
0
            ) => true,
756
0
            _ => dt1 == dt2,
757
        }
758
0
    }
759
760
0
    /// Returns true if the two fields have the same name and their data
    /// types are logically equal per `datatype_is_logically_equal`.
    fn field_is_logically_equal(f1: &Field, f2: &Field) -> bool {
761
0
        f1.name() == f2.name()
762
0
            && Self::datatype_is_logically_equal(f1.data_type(), f2.data_type())
763
0
    }
764
765
0
    /// Returns true if the two fields have the same name and their data
    /// types are semantically equal per `datatype_is_semantically_equal`.
    fn field_is_semantically_equal(f1: &Field, f2: &Field) -> bool {
766
0
        f1.name() == f2.name()
767
0
            && Self::datatype_is_semantically_equal(f1.data_type(), f2.data_type())
768
0
    }
769
770
    /// Strip all field qualifiers from the schema
771
0
    pub fn strip_qualifiers(self) -> Self {
772
0
        DFSchema {
773
0
            field_qualifiers: vec![None; self.inner.fields.len()],
774
0
            inner: self.inner,
775
0
            functional_dependencies: self.functional_dependencies,
776
0
        }
777
0
    }
778
779
    /// Replace all field qualifiers in the schema with the given qualifier
780
0
    pub fn replace_qualifier(self, qualifier: impl Into<TableReference>) -> Self {
781
0
        let qualifier = qualifier.into();
782
0
        DFSchema {
783
0
            field_qualifiers: vec![Some(qualifier); self.inner.fields.len()],
784
0
            inner: self.inner,
785
0
            functional_dependencies: self.functional_dependencies,
786
0
        }
787
0
    }
788
789
    /// Get list of fully-qualified field names in this schema
790
0
    pub fn field_names(&self) -> Vec<String> {
791
0
        self.iter()
792
0
            .map(|(qualifier, field)| qualified_name(qualifier, field.name()))
793
0
            .collect::<Vec<_>>()
794
0
    }
795
796
    /// Get metadata of this schema
797
0
    pub fn metadata(&self) -> &HashMap<String, String> {
798
0
        &self.inner.metadata
799
0
    }
800
801
    /// Get functional dependencies
802
0
    pub fn functional_dependencies(&self) -> &FunctionalDependencies {
803
0
        &self.functional_dependencies
804
0
    }
805
806
    /// Iterate over the qualifiers and fields in the DFSchema
807
0
    pub fn iter(&self) -> impl Iterator<Item = (Option<&TableReference>, &FieldRef)> {
808
0
        self.field_qualifiers
809
0
            .iter()
810
0
            .zip(self.inner.fields().iter())
811
0
            .map(|(qualifier, field)| (qualifier.as_ref(), field))
812
0
    }
813
}
814
815
impl From<DFSchema> for Schema {
816
    /// Convert DFSchema into a Schema
817
0
    fn from(df_schema: DFSchema) -> Self {
818
0
        let fields: Fields = df_schema.inner.fields.clone();
819
0
        Schema::new_with_metadata(fields, df_schema.inner.metadata.clone())
820
0
    }
821
}
822
823
impl From<&DFSchema> for Schema {
824
    /// Convert DFSchema reference into a Schema
825
0
    fn from(df_schema: &DFSchema) -> Self {
826
0
        let fields: Fields = df_schema.inner.fields.clone();
827
0
        Schema::new_with_metadata(fields, df_schema.inner.metadata.clone())
828
0
    }
829
}
830
831
/// Allow DFSchema to be converted into an Arrow `&Schema`
832
impl AsRef<Schema> for DFSchema {
833
0
    fn as_ref(&self) -> &Schema {
834
0
        self.as_arrow()
835
0
    }
836
}
837
838
/// Allow DFSchema to be converted into an Arrow `&SchemaRef` (to clone, for
839
/// example)
840
impl AsRef<SchemaRef> for DFSchema {
841
0
    fn as_ref(&self) -> &SchemaRef {
842
0
        self.inner()
843
0
    }
844
}
845
846
/// Create a `DFSchema` from an Arrow schema
847
impl TryFrom<Schema> for DFSchema {
848
    type Error = DataFusionError;
849
0
    fn try_from(schema: Schema) -> Result<Self, Self::Error> {
850
0
        Self::try_from(Arc::new(schema))
851
0
    }
852
}
853
854
impl TryFrom<SchemaRef> for DFSchema {
855
    type Error = DataFusionError;
856
0
    fn try_from(schema: SchemaRef) -> Result<Self, Self::Error> {
857
0
        let field_count = schema.fields.len();
858
0
        let dfschema = Self {
859
0
            inner: schema,
860
0
            field_qualifiers: vec![None; field_count],
861
0
            functional_dependencies: FunctionalDependencies::empty(),
862
0
        };
863
0
        Ok(dfschema)
864
0
    }
865
}
866
867
impl From<DFSchema> for SchemaRef {
    /// Convert a DFSchema into the shared Arrow schema it wraps.
    ///
    /// The qualifiers and functional dependencies are discarded.
    fn from(df_schema: DFSchema) -> Self {
        // `inner` already is a `SchemaRef`; hand it over instead of copying
        // the fields and metadata into a freshly allocated `Arc`
        df_schema.inner
    }
}
872
873
// Hashing refers to a subset of fields considered in PartialEq.
874
impl Hash for DFSchema {
875
0
    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
876
0
        self.inner.fields.hash(state);
877
0
        self.inner.metadata.len().hash(state); // HashMap is not hashable
878
0
    }
879
}
880
881
/// Convenience trait to convert Schema like things to DFSchema and DFSchemaRef with fewer keystrokes
882
pub trait ToDFSchema
883
where
884
    Self: Sized,
885
{
886
    /// Attempt to create a DSSchema
887
    fn to_dfschema(self) -> Result<DFSchema>;
888
889
    /// Attempt to create a DSSchemaRef
890
0
    fn to_dfschema_ref(self) -> Result<DFSchemaRef> {
891
0
        Ok(Arc::new(self.to_dfschema()?))
892
0
    }
893
}
894
895
impl ToDFSchema for Schema {
896
0
    fn to_dfschema(self) -> Result<DFSchema> {
897
0
        DFSchema::try_from(self)
898
0
    }
899
}
900
901
impl ToDFSchema for SchemaRef {
902
0
    fn to_dfschema(self) -> Result<DFSchema> {
903
0
        DFSchema::try_from(self)
904
0
    }
905
}
906
907
impl ToDFSchema for Vec<Field> {
908
    fn to_dfschema(self) -> Result<DFSchema> {
909
        let field_count = self.len();
910
        let schema = Schema {
911
            fields: self.into(),
912
            metadata: HashMap::new(),
913
        };
914
        let dfschema = DFSchema {
915
            inner: schema.into(),
916
            field_qualifiers: vec![None; field_count],
917
            functional_dependencies: FunctionalDependencies::empty(),
918
        };
919
        Ok(dfschema)
920
    }
921
}
922
923
impl Display for DFSchema {
924
0
    fn fmt(&self, f: &mut Formatter) -> std::fmt::Result {
925
0
        write!(
926
0
            f,
927
0
            "fields:[{}], metadata:{:?}",
928
0
            self.iter()
929
0
                .map(|(q, f)| qualified_name(q, f.name()))
930
0
                .collect::<Vec<String>>()
931
0
                .join(", "),
932
0
            self.inner.metadata
933
0
        )
934
0
    }
935
}
936
937
/// Provides schema information needed by certain methods of `Expr`
938
/// (defined in the datafusion-common crate).
939
///
940
/// Note that this trait is implemented for &[DFSchema] which is
941
/// widely used in the DataFusion codebase.
942
pub trait ExprSchema: std::fmt::Debug {
943
    /// Is this column reference nullable?
944
    fn nullable(&self, col: &Column) -> Result<bool>;
945
946
    /// What is the datatype of this column?
947
    fn data_type(&self, col: &Column) -> Result<&DataType>;
948
949
    /// Returns the column's optional metadata.
950
    fn metadata(&self, col: &Column) -> Result<&HashMap<String, String>>;
951
952
    /// Return the column's datatype and nullability
953
    fn data_type_and_nullable(&self, col: &Column) -> Result<(&DataType, bool)>;
954
}
955
956
// Implement `ExprSchema` for `Arc<DFSchema>`
957
impl<P: AsRef<DFSchema> + std::fmt::Debug> ExprSchema for P {
958
0
    fn nullable(&self, col: &Column) -> Result<bool> {
959
0
        self.as_ref().nullable(col)
960
0
    }
961
962
0
    fn data_type(&self, col: &Column) -> Result<&DataType> {
963
0
        self.as_ref().data_type(col)
964
0
    }
965
966
0
    fn metadata(&self, col: &Column) -> Result<&HashMap<String, String>> {
967
0
        ExprSchema::metadata(self.as_ref(), col)
968
0
    }
969
970
0
    fn data_type_and_nullable(&self, col: &Column) -> Result<(&DataType, bool)> {
971
0
        self.as_ref().data_type_and_nullable(col)
972
0
    }
973
}
974
975
impl ExprSchema for DFSchema {
976
0
    fn nullable(&self, col: &Column) -> Result<bool> {
977
0
        Ok(self.field_from_column(col)?.is_nullable())
978
0
    }
979
980
0
    fn data_type(&self, col: &Column) -> Result<&DataType> {
981
0
        Ok(self.field_from_column(col)?.data_type())
982
0
    }
983
984
0
    fn metadata(&self, col: &Column) -> Result<&HashMap<String, String>> {
985
0
        Ok(self.field_from_column(col)?.metadata())
986
0
    }
987
988
0
    fn data_type_and_nullable(&self, col: &Column) -> Result<(&DataType, bool)> {
989
0
        let field = self.field_from_column(col)?;
990
0
        Ok((field.data_type(), field.is_nullable()))
991
0
    }
992
}
993
994
/// DataFusion-specific extensions to [`Schema`].
995
pub trait SchemaExt {
996
    /// This is a specialized version of Eq that ignores differences
997
    /// in nullability and metadata.
998
    ///
999
    /// It works the same as [`DFSchema::equivalent_names_and_types`].
1000
    fn equivalent_names_and_types(&self, other: &Self) -> bool;
1001
1002
    /// Returns true if the two schemas have the same named fields
1003
    /// with logically equivalent data types. Returns false otherwise.
1004
    ///
1005
    /// Use [`DFSchema::equivalent_names_and_types`] for stricter semantic type
1006
    /// equivalence checking.
1007
    fn logically_equivalent_names_and_types(&self, other: &Self) -> bool;
1008
}
1009
1010
impl SchemaExt for Schema {
1011
0
    fn equivalent_names_and_types(&self, other: &Self) -> bool {
1012
0
        if self.fields().len() != other.fields().len() {
1013
0
            return false;
1014
0
        }
1015
0
1016
0
        self.fields()
1017
0
            .iter()
1018
0
            .zip(other.fields().iter())
1019
0
            .all(|(f1, f2)| {
1020
0
                f1.name() == f2.name()
1021
0
                    && DFSchema::datatype_is_semantically_equal(
1022
0
                        f1.data_type(),
1023
0
                        f2.data_type(),
1024
0
                    )
1025
0
            })
1026
0
    }
1027
1028
0
    fn logically_equivalent_names_and_types(&self, other: &Self) -> bool {
1029
0
        if self.fields().len() != other.fields().len() {
1030
0
            return false;
1031
0
        }
1032
0
1033
0
        self.fields()
1034
0
            .iter()
1035
0
            .zip(other.fields().iter())
1036
0
            .all(|(f1, f2)| {
1037
0
                f1.name() == f2.name()
1038
0
                    && DFSchema::datatype_is_logically_equal(
1039
0
                        f1.data_type(),
1040
0
                        f2.data_type(),
1041
0
                    )
1042
0
            })
1043
0
    }
1044
}
1045
1046
0
pub fn qualified_name(qualifier: Option<&TableReference>, name: &str) -> String {
1047
0
    match qualifier {
1048
0
        Some(q) => format!("{}.{}", q, name),
1049
0
        None => name.to_string(),
1050
    }
1051
0
}
1052
1053
#[cfg(test)]
mod tests {
    use crate::assert_contains;

    use super::*;

    #[test]
    fn qualifier_in_name() -> Result<()> {
        let col = Column::from_name("t1.c0");
        let schema = DFSchema::try_from_qualified_schema("t1", &test_schema_1())?;
        // lookup with unqualified name "t1.c0"
        let err = schema.index_of_column(&col).unwrap_err();
        assert_eq!(
            err.strip_backtrace(),
            "Schema error: No field named \"t1.c0\". Valid fields are t1.c0, t1.c1."
        );
        Ok(())
    }

    #[test]
    fn quoted_qualifiers_in_name() -> Result<()> {
        let col = Column::from_name("t1.c0");
        let schema = DFSchema::try_from_qualified_schema(
            "t1",
            &Schema::new(vec![
                Field::new("CapitalColumn", DataType::Boolean, true),
                Field::new("field.with.period", DataType::Boolean, true),
            ]),
        )?;

        // lookup with unqualified name "t1.c0"
        let err = schema.index_of_column(&col).unwrap_err();
        assert_eq!(
            err.strip_backtrace(),
            "Schema error: No field named \"t1.c0\". Valid fields are t1.\"CapitalColumn\", t1.\"field.with.period\"."
        );
        Ok(())
    }

    #[test]
    fn from_unqualified_schema() -> Result<()> {
        let schema = DFSchema::try_from(test_schema_1())?;
        assert_eq!("fields:[c0, c1], metadata:{}", schema.to_string());
        Ok(())
    }

    #[test]
    fn from_qualified_schema() -> Result<()> {
        let schema = DFSchema::try_from_qualified_schema("t1", &test_schema_1())?;
        assert_eq!("fields:[t1.c0, t1.c1], metadata:{}", schema.to_string());
        Ok(())
    }

    #[test]
    fn test_from_field_specific_qualified_schema() -> Result<()> {
        let schema = DFSchema::from_field_specific_qualified_schema(
            vec![Some("t1".into()), None],
            &Arc::new(Schema::new(vec![
                Field::new("c0", DataType::Boolean, true),
                Field::new("c1", DataType::Boolean, true),
            ])),
        )?;
        assert_eq!("fields:[t1.c0, c1], metadata:{}", schema.to_string());
        Ok(())
    }

    #[test]
    fn test_from_qualified_fields() -> Result<()> {
        let schema = DFSchema::new_with_metadata(
            vec![
                (
                    Some("t0".into()),
                    Arc::new(Field::new("c0", DataType::Boolean, true)),
                ),
                (None, Arc::new(Field::new("c1", DataType::Boolean, true))),
            ],
            HashMap::new(),
        )?;
        assert_eq!("fields:[t0.c0, c1], metadata:{}", schema.to_string());
        Ok(())
    }

    #[test]
    fn from_qualified_schema_into_arrow_schema() -> Result<()> {
        let schema = DFSchema::try_from_qualified_schema("t1", &test_schema_1())?;
        let arrow_schema: Schema = schema.into();
        let expected = "Field { name: \"c0\", data_type: Boolean, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, \
        Field { name: \"c1\", data_type: Boolean, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }";
        assert_eq!(expected, arrow_schema.to_string());
        Ok(())
    }

    #[test]
    fn join_qualified() -> Result<()> {
        let left = DFSchema::try_from_qualified_schema("t1", &test_schema_1())?;
        let right = DFSchema::try_from_qualified_schema("t2", &test_schema_1())?;
        let join = left.join(&right)?;
        assert_eq!(
            "fields:[t1.c0, t1.c1, t2.c0, t2.c1], metadata:{}",
            join.to_string()
        );
        // test valid access
        assert!(join
            .field_with_qualified_name(&TableReference::bare("t1"), "c0")
            .is_ok());
        assert!(join
            .field_with_qualified_name(&TableReference::bare("t2"), "c0")
            .is_ok());
        // test invalid access
        assert!(join.field_with_unqualified_name("c0").is_err());
        assert!(join.field_with_unqualified_name("t1.c0").is_err());
        assert!(join.field_with_unqualified_name("t2.c0").is_err());
        Ok(())
    }

    #[test]
    fn join_qualified_duplicate() -> Result<()> {
        let left = DFSchema::try_from_qualified_schema("t1", &test_schema_1())?;
        let right = DFSchema::try_from_qualified_schema("t1", &test_schema_1())?;
        let join = left.join(&right);
        assert_eq!(
            join.unwrap_err().strip_backtrace(),
            "Schema error: Schema contains duplicate qualified field name t1.c0",
        );
        Ok(())
    }

    #[test]
    fn join_unqualified_duplicate() -> Result<()> {
        let left = DFSchema::try_from(test_schema_1())?;
        let right = DFSchema::try_from(test_schema_1())?;
        let join = left.join(&right);
        assert_eq!(
            join.unwrap_err().strip_backtrace(),
            "Schema error: Schema contains duplicate unqualified field name c0"
        );
        Ok(())
    }

    #[test]
    fn join_mixed() -> Result<()> {
        let left = DFSchema::try_from_qualified_schema("t1", &test_schema_1())?;
        let right = DFSchema::try_from(test_schema_2())?;
        let join = left.join(&right)?;
        assert_eq!(
            "fields:[t1.c0, t1.c1, c100, c101], metadata:{}",
            join.to_string()
        );
        // test valid access
        assert!(join
            .field_with_qualified_name(&TableReference::bare("t1"), "c0")
            .is_ok());
        assert!(join.field_with_unqualified_name("c0").is_ok());
        assert!(join.field_with_unqualified_name("c100").is_ok());
        assert!(join.field_with_name(None, "c100").is_ok());
        // test invalid access
        assert!(join.field_with_unqualified_name("t1.c0").is_err());
        assert!(join.field_with_unqualified_name("t1.c100").is_err());
        assert!(join
            .field_with_qualified_name(&TableReference::bare(""), "c100")
            .is_err());
        Ok(())
    }

    #[test]
    fn join_mixed_duplicate() -> Result<()> {
        let left = DFSchema::try_from_qualified_schema("t1", &test_schema_1())?;
        let right = DFSchema::try_from(test_schema_1())?;
        let join = left.join(&right);
        assert_contains!(join.unwrap_err().to_string(),
                         "Schema error: Schema contains qualified \
                          field name t1.c0 and unqualified field name c0 which would be ambiguous");
        Ok(())
    }

    #[test]
    fn helpful_error_messages() -> Result<()> {
        let schema = DFSchema::try_from_qualified_schema("t1", &test_schema_1())?;
        let expected_help = "Valid fields are t1.c0, t1.c1.";
        assert_contains!(
            schema
                .field_with_qualified_name(&TableReference::bare("x"), "y")
                .unwrap_err()
                .to_string(),
            expected_help
        );
        assert_contains!(
            schema
                .field_with_unqualified_name("y")
                .unwrap_err()
                .to_string(),
            expected_help
        );
        assert!(schema.index_of_column_by_name(None, "y").is_none());
        assert!(schema.index_of_column_by_name(None, "t1.c0").is_none());

        Ok(())
    }

    #[test]
    fn select_without_valid_fields() {
        let schema = DFSchema::empty();

        let col = Column::from_qualified_name("t1.c0");
        let err = schema.index_of_column(&col).unwrap_err();
        assert_eq!(err.strip_backtrace(), "Schema error: No field named t1.c0.");

        // the same check without qualifier
        let col = Column::from_name("c0");
        let err = schema.index_of_column(&col).err().unwrap();
        assert_eq!(err.strip_backtrace(), "Schema error: No field named c0.");
    }

    #[test]
    fn into() {
        // Demonstrate how to convert back and forth between Schema, SchemaRef, DFSchema, and DFSchemaRef
        let arrow_schema = Schema::new_with_metadata(
            vec![Field::new("c0", DataType::Int64, true)],
            test_metadata(),
        );
        let arrow_schema_ref = Arc::new(arrow_schema.clone());

        let df_schema = DFSchema {
            inner: Arc::clone(&arrow_schema_ref),
            field_qualifiers: vec![None; arrow_schema_ref.fields.len()],
            functional_dependencies: FunctionalDependencies::empty(),
        };
        let df_schema_ref = Arc::new(df_schema.clone());

        {
            let arrow_schema = arrow_schema.clone();
            let arrow_schema_ref = Arc::clone(&arrow_schema_ref);

            assert_eq!(df_schema, arrow_schema.to_dfschema().unwrap());
            assert_eq!(df_schema, arrow_schema_ref.to_dfschema().unwrap());
        }

        {
            let arrow_schema = arrow_schema.clone();
            let arrow_schema_ref = Arc::clone(&arrow_schema_ref);

            assert_eq!(df_schema_ref, arrow_schema.to_dfschema_ref().unwrap());
            assert_eq!(df_schema_ref, arrow_schema_ref.to_dfschema_ref().unwrap());
        }

        // Now, consume the refs
        assert_eq!(df_schema_ref, arrow_schema.to_dfschema_ref().unwrap());
        assert_eq!(df_schema_ref, arrow_schema_ref.to_dfschema_ref().unwrap());
    }

    /// Two nullable boolean fields named `c0` and `c1`
    fn test_schema_1() -> Schema {
        Schema::new(vec![
            Field::new("c0", DataType::Boolean, true),
            Field::new("c1", DataType::Boolean, true),
        ])
    }

    #[test]
    fn test_dfschema_to_schema_conversion() {
        let mut a_metadata = HashMap::new();
        a_metadata.insert("key".to_string(), "value".to_string());
        let a_field = Field::new("a", DataType::Int64, false).with_metadata(a_metadata);

        let mut b_metadata = HashMap::new();
        b_metadata.insert("key".to_string(), "value".to_string());
        let b_field = Field::new("b", DataType::Int64, false).with_metadata(b_metadata);

        let schema = Arc::new(Schema::new(vec![a_field, b_field]));

        let df_schema = DFSchema {
            inner: Arc::clone(&schema),
            field_qualifiers: vec![None; schema.fields.len()],
            functional_dependencies: FunctionalDependencies::empty(),
        };

        // Run both `From` impls so a conversion actually takes place; comparing
        // `df_schema.inner` with `schema` would compare an `Arc` with itself
        let from_borrowed = Schema::from(&df_schema);
        let from_owned: Schema = df_schema.into();

        for converted in [&from_borrowed, &from_owned] {
            assert_eq!(converted, &*schema);
            assert_eq!(converted.metadata(), schema.metadata());
            // The per-field metadata must survive the conversion as well
            for (converted_field, original_field) in
                converted.fields().iter().zip(schema.fields().iter())
            {
                assert_eq!(converted_field.metadata(), original_field.metadata());
            }
        }
    }

    #[test]
    fn test_contain_column() -> Result<()> {
        // qualified exists
        {
            let col = Column::from_qualified_name("t1.c0");
            let schema = DFSchema::try_from_qualified_schema("t1", &test_schema_1())?;
            assert!(schema.is_column_from_schema(&col));
        }

        // qualified not exists
        {
            let col = Column::from_qualified_name("t1.c2");
            let schema = DFSchema::try_from_qualified_schema("t1", &test_schema_1())?;
            assert!(!schema.is_column_from_schema(&col));
        }

        // unqualified exists
        {
            let col = Column::from_name("c0");
            let schema = DFSchema::try_from_qualified_schema("t1", &test_schema_1())?;
            assert!(schema.is_column_from_schema(&col));
        }

        // unqualified not exists
        {
            let col = Column::from_name("c2");
            let schema = DFSchema::try_from_qualified_schema("t1", &test_schema_1())?;
            assert!(!schema.is_column_from_schema(&col));
        }

        Ok(())
    }

    /// Two nullable boolean fields named `c100` and `c101`
    fn test_schema_2() -> Schema {
        Schema::new(vec![
            Field::new("c100", DataType::Boolean, true),
            Field::new("c101", DataType::Boolean, true),
        ])
    }

    /// Metadata map with two entries (`k0 -> v0`, `k1 -> v1`)
    fn test_metadata() -> HashMap<String, String> {
        test_metadata_n(2)
    }

    /// Metadata map with `n` entries of the form `k{i} -> v{i}`
    fn test_metadata_n(n: usize) -> HashMap<String, String> {
        (0..n).map(|i| (format!("k{i}"), format!("v{i}"))).collect()
    }
}