Coverage Report

Created: 2024-10-13 08:39

/Users/andrewlamb/Software/datafusion/datafusion/expr/src/logical_plan/dml.rs
Line
Count
Source (jump to first uncovered line)
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
use std::cmp::Ordering;
19
use std::collections::HashMap;
20
use std::fmt::{self, Debug, Display, Formatter};
21
use std::hash::{Hash, Hasher};
22
use std::sync::Arc;
23
24
use arrow::datatypes::{DataType, Field, Schema};
25
use datafusion_common::file_options::file_type::FileType;
26
use datafusion_common::{DFSchemaRef, TableReference};
27
28
use crate::LogicalPlan;
29
30
/// Operator that copies the contents of a database to file(s)
31
#[derive(Clone)]
32
pub struct CopyTo {
33
    /// The relation that determines the tuples to write to the output file(s)
34
    pub input: Arc<LogicalPlan>,
35
    /// The location to write the file(s)
36
    pub output_url: String,
37
    /// Determines which, if any, columns should be used for hive-style partitioned writes
38
    pub partition_by: Vec<String>,
39
    /// File type trait
40
    pub file_type: Arc<dyn FileType>,
41
    /// SQL Options that can affect the formats
42
    pub options: HashMap<String, String>,
43
}
44
45
impl Debug for CopyTo {
46
0
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
47
0
        f.debug_struct("CopyTo")
48
0
            .field("input", &self.input)
49
0
            .field("output_url", &self.output_url)
50
0
            .field("partition_by", &self.partition_by)
51
0
            .field("file_type", &"...")
52
0
            .field("options", &self.options)
53
0
            .finish_non_exhaustive()
54
0
    }
55
}
56
57
// Implement PartialEq manually
58
impl PartialEq for CopyTo {
59
0
    fn eq(&self, other: &Self) -> bool {
60
0
        self.input == other.input && self.output_url == other.output_url
61
0
    }
62
}
63
64
// Implement Eq (no need for additional logic over PartialEq)
65
impl Eq for CopyTo {}
66
67
// Manual implementation needed because of `file_type` and `options` fields.
68
// Comparison excludes these field.
69
impl PartialOrd for CopyTo {
70
0
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
71
0
        match self.input.partial_cmp(&other.input) {
72
0
            Some(Ordering::Equal) => match self.output_url.partial_cmp(&other.output_url)
73
            {
74
                Some(Ordering::Equal) => {
75
0
                    self.partition_by.partial_cmp(&other.partition_by)
76
                }
77
0
                cmp => cmp,
78
            },
79
0
            cmp => cmp,
80
        }
81
0
    }
82
}
83
84
// Implement Hash manually
85
impl Hash for CopyTo {
86
0
    fn hash<H: Hasher>(&self, state: &mut H) {
87
0
        self.input.hash(state);
88
0
        self.output_url.hash(state);
89
0
    }
90
}
91
92
/// The operator that modifies the content of a database (adapted from
93
/// substrait WriteRel)
94
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
95
pub struct DmlStatement {
96
    /// The table name
97
    pub table_name: TableReference,
98
    /// The schema of the table (must align with Rel input)
99
    pub table_schema: DFSchemaRef,
100
    /// The type of operation to perform
101
    pub op: WriteOp,
102
    /// The relation that determines the tuples to add/remove/modify the schema must match with table_schema
103
    pub input: Arc<LogicalPlan>,
104
    /// The schema of the output relation
105
    pub output_schema: DFSchemaRef,
106
}
107
108
impl DmlStatement {
109
    /// Creates a new DML statement with the output schema set to a single `count` column.
110
0
    pub fn new(
111
0
        table_name: TableReference,
112
0
        table_schema: DFSchemaRef,
113
0
        op: WriteOp,
114
0
        input: Arc<LogicalPlan>,
115
0
    ) -> Self {
116
0
        Self {
117
0
            table_name,
118
0
            table_schema,
119
0
            op,
120
0
            input,
121
0
122
0
            // The output schema is always a single column with the number of rows affected
123
0
            output_schema: make_count_schema(),
124
0
        }
125
0
    }
126
127
    /// Return a descriptive name of this [`DmlStatement`]
128
0
    pub fn name(&self) -> &str {
129
0
        self.op.name()
130
0
    }
131
}
132
133
// Manual implementation needed because of `table_schema` and `output_schema` fields.
134
// Comparison excludes these fields.
135
impl PartialOrd for DmlStatement {
136
0
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
137
0
        match self.table_name.partial_cmp(&other.table_name) {
138
0
            Some(Ordering::Equal) => match self.op.partial_cmp(&other.op) {
139
0
                Some(Ordering::Equal) => self.input.partial_cmp(&other.input),
140
0
                cmp => cmp,
141
            },
142
0
            cmp => cmp,
143
        }
144
0
    }
145
}
146
147
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Hash)]
148
pub enum WriteOp {
149
    Insert(InsertOp),
150
    Delete,
151
    Update,
152
    Ctas,
153
}
154
155
impl WriteOp {
156
    /// Return a descriptive name of this [`WriteOp`]
157
0
    pub fn name(&self) -> &str {
158
0
        match self {
159
0
            WriteOp::Insert(insert) => insert.name(),
160
0
            WriteOp::Delete => "Delete",
161
0
            WriteOp::Update => "Update",
162
0
            WriteOp::Ctas => "Ctas",
163
        }
164
0
    }
165
}
166
167
impl Display for WriteOp {
168
0
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
169
0
        write!(f, "{}", self.name())
170
0
    }
171
}
172
173
/// The flavor of insert performed by an insert write operation.
///
/// `PartialOrd` is derived, so variants order by their declaration order
/// below; reordering the variants changes comparison results.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Hash)]
pub enum InsertOp {
    /// Adds the new rows to the existing table and leaves every existing
    /// row untouched. Corresponds to the SQL `INSERT INTO` query.
    Append,
    /// Replaces all existing rows in the table with the new rows.
    /// Corresponds to the SQL `INSERT OVERWRITE` query.
    Overwrite,
    /// Replaces any existing rows that collide with the inserted rows
    /// (typically based on a unique key or primary key). Corresponds to the
    /// SQL `REPLACE INTO` query and its equivalents.
    Replace,
}

impl InsertOp {
    /// Return a descriptive name of this [`InsertOp`]
    pub fn name(&self) -> &str {
        match *self {
            Self::Append => "Insert Into",
            Self::Overwrite => "Insert Overwrite",
            Self::Replace => "Replace Into",
        }
    }
}

impl Display for InsertOp {
    /// Writes the descriptive name returned by [`InsertOp::name`].
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str(self.name())
    }
}
203
204
0
fn make_count_schema() -> DFSchemaRef {
205
0
    Arc::new(
206
0
        Schema::new(vec![Field::new("count", DataType::UInt64, false)])
207
0
            .try_into()
208
0
            .unwrap(),
209
0
    )
210
0
}