-
Notifications
You must be signed in to change notification settings - Fork 1.3k
/
Copy pathcte.slt
653 lines (624 loc) · 13.5 KB
/
cte.slt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
query II
select * from (WITH source AS (select 1 as e) SELECT * FROM source) t1, (WITH source AS (select 1 as e) SELECT * FROM source) t2
----
1 1
# Ensure table aliases can be case sensitive
query I
WITH "T" AS (SELECT 1 a) SELECT "T".* FROM "T"
----
1
# Ensure table aliases can be case sensitive
query TT
EXPLAIN WITH "NUMBERS" AS (SELECT 1 as a, 2 as b, 3 as c) SELECT "NUMBERS".* FROM "NUMBERS"
----
logical_plan
Projection: NUMBERS.a, NUMBERS.b, NUMBERS.c
--SubqueryAlias: NUMBERS
----Projection: Int64(1) AS a, Int64(2) AS b, Int64(3) AS c
------EmptyRelation
physical_plan
ProjectionExec: expr=[1 as a, 2 as b, 3 as c]
--PlaceholderRowExec
# enable recursive CTEs
statement ok
set datafusion.execution.enable_recursive_ctes = true;
# trivial recursive CTE works
query I rowsort
WITH RECURSIVE nodes AS (
SELECT 1 as id
UNION ALL
SELECT id + 1 as id
FROM nodes
WHERE id < 10
)
SELECT * FROM nodes
----
1
10
2
3
4
5
6
7
8
9
# explain trivial recursive CTE
query TT
EXPLAIN WITH RECURSIVE nodes AS (
SELECT 1 as id
UNION ALL
SELECT id + 1 as id
FROM nodes
WHERE id < 10
)
SELECT * FROM nodes
----
logical_plan
Projection: nodes.id
--SubqueryAlias: nodes
----RecursiveQuery: is_distinct=false
------Projection: Int64(1) AS id
--------EmptyRelation
------Projection: nodes.id + Int64(1) AS id
--------Filter: nodes.id < Int64(10)
----------TableScan: nodes
physical_plan
RecursiveQueryExec: name=nodes, is_distinct=false
--ProjectionExec: expr=[1 as id]
----PlaceholderRowExec
--CoalescePartitionsExec
----ProjectionExec: expr=[id@0 + 1 as id]
------CoalesceBatchesExec: target_batch_size=8192
--------FilterExec: id@0 < 10
----------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
------------WorkTableExec: name=nodes
# setup
statement ok
CREATE EXTERNAL TABLE balance STORED as CSV WITH HEADER ROW LOCATION '../core/tests/data/recursive_cte/balance.csv'
# setup
statement ok
CREATE EXTERNAL TABLE growth STORED as CSV WITH HEADER ROW LOCATION '../core/tests/data/recursive_cte/growth.csv'
# setup
statement ok
set datafusion.execution.batch_size = 2;
# recursive CTE with static term derived from table works.
# use explain to ensure that batch size is set to 2. This should produce multiple batches per iteration since the input
# table 'balances' has 4 rows
query TT
EXPLAIN WITH RECURSIVE balances AS (
SELECT * from balance
UNION ALL
SELECT time + 1 as time, name, account_balance + 10 as account_balance
FROM balances
WHERE time < 10
)
SELECT * FROM balances
ORDER BY time, name, account_balance
----
logical_plan
Sort: balances.time ASC NULLS LAST, balances.name ASC NULLS LAST, balances.account_balance ASC NULLS LAST
--Projection: balances.time, balances.name, balances.account_balance
----SubqueryAlias: balances
------RecursiveQuery: is_distinct=false
--------Projection: balance.time, balance.name, balance.account_balance
----------TableScan: balance
--------Projection: balances.time + Int64(1) AS time, balances.name, balances.account_balance + Int64(10) AS account_balance
----------Filter: balances.time < Int64(10)
------------TableScan: balances
physical_plan
SortExec: expr=[time@0 ASC NULLS LAST,name@1 ASC NULLS LAST,account_balance@2 ASC NULLS LAST]
--RecursiveQueryExec: name=balances, is_distinct=false
----CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/recursive_cte/balance.csv]]}, projection=[time, name, account_balance], has_header=true
----CoalescePartitionsExec
------ProjectionExec: expr=[time@0 + 1 as time, name@1 as name, account_balance@2 + 10 as account_balance]
--------CoalesceBatchesExec: target_batch_size=2
----------FilterExec: time@0 < 10
------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
--------------WorkTableExec: name=balances
# recursive CTE with static term derived from table works
# note that this is run with batch size set to 2. This should produce multiple batches per iteration since the input
# table 'balances' has 4 rows
query ITI
WITH RECURSIVE balances AS (
SELECT * from balance
UNION ALL
SELECT time + 1 as time, name, account_balance + 10 as account_balance
FROM balances
WHERE time < 10
)
SELECT * FROM balances
ORDER BY time, name, account_balance
----
1 John 100
1 Tim 200
2 John 110
2 John 300
2 Tim 210
2 Tim 400
3 John 120
3 John 310
3 Tim 220
3 Tim 410
4 John 130
4 John 320
4 Tim 230
4 Tim 420
5 John 140
5 John 330
5 Tim 240
5 Tim 430
6 John 150
6 John 340
6 Tim 250
6 Tim 440
7 John 160
7 John 350
7 Tim 260
7 Tim 450
8 John 170
8 John 360
8 Tim 270
8 Tim 460
9 John 180
9 John 370
9 Tim 280
9 Tim 470
10 John 190
10 John 380
10 Tim 290
10 Tim 480
# reset batch size to default
statement ok
set datafusion.execution.batch_size = 8182;
# recursive CTE with recursive join works
query ITI
WITH RECURSIVE balances AS (
SELECT time as time, name as name, account_balance as account_balance
FROM balance
UNION ALL
SELECT time + 1 as time, balances.name, account_balance + growth.account_growth as account_balance
FROM balances
JOIN growth
ON balances.name = growth.name
WHERE time < 10
)
SELECT * FROM balances
ORDER BY time, name, account_balance
----
1 John 100
1 Tim 200
2 John 103
2 John 300
2 Tim 220
2 Tim 400
3 John 106
3 John 303
3 Tim 240
3 Tim 420
4 John 109
4 John 306
4 Tim 260
4 Tim 440
5 John 112
5 John 309
5 Tim 280
5 Tim 460
6 John 115
6 John 312
6 Tim 300
6 Tim 480
7 John 118
7 John 315
7 Tim 320
7 Tim 500
8 John 121
8 John 318
8 Tim 340
8 Tim 520
9 John 124
9 John 321
9 Tim 360
9 Tim 540
10 John 127
10 John 324
10 Tim 380
10 Tim 560
# recursive CTE with aggregations works
query I rowsort
WITH RECURSIVE nodes AS (
SELECT 1 as id
UNION ALL
SELECT id + 1 as id
FROM nodes
WHERE id < 10
)
SELECT sum(id) FROM nodes
----
55
# setup
statement ok
CREATE TABLE t(a BIGINT) AS VALUES(1),(2),(3);
# referencing CTE multiple times does not error
query II rowsort
WITH RECURSIVE my_cte AS (
SELECT a from t
UNION ALL
SELECT a+2 as a
FROM my_cte
WHERE a<5
)
SELECT * FROM my_cte t1, my_cte
----
1 1
1 2
1 3
1 3
1 4
1 5
1 5
1 6
2 1
2 2
2 3
2 3
2 4
2 5
2 5
2 6
3 1
3 1
3 2
3 2
3 3
3 3
3 3
3 3
3 4
3 4
3 5
3 5
3 5
3 5
3 6
3 6
4 1
4 2
4 3
4 3
4 4
4 5
4 5
4 6
5 1
5 1
5 2
5 2
5 3
5 3
5 3
5 3
5 4
5 4
5 5
5 5
5 5
5 5
5 6
5 6
6 1
6 2
6 3
6 3
6 4
6 5
6 5
6 6
# CTE within recursive CTE works and does not result in 'index out of bounds: the len is 0 but the index is 0'
query I
WITH RECURSIVE "recursive_cte" AS (
SELECT 1 as "val"
UNION ALL (
WITH "sub_cte" AS (
SELECT
time,
1 as "val"
FROM
(SELECT DISTINCT "time" FROM "balance")
)
SELECT
2 as "val"
FROM
"recursive_cte"
FULL JOIN "sub_cte" ON 1 = 1
WHERE
"recursive_cte"."val" < 2
)
)
SELECT
*
FROM
"recursive_cte";
----
1
2
2
# setup
statement ok
CREATE EXTERNAL TABLE prices STORED as CSV WITH HEADER ROW LOCATION '../core/tests/data/recursive_cte/prices.csv'
# CTE within window function inside nested CTE works. This test demonstrates using a nested window function to recursively iterate over a column.
query RRII
WITH RECURSIVE "recursive_cte" AS (
(
WITH "min_prices_row_num_cte" AS (
SELECT
MIN("prices"."prices_row_num") AS "prices_row_num"
FROM
"prices"
),
"min_prices_row_num_cte_second" AS (
SELECT
MIN("prices"."prices_row_num") AS "prices_row_num_advancement"
FROM
"prices"
WHERE
"prices"."prices_row_num" > (
SELECT
"prices_row_num"
FROM
"min_prices_row_num_cte"
)
)
SELECT
0.0 AS "beg",
(0.0 + 50) AS "end",
(
SELECT
"prices_row_num"
FROM
"min_prices_row_num_cte"
) AS "prices_row_num",
(
SELECT
"prices_row_num_advancement"
FROM
"min_prices_row_num_cte_second"
) AS "prices_row_num_advancement"
FROM
"prices"
WHERE
"prices"."prices_row_num" = (
SELECT
DISTINCT "prices_row_num"
FROM
"min_prices_row_num_cte"
)
)
UNION ALL (
WITH "min_prices_row_num_cte" AS (
SELECT
"prices"."prices_row_num" AS "prices_row_num",
LEAD("prices"."prices_row_num", 1) OVER (
ORDER BY "prices_row_num"
) AS "prices_row_num_advancement"
FROM
(
SELECT
DISTINCT "prices_row_num"
FROM
"prices"
) AS "prices"
)
SELECT
"recursive_cte"."end" AS "beg",
("recursive_cte"."end" + 50) AS "end",
"min_prices_row_num_cte"."prices_row_num" AS "prices_row_num",
"min_prices_row_num_cte"."prices_row_num_advancement" AS "prices_row_num_advancement"
FROM
"recursive_cte"
FULL JOIN "prices" ON "prices"."prices_row_num" = "recursive_cte"."prices_row_num_advancement"
FULL JOIN "min_prices_row_num_cte" ON "min_prices_row_num_cte"."prices_row_num" = COALESCE(
"prices"."prices_row_num",
"recursive_cte"."prices_row_num_advancement"
)
WHERE
"recursive_cte"."prices_row_num_advancement" IS NOT NULL
)
)
SELECT
DISTINCT *
FROM
"recursive_cte"
ORDER BY
"prices_row_num" ASC;
----
0 50 1 2
50 100 2 3
100 150 3 4
150 200 4 5
200 250 5 6
250 300 6 7
300 350 7 8
350 400 8 9
400 450 9 10
450 500 10 11
500 550 11 12
550 600 12 13
600 650 13 14
650 700 14 15
700 750 15 16
750 800 16 17
800 850 17 18
850 900 18 19
900 950 19 20
950 1000 20 21
1000 1050 21 22
1050 1100 22 23
1100 1150 23 24
1150 1200 24 25
1200 1250 25 26
1250 1300 26 27
1300 1350 27 28
1350 1400 28 29
1400 1450 29 30
1450 1500 30 31
1500 1550 31 32
1550 1600 32 33
1600 1650 33 34
1650 1700 34 35
1700 1750 35 36
1750 1800 36 37
1800 1850 37 38
1850 1900 38 39
1900 1950 39 40
1950 2000 40 41
2000 2050 41 42
2050 2100 42 43
2100 2150 43 44
2150 2200 44 45
2200 2250 45 46
2250 2300 46 47
2300 2350 47 48
2350 2400 48 49
2400 2450 49 50
2450 2500 50 51
2500 2550 51 52
2550 2600 52 53
2600 2650 53 54
2650 2700 54 55
2700 2750 55 56
2750 2800 56 57
2800 2850 57 58
2850 2900 58 59
2900 2950 59 60
2950 3000 60 61
3000 3050 61 62
3050 3100 62 63
3100 3150 63 64
3150 3200 64 65
3200 3250 65 66
3250 3300 66 67
3300 3350 67 68
3350 3400 68 69
3400 3450 69 70
3450 3500 70 71
3500 3550 71 72
3550 3600 72 73
3600 3650 73 74
3650 3700 74 75
3700 3750 75 76
3750 3800 76 77
3800 3850 77 78
3850 3900 78 79
3900 3950 79 80
3950 4000 80 81
4000 4050 81 82
4050 4100 82 83
4100 4150 83 84
4150 4200 84 85
4200 4250 85 86
4250 4300 86 87
4300 4350 87 88
4350 4400 88 89
4400 4450 89 90
4450 4500 90 91
4500 4550 91 92
4550 4600 92 93
4600 4650 93 94
4650 4700 94 95
4700 4750 95 96
4750 4800 96 97
4800 4850 97 98
4850 4900 98 99
4900 4950 99 100
4950 5000 100 NULL
# setup
statement ok
CREATE EXTERNAL TABLE sales STORED as CSV WITH HEADER ROW LOCATION '../core/tests/data/recursive_cte/sales.csv'
# setup
statement ok
CREATE EXTERNAL TABLE salespersons STORED as CSV WITH HEADER ROW LOCATION '../core/tests/data/recursive_cte/salespersons.csv'
# group by works within recursive cte. This test case demonstrates rolling up a hierarchy of salespeople to their managers.
query III
WITH RECURSIVE region_sales AS (
-- Anchor member
SELECT
s.salesperson_id AS salesperson_id,
SUM(s.sale_amount) AS amount,
0 as level
FROM
sales s
GROUP BY
s.salesperson_id
UNION ALL
-- Recursive member
SELECT
sp.manager_id AS salesperson_id,
SUM(rs.amount) AS amount,
MIN(rs.level) + 1 as level
FROM
region_sales rs
INNER JOIN salespersons sp ON rs.salesperson_id = sp.salesperson_id
WHERE sp.manager_id IS NOT NULL
GROUP BY
sp.manager_id
)
SELECT
salesperson_id,
MAX(amount) as amount,
MAX(level) as hierarchy_level
FROM
region_sales
GROUP BY
salesperson_id
ORDER BY
hierarchy_level ASC, salesperson_id ASC;
----
4 700 0
5 600 0
6 500 0
7 900 0
2 1300 1
3 1400 1
1 2700 2
#expect error from recursive CTE with nested recursive terms
query error DataFusion error: This feature is not implemented: Recursive queries cannot be nested
WITH RECURSIVE outer_cte AS (
SELECT 1 as a
UNION ALL (
WITH RECURSIVE nested_cte AS (
SELECT 1 as a
UNION ALL
SELECT a+2 as a
FROM nested_cte where a < 3
)
SELECT outer_cte.a +2
FROM outer_cte JOIN nested_cte USING(a)
WHERE nested_cte.a < 4
)
)
SELECT a FROM outer_cte;
# expect error when recursive CTE is referenced multiple times in the recursive term
query error DataFusion error: This feature is not implemented: Multiple recursive references to the same CTE are not supported
WITH RECURSIVE my_cte AS (
SELECT 1 as a
UNION ALL
SELECT my_cte.a+2 as a
FROM my_cte join my_cte c2 using(a)
WHERE my_cte.a<5
)
SELECT a FROM my_cte;