Skip to content

Commit

Permalink
update tpch examples for new pyarrow interval
Browse files Browse the repository at this point in the history
  • Loading branch information
Michael-J-Ward committed Jun 14, 2024
1 parent e519c1b commit 67d3ec5
Show file tree
Hide file tree
Showing 9 changed files with 11 additions and 27 deletions.
4 changes: 1 addition & 3 deletions examples/tpch/q01_pricing_summary_report.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,9 +48,7 @@
# want to report results for. It should be between 60-120 days before the end.
DAYS_BEFORE_FINAL = 90

-# Note: this is a hack on setting the values. It should be set differently once
-# https://github.com/apache/datafusion-python/issues/665 is resolved.
-interval = pa.scalar((0, 0, DAYS_BEFORE_FINAL), type=pa.month_day_nano_interval())
+interval = pa.scalar((0, DAYS_BEFORE_FINAL, 0), type=pa.month_day_nano_interval())

print("Final date in database:", greatest_ship_date)

Expand Down
4 changes: 1 addition & 3 deletions examples/tpch/q04_order_priority_checking.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,9 +49,7 @@
# Create a date object from the string
date = datetime.strptime(DATE_OF_INTEREST, "%Y-%m-%d").date()

-# Note: this is a hack on setting the values. It should be set differently once
-# https://github.com/apache/datafusion-python/issues/665 is resolved.
-interval = pa.scalar((0, 0, INTERVAL_DAYS), type=pa.month_day_nano_interval())
+interval = pa.scalar((0, INTERVAL_DAYS, 0), type=pa.month_day_nano_interval())

# Limit results to cases where commitment date before receipt date
# Aggregate the results so we only get one row to join with the order table.
Expand Down
4 changes: 1 addition & 3 deletions examples/tpch/q05_local_supplier_volume.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,7 @@

date = datetime.strptime(DATE_OF_INTEREST, "%Y-%m-%d").date()

-# Note: this is a hack on setting the values. It should be set differently once
-# https://github.com/apache/datafusion-python/issues/665 is resolved.
-interval = pa.scalar((0, 0, INTERVAL_DAYS), type=pa.month_day_nano_interval())
+interval = pa.scalar((0, INTERVAL_DAYS, 0), type=pa.month_day_nano_interval())

# Load the dataframes we need

Expand Down
4 changes: 1 addition & 3 deletions examples/tpch/q06_forecasting_revenue_change.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,9 +45,7 @@

date = datetime.strptime(DATE_OF_INTEREST, "%Y-%m-%d").date()

-# Note: this is a hack on setting the values. It should be set differently once
-# https://github.com/apache/datafusion-python/issues/665 is resolved.
-interval = pa.scalar((0, 0, INTERVAL_DAYS), type=pa.month_day_nano_interval())
+interval = pa.scalar((0, INTERVAL_DAYS, 0), type=pa.month_day_nano_interval())

# Load the dataframes we need

Expand Down
4 changes: 1 addition & 3 deletions examples/tpch/q10_returned_item_reporting.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,7 @@

date_start_of_quarter = lit(datetime.strptime(DATE_START_OF_QUARTER, "%Y-%m-%d").date())

-# Note: this is a hack on setting the values. It should be set differently once
-# https://github.com/apache/datafusion-python/issues/665 is resolved.
-interval_one_quarter = lit(pa.scalar((0, 0, 92), type=pa.month_day_nano_interval()))
+interval_one_quarter = lit(pa.scalar((0, 92, 0), type=pa.month_day_nano_interval()))

# Load the dataframes we need

Expand Down
4 changes: 1 addition & 3 deletions examples/tpch/q12_ship_mode_order_priority.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,9 +51,7 @@

date = datetime.strptime(DATE_OF_INTEREST, "%Y-%m-%d").date()

-# Note: this is a hack on setting the values. It should be set differently once
-# https://github.com/apache/datafusion-python/issues/665 is resolved.
-interval = pa.scalar((0, 0, 365), type=pa.month_day_nano_interval())
+interval = pa.scalar((0, 365, 0), type=pa.month_day_nano_interval())


df = df_lineitem.filter(col("l_receiptdate") >= lit(date)).filter(
Expand Down
5 changes: 2 additions & 3 deletions examples/tpch/q14_promotion_effect.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,8 @@
DATE = "1995-09-01"

date_of_interest = lit(datetime.strptime(DATE, "%Y-%m-%d").date())
-# Note: this is a hack on setting the values. It should be set differently once
-# https://github.com/apache/datafusion-python/issues/665 is resolved.
-interval_one_month = lit(pa.scalar((0, 0, 30), type=pa.month_day_nano_interval()))
+
+interval_one_month = lit(pa.scalar((0, 30, 0), type=pa.month_day_nano_interval()))

# Load the dataframes we need

Expand Down
5 changes: 2 additions & 3 deletions examples/tpch/q15_top_supplier.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,8 @@
DATE = "1996-01-01"

date_of_interest = lit(datetime.strptime(DATE, "%Y-%m-%d").date())
-# Note: this is a hack on setting the values. It should be set differently once
-# https://github.com/apache/datafusion-python/issues/665 is resolved.
-interval_3_months = lit(pa.scalar((0, 0, 91), type=pa.month_day_nano_interval()))
+
+interval_3_months = lit(pa.scalar((0, 91, 0), type=pa.month_day_nano_interval()))

# Load the dataframes we need

Expand Down
4 changes: 1 addition & 3 deletions examples/tpch/q20_potential_part_promotion.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,9 +56,7 @@

date = datetime.strptime(DATE_OF_INTEREST, "%Y-%m-%d").date()

-# Note: this is a hack on setting the values. It should be set differently once
-# https://github.com/apache/datafusion-python/issues/665 is resolved.
-interval = pa.scalar((0, 0, 365), type=pa.month_day_nano_interval())
+interval = pa.scalar((0, 365, 0), type=pa.month_day_nano_interval())

# Filter down dataframes
df_nation = df_nation.filter(col("n_name") == lit(NATION_OF_INTEREST))
Expand Down

0 comments on commit 67d3ec5

Please sign in to comment.