From 039243f5ac481e94ea13dd6f21c9f0bd0db58249 Mon Sep 17 00:00:00 2001 From: Liangcai Li Date: Tue, 25 May 2021 21:40:24 +0800 Subject: [PATCH] Add comments for lazy binding in WindowInPandas (#2496) Signed-off-by: Firestarman --- .../rapids/shims/spark301db/GpuWindowInPandasExec.scala | 6 +++++- .../python/shims/spark310db/GpuWindowInPandasExec.scala | 4 ++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/shims/spark301db/src/main/scala/com/nvidia/spark/rapids/shims/spark301db/GpuWindowInPandasExec.scala b/shims/spark301db/src/main/scala/com/nvidia/spark/rapids/shims/spark301db/GpuWindowInPandasExec.scala index 3bc3f31e897..c734e4e7bff 100644 --- a/shims/spark301db/src/main/scala/com/nvidia/spark/rapids/shims/spark301db/GpuWindowInPandasExec.scala +++ b/shims/spark301db/src/main/scala/com/nvidia/spark/rapids/shims/spark301db/GpuWindowInPandasExec.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -54,6 +54,10 @@ case class GpuWindowInPandasExec( } } + // On Databricks, binding the references on the driver side produces some invalid expressions + // (e.g. none#0L, none@1L) in the `projectList`, causing failures in the `test_window` test. + // So the binding for `projectList` is done lazily, and it now actually runs + // on the executors. 
private lazy val outReferences = { val allExpressions = windowFramesWithExpressions.map(_._2).flatten val references = allExpressions.zipWithIndex.map { case (e, i) => diff --git a/shims/spark310db/src/main/scala/org/apache/spark/sql/rapids/execution/python/shims/spark310db/GpuWindowInPandasExec.scala b/shims/spark310db/src/main/scala/org/apache/spark/sql/rapids/execution/python/shims/spark310db/GpuWindowInPandasExec.scala index 0adf2faea7f..c7c5e96aa98 100644 --- a/shims/spark310db/src/main/scala/org/apache/spark/sql/rapids/execution/python/shims/spark310db/GpuWindowInPandasExec.scala +++ b/shims/spark310db/src/main/scala/org/apache/spark/sql/rapids/execution/python/shims/spark310db/GpuWindowInPandasExec.scala @@ -64,6 +64,10 @@ case class GpuWindowInPandasExec( } } + // On Databricks, binding the references on the driver side produces some invalid expressions + // (e.g. none#0L, none@1L) in the `projectList`, causing failures in the `test_window` test. + // So the binding for `projectList` is done lazily, and it now actually runs + // on the executors. private lazy val outReferences = { val allExpressions = windowFramesWithExpressions.map(_._2).flatten val references = allExpressions.zipWithIndex.map { case (e, i) =>