diff --git a/integration_tests/README.md b/integration_tests/README.md index fbf24f3d0de..d1a47f83aaa 100644 --- a/integration_tests/README.md +++ b/integration_tests/README.md @@ -454,10 +454,11 @@ The marks you care about are all in marks.py For the most part you can ignore this file. It provides the underlying Spark session to operations that need it, but most tests should interact with it through `asserts.py`. -All data generation should occur within a Spark session. Typically this is done by passing a -lambda to functions in `asserts.py` such as `assert_gpu_and_cpu_are_equal_collect`. However, -for scalar generation like `gen_scalars`, you may need to put it in a `with_cpu_session`. It is -because negative scale decimals can have problems if called from outside of `with_spark_session`. +All data generation and Spark function calls should occur within a Spark session. Typically +this is done by passing a lambda to functions in `asserts.py` such as +`assert_gpu_and_cpu_are_equal_collect`. However, for scalar generation like `gen_scalars`, you +may need to put it in a `with_cpu_session`. This is because negative scale decimals can have +problems when calling `f.lit` from outside of `with_spark_session`. 
## Guidelines for Testing diff --git a/integration_tests/src/main/python/collection_ops_test.py b/integration_tests/src/main/python/collection_ops_test.py index 61470b2e179..5751323ecee 100644 --- a/integration_tests/src/main/python/collection_ops_test.py +++ b/integration_tests/src/main/python/collection_ops_test.py @@ -67,8 +67,8 @@ def test_concat_double_list_with_lit(dg): @pytest.mark.parametrize('data_gen', non_nested_array_gens, ids=idfn) def test_concat_list_with_lit(data_gen): - lit_col1 = f.lit(with_cpu_session(lambda spark: gen_scalar(data_gen))).cast(data_gen.data_type) - lit_col2 = f.lit(with_cpu_session(lambda spark: gen_scalar(data_gen))).cast(data_gen.data_type) + lit_col1 = with_cpu_session(lambda spark: f.lit(gen_scalar(data_gen))).cast(data_gen.data_type) + lit_col2 = with_cpu_session(lambda spark: f.lit(gen_scalar(data_gen))).cast(data_gen.data_type) assert_gpu_and_cpu_are_equal_collect( lambda spark: binary_op_df(spark, data_gen).select( @@ -106,8 +106,8 @@ def test_map_concat(data_gen): @pytest.mark.parametrize('data_gen', map_gens_sample + decimal_64_map_gens + decimal_128_map_gens, ids=idfn) def test_map_concat_with_lit(data_gen): - lit_col1 = f.lit(with_cpu_session(lambda spark: gen_scalar(data_gen))).cast(data_gen.data_type) - lit_col2 = f.lit(with_cpu_session(lambda spark: gen_scalar(data_gen))).cast(data_gen.data_type) + lit_col1 = with_cpu_session(lambda spark: f.lit(gen_scalar(data_gen))).cast(data_gen.data_type) + lit_col2 = with_cpu_session(lambda spark: f.lit(gen_scalar(data_gen))).cast(data_gen.data_type) assert_gpu_and_cpu_are_equal_collect( lambda spark: binary_op_df(spark, data_gen).select( f.map_concat(f.col('a'), f.col('b'), lit_col1),