elixir-explorer · philss · Mar 27, 2024 · Mar 26, 2024 · Mar 26, 2024 · Mar 26, 2024
diff --git a/lib/explorer/data_frame.ex b/lib/explorer/data_frame.ex
@@ -1329,15 +1329,15 @@ defmodule Explorer.DataFrame do
 
     * `:streaming` - Tells the backend if it should use streaming, which means
       that the dataframe is not loaded to the memory at once, and instead it is
-      written in chunks from a lazy dataframe.  This option is not supported when using an S3
-      entry.
+      written in chunks from a lazy dataframe. Defaults to `true`. It is only used on
+      filesystems that support streaming and is ignored on all others (such as S3).
 
   """
   @doc type: :io
   @spec to_csv(df :: DataFrame.t(), filename :: fs_entry() | String.t(), opts :: Keyword.t()) ::
           :ok | {:error, Exception.t()}
   def to_csv(df, filename, opts \\ []) do
-    opts = Keyword.validate!(opts, header: true, delimiter: ",", streaming: false, config: nil)
+    opts = Keyword.validate!(opts, header: true, delimiter: ",", streaming: true, config: nil)
 
     with {:ok, entry} <- normalise_entry(filename, opts[:config]) do
       Shared.apply_impl(df, :to_csv, [entry, opts[:header], opts[:delimiter], opts[:streaming]])

diff --git a/lib/explorer/polars_backend/lazy_frame.ex b/lib/explorer/polars_backend/lazy_frame.ex
@@ -366,12 +366,7 @@ defmodule Explorer.PolarsBackend.LazyFrame do
   end
 
   @impl true
-  def to_csv(%DF{data: _df}, %S3.Entry{}, _header?, _delimiter, _streaming = true) do
-    {:error, ArgumentError.exception("streaming is not supported for writes to AWS S3")}
-  end
-
-  @impl true
-  def to_csv(%DF{} = ldf, %S3.Entry{} = entry, header?, delimiter, _streaming = false) do
+  def to_csv(%DF{} = ldf, %S3.Entry{} = entry, header?, delimiter, _streaming) do
     eager_df = collect(ldf)
 
     Eager.to_csv(eager_df, entry, header?, delimiter, false)

diff --git a/test/explorer/data_frame/lazy_test.exs b/test/explorer/data_frame/lazy_test.exs
@@ -254,7 +254,7 @@ defmodule Explorer.DataFrame.LazyTest do
     path = Path.join([tmp_dir, "fossil_fuels.csv"])
 
     ldf = DF.head(ldf, 15)
-    DF.to_csv!(ldf, path)
+    assert :ok = DF.to_csv(ldf, path)
 
     df = DF.collect(ldf)
     df1 = DF.from_csv!(path)
@@ -263,19 +263,20 @@ defmodule Explorer.DataFrame.LazyTest do
   end
 
   @tag :tmp_dir
-  test "to_csv/2 - with streaming enabled", %{ldf: ldf, tmp_dir: tmp_dir} do
+  test "to_csv/2 - with streaming disabled", %{ldf: ldf, tmp_dir: tmp_dir} do
     path = Path.join([tmp_dir, "fossil_fuels.csv"])
 
     ldf = DF.head(ldf, 15)
-    DF.to_csv!(ldf, path, streaming: true)
+    assert :ok = DF.to_csv(ldf, path, streaming: false)
 
     df = DF.collect(ldf)
     df1 = DF.from_csv!(path)
 
     assert DF.to_rows(df1) |> Enum.sort() == DF.to_rows(df) |> Enum.sort()
   end
 
-  test "to_csv/3 - cloud with streaming enabled", %{ldf: ldf} do
+  @tag :cloud_integration
+  test "to_csv/3 - cloud with streaming enabled - ignores streaming option", %{ldf: ldf} do
     config = %FSS.S3.Config{
       access_key_id: "test",
       secret_access_key: "test",
@@ -286,9 +287,12 @@ defmodule Explorer.DataFrame.LazyTest do
     path = "s3://test-bucket/test-lazy-writes/wine-#{System.monotonic_time()}.csv"
 
     ldf = DF.head(ldf, 15)
-    assert {:error, error} = DF.to_ipc(ldf, path, streaming: true, config: config)
+    assert :ok = DF.to_csv(ldf, path, streaming: true, config: config)
 
-    assert error == ArgumentError.exception("streaming is not supported for writes to AWS S3")
+    df = DF.collect(ldf)
+    df1 = DF.from_csv!(path, config: config)
+
+    assert DF.to_rows(df1) |> Enum.sort() == DF.to_rows(df) |> Enum.sort()
   end
 
   @tag :tmp_dir