From 05dc169e67cf7095f7dd28bff1cc3276c9ceaf34 Mon Sep 17 00:00:00 2001 From: William Lanchantin Date: Wed, 27 Nov 2024 13:19:54 -0500 Subject: [PATCH] Treat bitstrings like a list of codepoints --- lib/explorer/data_frame.ex | 50 +++++++++++++++++-------------- test/explorer/data_frame_test.exs | 2 +- 2 files changed, 28 insertions(+), 24 deletions(-) diff --git a/lib/explorer/data_frame.ex b/lib/explorer/data_frame.ex index 71a017e06..df8a5ad36 100644 --- a/lib/explorer/data_frame.ex +++ b/lib/explorer/data_frame.ex @@ -6006,39 +6006,43 @@ defmodule Explorer.DataFrame do end defp format_column(list, depth) when is_list(list) do - indent = String.duplicate(" ", depth - 1) - - contents = Enum.map(list, &format_column(&1, depth + 1)) - - if length(contents) > 1 or Enum.any?(contents, &String.contains?(&1, "\n")) do - "[\n #{indent}#{Enum.join(contents, "\n " <> indent)}\n#{indent}]" - else - "[#{contents}]" - end + list + |> Enum.map(&format_column(&1, depth + 1)) + |> multiline(depth, "[", "]") end # TODO: Use is_non_struct_map when we require Elixir v1.17+ defp format_column(map, depth) when is_map(map) and not is_struct(map) do - indent = String.duplicate(" ", max(depth - 1, 0)) + map + |> Enum.sort_by(fn {k, _} -> k end) + |> Enum.map(fn {k, v} -> "#{k}: #{format_column(v, depth + 1)}" end) + |> multiline(depth, "{", "}") + end - contents = - map - |> Enum.sort_by(fn {k, _} -> k end) - |> Enum.map(fn {k, v} -> "#{k}: #{format_column(v, depth + 1)}" end) + defp format_column(value, depth) do + cond do + is_nil(value) -> + "nil" - if length(contents) > 1 or Enum.any?(contents, &String.contains?(&1, "\n")) do - "{\n #{indent}#{Enum.join(contents, "\n " <> indent)}\n#{indent}}" - else - "{#{contents}}" + is_binary(value) and not String.valid?(value) -> + value + |> String.codepoints() + |> Enum.map(fn <<x>> -> to_string(x) end) + |> multiline(depth, "<<", ">>") + + true -> + to_string(value) end end - defp format_column(nil, _depth) do - "nil" - end + defp 
multiline(contents, depth, left, right) do + indent = String.duplicate(" ", max(depth - 1, 0)) - defp format_column(value, _depth) do - to_string(value) + if length(contents) > 1 or Enum.any?(contents, &String.contains?(&1, "\n")) do + "#{left}\n #{indent}#{Enum.join(contents, "\n " <> indent)}\n#{indent}#{right}" + else + "#{left}#{contents}#{right}" + end end @doc """ diff --git a/test/explorer/data_frame_test.exs b/test/explorer/data_frame_test.exs index fd4c452af..eeee2ae3d 100644 --- a/test/explorer/data_frame_test.exs +++ b/test/explorer/data_frame_test.exs @@ -4766,7 +4766,7 @@ defmodule Explorer.DataFrameTest do property "should be able to print any DataFrame" do check all( - dtypes <- Explorer.Generator.dtypes(exclude: [:binary, :category]), + dtypes <- Explorer.Generator.dtypes(exclude: :category), rows <- Explorer.Generator.rows(dtypes), max_runs: 1_000 ) do