Skip to content

Commit

Permalink
fix: Workaround for Pandas.DataFrame.to_csv bug
Browse files: browse the repository at this point in the history
  • Loading branch information
john-bodley committed May 30, 2024
1 parent 6575cac commit 61123b1
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 16 deletions.
2 changes: 1 addition & 1 deletion superset/utils/csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ def escape_values(v: Any) -> Union[str, Any]:
if isinstance(value, str):
df.at[idx, name] = escape_value(value)

return df.to_csv(**kwargs)
return df.to_csv(escapechar="\\", **kwargs)


def get_chart_csv_data(
Expand Down
44 changes: 29 additions & 15 deletions tests/integration_tests/utils/csv_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import io

import pandas as pd
import pyarrow as pa
Expand Down Expand Up @@ -59,24 +58,39 @@ def test_escape_value():


def test_df_to_escaped_csv():
csv_rows = [
["col_a", "=func()"],
["-10", "=cmd|' /C calc'!A0"],
["a", '""=b'],
[" =a", "b"],
]
csv_str = "\n".join([",".join(row) for row in csv_rows])

df = pd.read_csv(io.StringIO(csv_str))
df = pd.DataFrame(
data={
"value": [
"a",
"col_a",
"=func()",
"-10",
"=cmd|' /C calc'!A0",
'""=b',
" =a",
"\x00",
]
}
)

escaped_csv_str = csv.df_to_escaped_csv(
df,
encoding="utf8",
index=False,
header=False,
)

escaped_csv_str = csv.df_to_escaped_csv(df, encoding="utf8", index=False)
escaped_csv_rows = [row.split(",") for row in escaped_csv_str.strip().split("\n")]

assert escaped_csv_rows == [
["col_a", "'=func()"],
["-10", r"'=cmd\|' /C calc'!A0"],
["a", "'=b"], # pandas seems to be removing the leading ""
["' =a", "b"],
["a"],
["col_a"],
["'=func()"],
["-10"],
[r"'=cmd\\|' /C calc'!A0"],
['"\'""""=b"'],
["' =a"],
["\x00"],
]

df = pa.array([1, None]).to_pandas(integer_object_nulls=True).to_frame()
Expand Down

0 comments on commit 61123b1

Please sign in to comment.