delta-io · PierreDubrulle · Nov 13, 2023 · Nov 14, 2023 · wjones127 · Nov 17, 2023
@@ -4,6 +4,7 @@
 from dataclasses import dataclass
 from datetime import date, datetime
 from decimal import Decimal
+from hashlib import md5
 from math import inf
 from pathlib import Path
 from typing import (
@@ -62,6 +63,14 @@ class AddAction:
     stats: str
 
 
+def _hash_schema(schema: pa.Schema) -> str:
+    """Return an MD5 hex digest built from the schema's field names and types.
+
+    Every field name is mapped to the string form of its Arrow type, the
+    mapping is serialised as JSON with sorted keys, and the UTF-8 bytes of
+    that JSON are hashed. Because the keys are sorted, the order of the
+    fields does not influence the digest; only the names and the type
+    strings contribute to it.
+    """
+    name_to_type = {
+        field_name: str(field_type)
+        for field_name, field_type in zip(schema.names, schema.types)
+    }
+    canonical_json = json.dumps(name_to_type, sort_keys=True)
+    return md5(canonical_json.encode("utf-8")).hexdigest()
+
+
 def write_deltalake(
     table_or_uri: Union[str, Path, DeltaTable],
     data: Union[
@@ -195,7 +204,12 @@ def write_deltalake(
         partition_by = [partition_by]
 
     if table:  # already exists
-        if schema != table.schema().to_pyarrow(as_large_types=large_dtypes) and not (
+        hash_table_schema = _hash_schema(
+            table.schema().to_pyarrow(as_large_types=large_dtypes)
+        )
+        hash_schema_provided = _hash_schema(schema)
+
+        if hash_schema_provided != hash_table_schema and not (
             mode == "overwrite" and overwrite_schema
         ):
             raise ValueError(