From f4d3c119a7ace7a142d0f4a92dabd0dbca3e25c0 Mon Sep 17 00:00:00 2001 From: fjetter Date: Tue, 9 Jan 2024 10:15:07 +0100 Subject: [PATCH 1/3] Check if main is in pickled stream very last --- distributed/protocol/pickle.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/distributed/protocol/pickle.py b/distributed/protocol/pickle.py index 8b4b7328e5b..43c867e0a4d 100644 --- a/distributed/protocol/pickle.py +++ b/distributed/protocol/pickle.py @@ -67,14 +67,17 @@ def dumps(x, *, buffer_callback=None, protocol=HIGHEST_PROTOCOL): buffers.clear() pickler.dump(x) result = f.getvalue() - if b"__main__" in result or ( + if ( CLOUDPICKLE_GE_20 and getattr(inspect.getmodule(x), "__name__", None) in cloudpickle.list_registry_pickle_by_value() + ) or ( + (len(result) < 1000 or not _always_use_pickle_for(x)) + # this is pretty expensive so check very last + and b"__main__" in result ): - if len(result) < 1000 or not _always_use_pickle_for(x): - buffers.clear() - result = cloudpickle.dumps(x, **dump_kwargs) + buffers.clear() + result = cloudpickle.dumps(x, **dump_kwargs) except Exception: try: buffers.clear() From ee7c1c5297f063fedcf92d0fd020d983f6c191e5 Mon Sep 17 00:00:00 2001 From: fjetter Date: Tue, 9 Jan 2024 10:20:06 +0100 Subject: [PATCH 2/3] never check for main if _always_use_pickle_for is true --- distributed/protocol/pickle.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/distributed/protocol/pickle.py b/distributed/protocol/pickle.py index 43c867e0a4d..8222c5e469d 100644 --- a/distributed/protocol/pickle.py +++ b/distributed/protocol/pickle.py @@ -72,7 +72,7 @@ def dumps(x, *, buffer_callback=None, protocol=HIGHEST_PROTOCOL): and getattr(inspect.getmodule(x), "__name__", None) in cloudpickle.list_registry_pickle_by_value() ) or ( - (len(result) < 1000 or not _always_use_pickle_for(x)) + (len(result) < 1000 and not _always_use_pickle_for(x)) # this is pretty expensive so check very last and b"__main__" in result ): From c5f1379200c4802b80470e24a2c1b2ac49b8e891 Mon Sep 17 00:00:00 2001 From: fjetter Date: Tue, 9 Jan 2024 10:50:05 +0100 Subject: [PATCH 3/3] rewrite the thing --- distributed/protocol/pickle.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/distributed/protocol/pickle.py b/distributed/protocol/pickle.py index 8222c5e469d..04b27174638 100644 --- a/distributed/protocol/pickle.py +++ b/distributed/protocol/pickle.py @@ -67,14 +67,16 @@ def dumps(x, *, buffer_callback=None, protocol=HIGHEST_PROTOCOL): buffers.clear() pickler.dump(x) result = f.getvalue() - if ( + + if not _always_use_pickle_for(x) and ( CLOUDPICKLE_GE_20 and getattr(inspect.getmodule(x), "__name__", None) in cloudpickle.list_registry_pickle_by_value() - ) or ( - (len(result) < 1000 and not _always_use_pickle_for(x)) - # this is pretty expensive so check very last - and b"__main__" in result + or ( + len(result) < 1000 + # Do this very last since it's expensive + and b"__main__" in result + ) ): buffers.clear() result = cloudpickle.dumps(x, **dump_kwargs)