From 5575a7c535b83b4f95a11859baba4c229790fac4 Mon Sep 17 00:00:00 2001 From: Leonardo Gama Date: Fri, 10 Jun 2022 07:44:43 -0300 Subject: [PATCH 01/13] Don't update vars(main) twice --- dill/_dill.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/dill/_dill.py b/dill/_dill.py index df3f69fa..8d38f880 100644 --- a/dill/_dill.py +++ b/dill/_dill.py @@ -510,19 +510,17 @@ def dump_session(filename='/tmp/session.pkl', main=None, byref=False, **kwds): def load_session(filename='/tmp/session.pkl', main=None, **kwds): """update the __main__ module with the state from the session file""" - if main is None: main = _main_module if hasattr(filename, 'read'): f = filename else: f = open(filename, 'rb') try: #FIXME: dill.settings are disabled unpickler = Unpickler(f, **kwds) - unpickler._main = main unpickler._session = True + if main is not None: + unpickler._main = main module = unpickler.load() - unpickler._session = False - main.__dict__.update(module.__dict__) - _restore_modules(unpickler, main) + _restore_modules(unpickler, module) finally: if f is not filename: # If newly opened file f.close() From ba4bce3f49d09e575d7a29dca85c7071ff0e23e7 Mon Sep 17 00:00:00 2001 From: Leonardo Gama Date: Fri, 10 Jun 2022 07:46:58 -0300 Subject: [PATCH 02/13] Inspect the pickle beginning to identify main and check against 'main' argument --- dill/_dill.py | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/dill/_dill.py b/dill/_dill.py index 8d38f880..9184f560 100644 --- a/dill/_dill.py +++ b/dill/_dill.py @@ -508,17 +508,52 @@ def dump_session(filename='/tmp/session.pkl', main=None, byref=False, **kwds): f.close() return +def _inspect_pickle(file, main): + from pickletools import genops + UNICODE = {'UNICODE', 'BINUNICODE', 'SHORT_BINUNICODE'} + found_import = False + try: + for opcode, arg, pos in genops(file.peek(256)): + if not found_import: + if opcode.name in ('GLOBAL', 'SHORT_BINUNICODE') and \ + 
arg.endswith('_import_module'): + found_import = True + else: + if opcode.name in UNICODE: + if not all(name.isidentifier() for name in arg.split('.')): + raise UnpicklingError("invalid module name: %r" % arg) + return arg + else: + raise UnpicklingError("reached STOP without finding main module") + except (AttributeError, ValueError) as error: + if isinstance(error, AttributeError) and main is not None: + # File is not peekable, but we have main. + return None + raise UnpicklingError("unable to identify main module") from error + def load_session(filename='/tmp/session.pkl', main=None, **kwds): """update the __main__ module with the state from the session file""" if hasattr(filename, 'read'): f = filename + if not hasattr(f, 'peek'): + try: + import io + f = io.BufferedReader(f) + except Exception: + pass # ...and hope for the best else: f = open(filename, 'rb') try: #FIXME: dill.settings are disabled unpickler = Unpickler(f, **kwds) unpickler._session = True + pickle_main = _inspect_pickle(f, main) if main is not None: + if pickle_main is not None and main.__name__ != pickle_main: + raise UnpicklingError("can't load module %r into module %r" % \ + (pickle_main, unpickler._main.__name__)) unpickler._main = main + else: + unpickler._main = _import_module(pickle_main) module = unpickler.load() _restore_modules(unpickler, module) finally: From 624ba135f8bdd5cb46071c8d06d3376d1d649cb8 Mon Sep 17 00:00:00 2001 From: Leonardo Gama Date: Sun, 12 Jun 2022 21:09:37 -0300 Subject: [PATCH 03/13] Save and restore modules created at runtime with ModuleType() --- dill/_dill.py | 102 +++++++++++++++++++++++++++++++++++--------------- 1 file changed, 72 insertions(+), 30 deletions(-) diff --git a/dill/_dill.py b/dill/_dill.py index 9184f560..00fcbb5a 100644 --- a/dill/_dill.py +++ b/dill/_dill.py @@ -488,11 +488,11 @@ def dump_session(filename='/tmp/session.pkl', main=None, byref=False, **kwds): protocol = settings['protocol'] if main is None: main = _main_module if 
hasattr(filename, 'write'): - f = filename + file = filename else: - f = open(filename, 'wb') + file = open(filename, 'wb') try: - pickler = Pickler(f, protocol, **kwds) + pickler = Pickler(file, protocol, **kwds) pickler._original_main = main if byref: main = _stash_modules(main) @@ -504,11 +504,11 @@ def dump_session(filename='/tmp/session.pkl', main=None, byref=False, **kwds): pickler._main_modified = main is not pickler._original_main pickler.dump(main) finally: - if f is not filename: # If newly opened file - f.close() + if file is not filename: # if newly opened file + file.close() return -def _inspect_pickle(file, main): +def _inspect_pickle(file, main_is_none): from pickletools import genops UNICODE = {'UNICODE', 'BINUNICODE', 'SHORT_BINUNICODE'} found_import = False @@ -526,7 +526,7 @@ def _inspect_pickle(file, main): else: raise UnpicklingError("reached STOP without finding main module") except (AttributeError, ValueError) as error: - if isinstance(error, AttributeError) and main is not None: + if isinstance(error, AttributeError) and not main_is_none: # File is not peekable, but we have main. 
return None raise UnpicklingError("unable to identify main module") from error @@ -534,32 +534,68 @@ def _inspect_pickle(file, main): def load_session(filename='/tmp/session.pkl', main=None, **kwds): """update the __main__ module with the state from the session file""" if hasattr(filename, 'read'): - f = filename - if not hasattr(f, 'peek'): - try: - import io - f = io.BufferedReader(f) - except Exception: - pass # ...and hope for the best + file = filename else: - f = open(filename, 'rb') - try: #FIXME: dill.settings are disabled - unpickler = Unpickler(f, **kwds) + file = open(filename, 'rb') + if not hasattr(file, 'peek'): + try: + import io + file = io.BufferedReader(file) + except Exception: + pass # ...and hope for the best + try: + #FIXME: dill.settings are disabled + unpickler = Unpickler(file, **kwds) unpickler._session = True - pickle_main = _inspect_pickle(f, main) + pickle_main = _inspect_pickle(file, main is None) + + # Resolve unpickler._main + if main is None and pickle_main is not None: + main = pickle_main + if isinstance(main, str): + if main.startswith('__runtime__.'): + # Create runtime module to load the session into. + main = ModuleType(main.partition('.')[-1]) + else: + main = _import_module(main) if main is not None: - if pickle_main is not None and main.__name__ != pickle_main: - raise UnpicklingError("can't load module %r into module %r" % \ - (pickle_main, unpickler._main.__name__)) + if not isinstance(main, ModuleType): + raise ValueError("%r is not a module" % main) unpickler._main = main - else: - unpickler._main = _import_module(pickle_main) + main = unpickler._main + + # Check against the pickle's main. 
+ is_main_imported = _is_imported_module(main) + if pickle_main is not None: + is_runtime_mod = pickle_main.startswith('__runtime__.') + if is_runtime_mod: + pickle_main = pickle_main.partition('.')[-1] + if is_runtime_mod and is_main_imported: + raise UnpicklingError("can't restore non-imported module %r into an imported one" \ + % pickle_main) + if not is_runtime_mod and not is_main_imported: + raise UnpicklingError("can't restore imported module %r into a non-imported one" \ + % pickle_main) + if main.__name__ != pickle_main: + raise UnpicklingError("can't restore module %r into module %r" \ + % (pickle_main, main.__name__)) + + # This is for find_class() to be able to locate it. + if not is_main_imported: + runtime_main = '__runtime__.%s' % main.__name__ + sys.modules[runtime_main] = main + module = unpickler.load() - _restore_modules(unpickler, module) finally: - if f is not filename: # If newly opened file - f.close() - return + if file is not filename: # if newly opened file + file.close() + try: + del sys.modules[runtime_main] + except (KeyError, NameError): + pass + _restore_modules(unpickler, module) + if module is not _main_module: + return module ### End: Pickle the Interpreter @@ -1286,14 +1322,16 @@ def _dict_from_dictproxy(dictproxy): def _import_module(import_name, safe=False): try: - if '.' in import_name: + if import_name.startswith('__runtime__.'): + return sys.modules[import_name] + elif '.' 
in import_name: items = import_name.split('.') module = '.'.join(items[:-1]) obj = items[-1] else: return __import__(import_name) return getattr(__import__(module, None, None, [obj]), obj) - except (ImportError, AttributeError): + except (ImportError, AttributeError, KeyError): if safe: return None raise @@ -1976,6 +2014,9 @@ def _is_builtin_module(module): module.__file__.endswith(EXTENSION_SUFFIXES) or \ 'site-packages' in module.__file__ +def _is_imported_module(module): + return getattr(module, '__loader__', None) is not None or module in sys.modules.values() + @register(ModuleType) def save_module(pickler, obj): if False: #_use_diff: @@ -2003,7 +2044,8 @@ def save_module(pickler, obj): _main_dict = obj.__dict__.copy() #XXX: better no copy? option to copy? [_main_dict.pop(item, None) for item in singletontypes + ["__builtins__", "__loader__"]] - pickler.save_reduce(_import_module, (obj.__name__,), obj=obj, + mod_name = obj.__name__ if _is_imported_module(obj) else '__runtime__.%s' % obj.__name__ + pickler.save_reduce(_import_module, (mod_name,), obj=obj, state=_main_dict) log.info("# M1") elif PY3 and obj.__name__ == "dill._dill": From 8f721cf367a1352fe6f6baa03cc89e11ea93b34b Mon Sep 17 00:00:00 2001 From: Leonardo Gama Date: Sun, 12 Jun 2022 21:14:36 -0300 Subject: [PATCH 04/13] tests: don't need to add runtime module to sys.modules --- tests/test_session.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_session.py b/tests/test_session.py index 8d036fb5..36df54fb 100644 --- a/tests/test_session.py +++ b/tests/test_session.py @@ -209,7 +209,7 @@ def test_objects(main, copy_dict, byref): dump = test_file.getvalue() test_file.close() - main = sys.modules[modname] = ModuleType(modname) # empty + main = ModuleType(modname) # empty # This should work after fixing https://github.com/uqfoundation/dill/issues/462 test_file = dill._dill.StringIO(dump) dill.load_session(test_file, main=main) From bea562754c187d549b95a93da77000090b8a7c73 Mon 
Sep 17 00:00:00 2001 From: Leonardo Gama Date: Mon, 13 Jun 2022 10:38:26 -0300 Subject: [PATCH 05/13] load_session_copy(): load a session state into a runtime module --- dill/__init__.py | 12 ++++---- dill/_dill.py | 73 +++++++++++++++++++++++++++++++++++++----------- 2 files changed, 64 insertions(+), 21 deletions(-) diff --git a/dill/__init__.py b/dill/__init__.py index ac93ff6a..38c7be7e 100644 --- a/dill/__init__.py +++ b/dill/__init__.py @@ -283,11 +283,13 @@ """ -from ._dill import dump, dumps, load, loads, dump_session, load_session, \ - Pickler, Unpickler, register, copy, pickle, pickles, check, \ - HIGHEST_PROTOCOL, DEFAULT_PROTOCOL, PicklingError, UnpicklingError, \ - HANDLE_FMODE, CONTENTS_FMODE, FILE_FMODE, PickleError, PickleWarning, \ - PicklingWarning, UnpicklingWarning +from ._dill import ( + dump, dumps, load, loads, dump_session, load_session, load_session_copy, + Pickler, Unpickler, register, copy, pickle, pickles, check, + HIGHEST_PROTOCOL, DEFAULT_PROTOCOL, PicklingError, UnpicklingError, + HANDLE_FMODE, CONTENTS_FMODE, FILE_FMODE, PickleError, PickleWarning, + PicklingWarning, UnpicklingWarning, + ) from . import source, temp, detect # get global settings diff --git a/dill/_dill.py b/dill/_dill.py index 00fcbb5a..81e7f1d7 100644 --- a/dill/_dill.py +++ b/dill/_dill.py @@ -15,11 +15,12 @@ Test against "all" python types (Std. Lib. CH 1-15 @ 2.7) by mmckerns. Test against CH16+ Std. Lib. ... TBD. 
""" -__all__ = ['dump','dumps','load','loads','dump_session','load_session', - 'Pickler','Unpickler','register','copy','pickle','pickles', - 'check','HIGHEST_PROTOCOL','DEFAULT_PROTOCOL','PicklingError', - 'UnpicklingError','HANDLE_FMODE','CONTENTS_FMODE','FILE_FMODE', - 'PickleError','PickleWarning','PicklingWarning','UnpicklingWarning'] +__all__ = ['dump', 'dumps', 'load', 'loads', 'dump_session', 'load_session', + 'load_session_copy', 'Pickler', 'Unpickler', 'register', 'copy', + 'pickle', 'pickles', 'check', 'HIGHEST_PROTOCOL', 'DEFAULT_PROTOCOL', + 'PicklingError', 'UnpicklingError', 'HANDLE_FMODE', 'CONTENTS_FMODE', + 'FILE_FMODE', 'PickleError', 'PickleWarning', 'PicklingWarning', + 'UnpicklingWarning'] __module__ = 'dill' @@ -508,6 +509,19 @@ def dump_session(filename='/tmp/session.pkl', main=None, byref=False, **kwds): file.close() return +def _open_peekable(filename): + if hasattr(filename, 'read'): + file = filename + else: + file = open(filename, 'rb') + if not hasattr(file, 'peek'): + try: + import io + file = io.BufferedReader(file) + except Exception: + pass # ...and hope for the best + return file + def _inspect_pickle(file, main_is_none): from pickletools import genops UNICODE = {'UNICODE', 'BINUNICODE', 'SHORT_BINUNICODE'} @@ -533,16 +547,7 @@ def _inspect_pickle(file, main_is_none): def load_session(filename='/tmp/session.pkl', main=None, **kwds): """update the __main__ module with the state from the session file""" - if hasattr(filename, 'read'): - file = filename - else: - file = open(filename, 'rb') - if not hasattr(file, 'peek'): - try: - import io - file = io.BufferedReader(file) - except Exception: - pass # ...and hope for the best + file = _open_peekable(filename) try: #FIXME: dill.settings are disabled unpickler = Unpickler(file, **kwds) @@ -587,16 +592,52 @@ def load_session(filename='/tmp/session.pkl', main=None, **kwds): module = unpickler.load() finally: - if file is not filename: # if newly opened file + if not hasattr(filename, 
'read'): # if newly opened file file.close() try: del sys.modules[runtime_main] except (KeyError, NameError): pass + assert module is main _restore_modules(unpickler, module) if module is not _main_module: return module +def load_session_copy(filename='/tmp/session.pkl', **kwds): + """ + Load the state of a module saved to a session file into a runtime created module. + + The loaded module's origin is stored in the '__session__' attribute. + Warning: this function is completely thread-unsafe. + """ + if 'main' in kwds: + raise TypeError("'main' is an invalid keyword argument for load_session_copy()") + file = _open_peekable(filename) + try: + pickle_main = _inspect_pickle(file, main_is_none=True) + main = _import_module(pickle_main) + main_globals = vars(main).copy() + vars(main).clear() + for attr in ('__builtins__', '__loader__', '__name__'): + # Required by load_session(). + if attr in main_globals: + setattr(main, attr, main_globals[attr]) + load_session(file, **kwds) + module = ModuleType(main.__name__) + vars(module).update(vars(main)) + finally: + if not hasattr(filename, 'read'): # if newly opened file + file.close() + try: + vars(main).clear() + vars(main).update(main_globals) + except NameError: + pass + module.pop('__path__', None) + module.__loader__ = module.__spec__ = None + module.__session__ = filename if isinstance(filename, str) else repr(filename) + return module + ### End: Pickle the Interpreter class MetaCatchingDict(dict): From 3d5b2a91ddfc07630e20be3117642e8ca67f1758 Mon Sep 17 00:00:00 2001 From: Leonardo Gama Date: Fri, 17 Jun 2022 16:47:11 -0300 Subject: [PATCH 06/13] tests: session tests code reorganization --- tests/test_session.py | 268 ++++++++++++++++++++---------------------- 1 file changed, 126 insertions(+), 142 deletions(-) diff --git a/tests/test_session.py b/tests/test_session.py index 36df54fb..b7506b70 100644 --- a/tests/test_session.py +++ b/tests/test_session.py @@ -6,59 +6,54 @@ # - 
https://github.com/uqfoundation/dill/blob/master/LICENSE from __future__ import print_function -import atexit, dill, os, sys, __main__ +import atexit, dill, io, os, sys, __main__ session_file = os.path.join(os.path.dirname(__file__), 'session-byref-%s.pkl') -def test_modules(main, byref): - main_dict = main.__dict__ +################### +# Child process # +################### - try: - for obj in ('json', 'url', 'local_mod', 'sax', 'dom'): - assert main_dict[obj].__name__ in sys.modules - - for obj in ('Calendar', 'isleap'): - assert main_dict[obj] is sys.modules['calendar'].__dict__[obj] - assert main.day_name.__module__ == 'calendar' - if byref: - assert main.day_name is sys.modules['calendar'].__dict__['day_name'] +def _error_line(error, obj, byref): + import traceback + line = traceback.format_exc().splitlines()[-2].replace('[obj]', '['+repr(obj)+']') + return "while testing (with byref=%s): %s" % (byref, line.lstrip()) - assert main.complex_log is sys.modules['cmath'].__dict__['log'] - - except AssertionError: - import traceback - error_line = traceback.format_exc().splitlines()[-2].replace('[obj]', '['+repr(obj)+']') - print("Error while testing (byref=%s):" % byref, error_line, sep="\n", file=sys.stderr) - raise - - -# Test session loading in a fresh interpreter session. if __name__ == '__main__' and len(sys.argv) >= 3 and sys.argv[1] == '--child': - byref = sys.argv[2] == 'True' + # Test session loading in a fresh interpreter session. + byref = (sys.argv[2] == 'True') dill.load_session(session_file % byref) - test_modules(__main__, byref) - sys.exit() -del test_modules + def test_modules(byref): + # FIXME: In this test setting with CPython 3.7, 'calendar' is not included in sys.modules, + # independent of the value of byref. Tried to run garbage collection just before loading the + # session with no luck. It fails even when preceding them with 'import calendar'. Needed to + # run these kinds of tests in a supbrocess. 
Failing test sample: + # assert globals()['day_name'] is vars(sys.modules['calendar'])['day_name'] + try: + for obj in ('json', 'url', 'local_mod', 'sax', 'dom'): + assert globals()[obj].__name__ in sys.modules + assert 'calendar' in sys.modules and 'cmath' in sys.modules + import calendar, cmath + for obj in ('Calendar', 'isleap'): + assert globals()[obj] is sys.modules['calendar'].__dict__[obj] + assert __main__.day_name.__module__ == 'calendar' + if byref: + assert __main__.day_name is calendar.day_name -def _clean_up_cache(module): - cached = module.__file__.split('.', 1)[0] + '.pyc' - cached = module.__cached__ if hasattr(module, '__cached__') else cached - pycache = os.path.join(os.path.dirname(module.__file__), '__pycache__') - for remove, file in [(os.remove, cached), (os.removedirs, pycache)]: - try: - remove(file) - except OSError: - pass + assert __main__.complex_log is cmath.log + except AssertionError as error: + error.args = (_error_line(error, obj, byref),) + raise -# To clean up namespace before loading the session. -original_modules = set(sys.modules.keys()) - \ - set(['json', 'urllib', 'xml.sax', 'xml.dom.minidom', 'calendar', 'cmath']) -original_objects = set(__main__.__dict__.keys()) -original_objects.add('original_objects') + test_modules(byref) + sys.exit() +#################### +# Parent process # +#################### # Create various kinds of objects to test different internal logics. @@ -68,7 +63,6 @@ def _clean_up_cache(module): from xml import sax # submodule import xml.dom.minidom as dom # submodule under alias import test_dictviews as local_mod # non-builtin top-level module -atexit.register(_clean_up_cache, local_mod) ## Imported objects. from calendar import Calendar, isleap, day_name # class, function, other object @@ -92,103 +86,116 @@ def weekdays(self): selfref = __main__ -def test_objects(main, copy_dict, byref): - main_dict = main.__dict__ +# Setup global namespace for session saving tests. 
+class TestNamespace: + test_globals = globals().copy() + def __init__(self, **extra): + self.extra = extra + def __enter__(self): + self.backup = globals().copy() + globals().clear() + globals().update(self.test_globals) + globals().update(self.extra) + return self + def __exit__(self, *exc_info): + globals().clear() + globals().update(self.backup) + + +def _clean_up_cache(module): + cached = module.__file__.split('.', 1)[0] + '.pyc' + cached = module.__cached__ if hasattr(module, '__cached__') else cached + pycache = os.path.join(os.path.dirname(module.__file__), '__pycache__') + for remove, file in [(os.remove, cached), (os.removedirs, pycache)]: + try: + remove(file) + except OSError: + pass + +atexit.register(_clean_up_cache, local_mod) - try: - for obj in ('json', 'url', 'local_mod', 'sax', 'dom'): - assert main_dict[obj].__name__ == copy_dict[obj].__name__ - #FIXME: In the second test call, 'calendar' is not included in - # sys.modules, independent of the value of byref. Tried to run garbage - # collection before with no luck. This block fails even with - # "import calendar" before it. Needed to restore the original modules - # with the 'copy_modules' object. (Moved to "test_session_{1,2}.py".) 
+def _test_objects(main, globals_copy, byref): + try: + main_dict = vars(__main__) + global Person, person, Calendar, CalendarSubclass, cal, selfref - #for obj in ('Calendar', 'isleap'): - # assert main_dict[obj] is sys.modules['calendar'].__dict__[obj] - #assert main_dict['day_name'].__module__ == 'calendar' - #if byref: - # assert main_dict['day_name'] is sys.modules['calendar'].__dict__['day_name'] + for obj in ('json', 'url', 'local_mod', 'sax', 'dom'): + assert globals()[obj].__name__ == globals_copy[obj].__name__ for obj in ('x', 'empty', 'names'): - assert main_dict[obj] == copy_dict[obj] + assert main_dict[obj] == globals_copy[obj] globs = '__globals__' if dill._dill.PY3 else 'func_globals' for obj in ['squared', 'cubed']: assert getattr(main_dict[obj], globs) is main_dict - assert main_dict[obj](3) == copy_dict[obj](3) + assert main_dict[obj](3) == globals_copy[obj](3) - assert main.Person.__module__ == main.__name__ - assert isinstance(main.person, main.Person) - assert main.person.age == copy_dict['person'].age + assert Person.__module__ == __main__.__name__ + assert isinstance(person, Person) + assert person.age == globals_copy['person'].age - assert issubclass(main.CalendarSubclass, main.Calendar) - assert isinstance(main.cal, main.CalendarSubclass) - assert main.cal.weekdays() == copy_dict['cal'].weekdays() + assert issubclass(CalendarSubclass, Calendar) + assert isinstance(cal, CalendarSubclass) + assert cal.weekdays() == globals_copy['cal'].weekdays() - assert main.selfref is main + assert selfref is __main__ - except AssertionError: - import traceback - error_line = traceback.format_exc().splitlines()[-2].replace('[obj]', '['+repr(obj)+']') - print("Error while testing (byref=%s):" % byref, error_line, sep="\n", file=sys.stderr) + except AssertionError as error: + error.args = (_error_line(error, obj, byref),) raise -if __name__ == '__main__': +def test_session_main(byref): + """test dump/load_session() for __main__, both in this process and in a 
subprocess""" + extra_objects = {} + if byref: + # Test unpickleable imported object in main. + from sys import flags + extra_objects['flags'] = flags - # Test dump_session() and load_session(). - for byref in (False, True): - if byref: - # Test unpickleable imported object in main. - from sys import flags - - #print(sorted(set(sys.modules.keys()) - original_modules)) - dill._test_file = dill._dill.StringIO() + with TestNamespace(**extra_objects) as ns: try: - # For the subprocess. - dill.dump_session(session_file % byref, byref=byref) - - dill.dump_session(dill._test_file, byref=byref) - dump = dill._test_file.getvalue() - dill._test_file.close() - - import __main__ - copy_dict = __main__.__dict__.copy() - copy_modules = sys.modules.copy() - del copy_dict['dump'] - del copy_dict['__main__'] - for name in copy_dict.keys(): - if name not in original_objects: - del __main__.__dict__[name] - for module in list(sys.modules.keys()): - if module not in original_modules: - del sys.modules[module] - - dill._test_file = dill._dill.StringIO(dump) - dill.load_session(dill._test_file) - #print(sorted(set(sys.modules.keys()) - original_modules)) - # Test session loading in a new session. + dill.dump_session(session_file % byref, byref=byref) from dill.tests.__main__ import python, shell, sp error = sp.call([python, __file__, '--child', str(byref)], shell=shell) if error: sys.exit(error) - del python, shell, sp - finally: - dill._test_file.close() try: os.remove(session_file % byref) except OSError: pass - test_objects(__main__, copy_dict, byref) - __main__.__dict__.update(copy_dict) - sys.modules.update(copy_modules) - del __main__, copy_dict, copy_modules, dump + # Test session loading in the same session. 
+ session_buffer = io.BytesIO() + dill.dump_session(session_buffer, byref=byref) + session_buffer.seek(0) + dill.load_session(session_buffer) + ns.backup['_test_objects'](__main__, ns.backup, byref) +def test_session_other(): + """test dump/load_session() for a module other than __main__""" + import test_classdef as module + atexit.register(_clean_up_cache, module) + module.selfref = module + dict_objects = [obj for obj in module.__dict__.keys() if not obj.startswith('__')] + + session_buffer = io.BytesIO() + dill.dump_session(test_file, main=module) + + for obj in dict_objects: + del module.__dict__[obj] + + session_buffer.seek(0) + dill.load_session(session_buffer) #, main=module) + + assert all(obj in module.__dict__ for obj in dict_objects) + assert module.selfref is module + + +def test_byref_without_byref_objects(): # This is for code coverage, tests the use case of dump_session(byref=True) # without imported objects in the namespace. It's a contrived example because # even dill can't be in it. 
@@ -203,41 +210,18 @@ def test_objects(main, copy_dict, byref): _main.__dill_imported, _main.__dill_imported_as, _main.__dill_imported_top_level, file=sys.stderr) - test_file = dill._dill.StringIO() - try: - dill.dump_session(test_file, main=main, byref=True) - dump = test_file.getvalue() - test_file.close() - - main = ModuleType(modname) # empty - # This should work after fixing https://github.com/uqfoundation/dill/issues/462 - test_file = dill._dill.StringIO(dump) - dill.load_session(test_file, main=main) - finally: - test_file.close() + session_buffer = io.BytesIO() + dill.dump_session(session_buffer, main=main, byref=True) + main = ModuleType(modname) # empty + session_buffer.seek(0) + # This should work after fixing https://github.com/uqfoundation/dill/issues/462 + dill.load_session(test_file, main=main) assert main.x == 42 - # Dump session for module that is not __main__: - import test_classdef as module - atexit.register(_clean_up_cache, module) - module.selfref = module - dict_objects = [obj for obj in module.__dict__.keys() if not obj.startswith('__')] - - test_file = dill._dill.StringIO() - try: - dill.dump_session(test_file, main=module) - dump = test_file.getvalue() - test_file.close() - - for obj in dict_objects: - del module.__dict__[obj] - - test_file = dill._dill.StringIO(dump) - dill.load_session(test_file, main=module) - finally: - test_file.close() - - assert all(obj in module.__dict__ for obj in dict_objects) - assert module.selfref is module +if __name__ == '__main__': + test_session_main(byref=False) + test_session_main(byref=True) + test_session_other() + test_byref_without_byref_objects() From 5bf3603bf9a05c06a8501c28bdc059a93d7761d3 Mon Sep 17 00:00:00 2001 From: Leonardo Gama Date: Fri, 24 Jun 2022 09:21:50 -0300 Subject: [PATCH 07/13] tests: test runtime created module session saving --- tests/test_session.py | 46 ++++++++++++++++++++++++++----------------- 1 file changed, 28 insertions(+), 18 deletions(-) diff --git 
a/tests/test_session.py b/tests/test_session.py index b7506b70..6a6ab5e5 100644 --- a/tests/test_session.py +++ b/tests/test_session.py @@ -183,7 +183,7 @@ def test_session_other(): dict_objects = [obj for obj in module.__dict__.keys() if not obj.startswith('__')] session_buffer = io.BytesIO() - dill.dump_session(test_file, main=module) + dill.dump_session(session_buffer, main=module) for obj in dict_objects: del module.__dict__[obj] @@ -195,33 +195,43 @@ def test_session_other(): assert module.selfref is module -def test_byref_without_byref_objects(): - # This is for code coverage, tests the use case of dump_session(byref=True) - # without imported objects in the namespace. It's a contrived example because - # even dill can't be in it. +def test_runtime_module(): from types import ModuleType - modname = '__test_main__' - main = ModuleType(modname) - main.x = 42 + modname = '__runtime__' + runtime = ModuleType(modname) + runtime.x = 42 - _main = dill._dill._stash_modules(main) - if _main is not main: + mod = dill._dill._stash_modules(runtime) + if mod is not runtime: print("There are objects to save by referenece that shouldn't be:", - _main.__dill_imported, _main.__dill_imported_as, _main.__dill_imported_top_level, + mod.__dill_imported, mod.__dill_imported_as, mod.__dill_imported_top_level, file=sys.stderr) + # This is also for code coverage, tests the use case of dump_session(byref=True) + # without imported objects in the namespace. It's a contrived example because + # even dill can't be in it. This should work after fixing #462. session_buffer = io.BytesIO() - dill.dump_session(session_buffer, main=main, byref=True) - main = ModuleType(modname) # empty + dill.dump_session(session_buffer, main=runtime, byref=True) + session_dump = session_buffer.getvalue() + + # Pass a new runtime created module with the same name. 
+ runtime = ModuleType(modname) # empty + returned_mod = dill.load_session(io.BytesIO(session_dump), main=runtime) + assert returned_mod is runtime + assert runtime.__name__ == modname + assert runtime.x == 42 + assert runtime not in sys.modules.values() + + # Pass nothing as main. load_session() must create it. session_buffer.seek(0) - # This should work after fixing https://github.com/uqfoundation/dill/issues/462 - dill.load_session(test_file, main=main) - - assert main.x == 42 + runtime = dill.load_session(io.BytesIO(session_dump)) + assert runtime.__name__ == modname + assert runtime.x == 42 + assert runtime not in sys.modules.values() if __name__ == '__main__': test_session_main(byref=False) test_session_main(byref=True) test_session_other() - test_byref_without_byref_objects() + test_runtime_module() From 18ef25fec9c674c82e3e92c36f10ed947298c8c8 Mon Sep 17 00:00:00 2001 From: Leonardo Gama Date: Fri, 24 Jun 2022 09:42:31 -0300 Subject: [PATCH 08/13] tests: test load_session_copy --- dill/_dill.py | 2 +- tests/test_session.py | 25 +++++++++++++++++++++++++ 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/dill/_dill.py b/dill/_dill.py index 81e7f1d7..5ebc60d9 100644 --- a/dill/_dill.py +++ b/dill/_dill.py @@ -633,7 +633,7 @@ def load_session_copy(filename='/tmp/session.pkl', **kwds): vars(main).update(main_globals) except NameError: pass - module.pop('__path__', None) + vars(module).pop('__path__', None) # "don't treat this as a package" module.__loader__ = module.__spec__ = None module.__session__ = filename if isinstance(filename, str) else repr(filename) return module diff --git a/tests/test_session.py b/tests/test_session.py index 6a6ab5e5..a967f2a7 100644 --- a/tests/test_session.py +++ b/tests/test_session.py @@ -230,8 +230,33 @@ def test_runtime_module(): assert runtime not in sys.modules.values() +def test_session_copy(): + with TestNamespace(): + session_buffer = io.BytesIO() + dill.dump_session(session_buffer) + + global x, y, empty + x = 
y = 0 # change x and create y + del empty + globals_state = globals().copy() + + session_buffer.seek(0) + main_copy = dill.load_session_copy(session_buffer) + + assert main_copy is not __main__ + assert main_copy not in sys.modules.values() + assert vars(main_copy) is not globals() + assert globals() == globals_state + + assert main_copy.__name__ == '__main__' + assert x != main_copy.x + assert 'y' not in vars(main_copy) + assert 'empty' in vars(main_copy) + + if __name__ == '__main__': test_session_main(byref=False) test_session_main(byref=True) test_session_other() test_runtime_module() + test_session_copy() From 7ceb2dbcaaa146b76c77fec02cf4ac780c82477e Mon Sep 17 00:00:00 2001 From: Leonardo Gama Date: Sat, 2 Jul 2022 09:20:58 -0300 Subject: [PATCH 09/13] review: adjustments --- dill/__init__.py | 2 +- dill/_dill.py | 256 ++++++++++++++++++++++++++++++++++-------- docs/source/conf.py | 5 +- tests/test_session.py | 30 ++--- 4 files changed, 224 insertions(+), 69 deletions(-) diff --git a/dill/__init__.py b/dill/__init__.py index 38c7be7e..53ac9fa2 100644 --- a/dill/__init__.py +++ b/dill/__init__.py @@ -284,7 +284,7 @@ """ from ._dill import ( - dump, dumps, load, loads, dump_session, load_session, load_session_copy, + dump, dumps, load, loads, dump_session, load_session, load_vars, Pickler, Unpickler, register, copy, pickle, pickles, check, HIGHEST_PROTOCOL, DEFAULT_PROTOCOL, PicklingError, UnpicklingError, HANDLE_FMODE, CONTENTS_FMODE, FILE_FMODE, PickleError, PickleWarning, diff --git a/dill/_dill.py b/dill/_dill.py index 5ebc60d9..e10a1539 100644 --- a/dill/_dill.py +++ b/dill/_dill.py @@ -16,7 +16,7 @@ Test against CH16+ Std. Lib. ... TBD. 
""" __all__ = ['dump', 'dumps', 'load', 'loads', 'dump_session', 'load_session', - 'load_session_copy', 'Pickler', 'Unpickler', 'register', 'copy', + 'load_vars', 'Pickler', 'Unpickler', 'register', 'copy', 'pickle', 'pickles', 'check', 'HIGHEST_PROTOCOL', 'DEFAULT_PROTOCOL', 'PicklingError', 'UnpicklingError', 'HANDLE_FMODE', 'CONTENTS_FMODE', 'FILE_FMODE', 'PickleError', 'PickleWarning', 'PicklingWarning', @@ -78,6 +78,7 @@ def _trace(boolean): GeneratorType, DictProxyType, XRangeType, SliceType, TracebackType, \ NotImplementedType, EllipsisType, FrameType, ModuleType, \ BufferType, BuiltinMethodType, TypeType +from typing import Optional, Union from pickle import HIGHEST_PROTOCOL, PickleError, PicklingError, UnpicklingError try: from pickle import DEFAULT_PROTOCOL @@ -400,8 +401,12 @@ def loads(str, ignore=None, **kwds): ### End: Shorthands ### ### Pickle the Interpreter Session +import pathlib +import tempfile + SESSION_IMPORTED_AS_TYPES = (ModuleType, ClassType, TypeType, Exception, FunctionType, MethodType, BuiltinMethodType) +TEMPDIR = pathlib.PurePath(tempfile.gettempdir()) def _module_map(): """get map of imported modules""" @@ -483,8 +488,53 @@ def _restore_modules(unpickler, main_module): pass #NOTE: 06/03/15 renamed main_module to main -def dump_session(filename='/tmp/session.pkl', main=None, byref=False, **kwds): - """pickle the current state of __main__ to a file""" +def dump_session( + filename = str(TEMPDIR/'session.pkl'), + main: Optional[Union[ModuleType, str]] = None, + byref: bool = False, + **kwds +) -> None: + """Pickle the current state of :py:mod:`__main__` to a file. + + Save the interpreter session (the contents of the built-in module + :py:mod:`__main__`) or the state of another module to a pickle file. This + can then be restored by calling the function :py:func:`load_session`. + + Runtime-created modules, like the ones constructed by + :py:class:`~types.ModuleType`, can also be saved and restored thereafter. 
+ + Parameters: + filename: a path-like object or a writable stream + main: a module object or an importable module name + byref: if `True`, imported objects in the module's namespace are saved + by reference. *Note:* this is different from the ``byref`` option + of other "dump" functions and is not affected by + ``settings['byref']``. + **kwds: extra keyword arguments passed to :py:class:`Pickler()` + + Raises: + :py:exc:`PicklingError`: if pickling fails + + Examples: + - Save current session state: + + >>> import dill + >>> dill.dump_session() # save state of __main__ to /tmp/session.pkl + + - Save the state of an imported/importable module: + + >>> import my_mod as m + >>> m.var = 'new value' + >>> dill.dump_session('my_mod_session.pkl', main='my_mod') + + - Save the state of an non-importable, runtime-created module: + + >>> from types import ModuleType + >>> runtime = ModuleType('runtime') + >>> runtime.food = ['bacon', 'eggs', 'spam'] + >>> runtime.process_food = m.process_food + >>> dill.dump_session('runtime_session.pkl', main=runtime, byref=True) + """ from .settings import settings protocol = settings['protocol'] if main is None: main = _main_module @@ -509,20 +559,42 @@ def dump_session(filename='/tmp/session.pkl', main=None, byref=False, **kwds): file.close() return -def _open_peekable(filename): - if hasattr(filename, 'read'): - file = filename - else: - file = open(filename, 'rb') - if not hasattr(file, 'peek'): +class _PeekableReader: + """lightweight stream wrapper that implements peek()""" + def __init__(self, stream): + self.stream = stream + def read(self, n): + return self.stream.read(n) + def tell(self): + return self.stream.tell() + def close(self): + return self.stream.close() + def peek(self, n): + stream = self.stream try: - import io - file = io.BufferedReader(file) + if hasttr(stream, 'flush'): stream.flush() + position = stream.tell() + stream.seek(position) # assert seek() works before reading + chunk = stream.read(n) + 
stream.seek(position) + return chunk + except (AttributeError, OSError): + raise NotImplementedError("stream is not peekable: %r", stream) from None + +def _make_peekable(stream): + """return stream as an object with a peek() method""" + import io + if hasattr(stream, 'peek'): + return stream + if not (hasattr(stream, 'tell') and hasattr(stream, 'seek')): + try: + return io.BufferedReader(stream) except Exception: - pass # ...and hope for the best - return file + pass + return _PeekableReader(stream) -def _inspect_pickle(file, main_is_none): +def _identify_session_module(file, main_is_none): + """identify the session file's module name""" from pickletools import genops UNICODE = {'UNICODE', 'BINUNICODE', 'SHORT_BINUNICODE'} found_import = False @@ -534,25 +606,81 @@ def _inspect_pickle(file, main_is_none): found_import = True else: if opcode.name in UNICODE: - if not all(name.isidentifier() for name in arg.split('.')): - raise UnpicklingError("invalid module name: %r" % arg) return arg else: raise UnpicklingError("reached STOP without finding main module") - except (AttributeError, ValueError) as error: - if isinstance(error, AttributeError) and not main_is_none: - # File is not peekable, but we have main. + except (NotImplementedError, ValueError) as error: + # ValueError occours when the end of the chunk is reached (without a STOP). + if isinstance(error, NotImplementedError) and not main_is_none: + # file is not peekable, but we have main. return None raise UnpicklingError("unable to identify main module") from error -def load_session(filename='/tmp/session.pkl', main=None, **kwds): - """update the __main__ module with the state from the session file""" - file = _open_peekable(filename) +def load_session( + filename = str(TEMPDIR/'session.pkl'), + main: Union[ModuleType, str] = None, + **kwds +) -> Optional[ModuleType]: + """Update the module :py:mod:`__main__` with the state from the session file. 
+ + Restore the interpreter session (the built-in module :py:mod:`__main__`) or + the state of another module from a pickle file created by the function + :py:func:`dump_session`. + + If loading the state of a (non-importable) runtime-created module, a version + of this module may be passed as the argument ``main``. Otherwise, a new + module object is created with :py:class:`~types.ModuleType` and returned + after it's updated. + + Parameters: + filename: a path-like object or a readable stream + main: an importable module name or a module object (optional) + **kwds: extra keyword arguments passed to :py:class:`Unpickler()` + + Raises: + :py:exc:`UnpicklingError`: if unpickling fails + + Returns: + the restored module if different from :py:mod:`__main__` + + Examples: + - Load a saved session state: + + >>> import dill, sys + >>> dill.load_session() # updates __main__ from /tmp/session.pkl + >>> restored_var + 'this variable was created/updated by load_session()' + + - Load the saved state of an importable module: + + >>> my_mod = dill.load_session('my_mod_session.pkl') + >>> my_mod.var + 'new value' + >>> my_mod in sys.modules.values() + True + + - Load the saved state of a non-importable, runtime-created module: + + >>> runtime = dill.load_session('runtime_session.pkl') + >>> runtime.process_food is my_mod.process_food # was saved by reference + True + >>> runtime in sys.modules.values() + False + + See also: + :py:func:`load_vars` to load the contents of a saved session (from + :py:mod:`__main__` or any importable module) into a dictionary. 
+ """ + if hasattr(filename, 'read'): + file = filename + else: + file = open(filename, 'rb') try: + file = _make_peekable(file) #FIXME: dill.settings are disabled unpickler = Unpickler(file, **kwds) unpickler._session = True - pickle_main = _inspect_pickle(file, main is None) + pickle_main = _identify_session_module(file, main is None) # Resolve unpickler._main if main is None and pickle_main is not None: @@ -603,40 +731,74 @@ def load_session(filename='/tmp/session.pkl', main=None, **kwds): if module is not _main_module: return module -def load_session_copy(filename='/tmp/session.pkl', **kwds): +def load_vars( + filename = str(TEMPDIR/'session.pkl'), + update: bool = False, + **kwds +) -> dict: """ - Load the state of a module saved to a session file into a runtime created module. - - The loaded module's origin is stored in the '__session__' attribute. - Warning: this function is completely thread-unsafe. + Load the contents of a module from a session file into a dictionary. + + The loaded module's origin is stored in the ``__session__`` attribute. 
+ + Parameters: + filename: a path-like object or a readable stream + update: if `True`, the dictionary is updated with the current state of + module before loading variables from the session file + **kwds: extra keyword arguments passed to :py:class:`Unpickler()` + + Raises: + :py:exc:`UnpicklingError`: if unpickling fails + + Example: + >>> import dill + >>> alist = [1, 2, 3] + >>> anum = 42 + >>> dill.dump_session() + >>> anum = 0 + >>> new_var = 'spam' + >>> main_vars = dill.load_vars() + >>> main_vars['__name__'], main_vars['__session__'] + ('__main__', '/tmp/session.pkl') + >>> main_vars is globals() # loaded objects don't reference current global variables + False + >>> main_vars['alist'] == alist + True + >>> main_vars['alist'] is alist # was saved by value + False + >>> main_vars['anum'] == anum # changed after the session was saved + False + >>> new_var in main_vars # would be True if the option 'update' was set + False """ if 'main' in kwds: - raise TypeError("'main' is an invalid keyword argument for load_session_copy()") - file = _open_peekable(filename) + raise TypeError("'main' is an invalid keyword argument for load_vars()") + if hasattr(filename, 'read'): + file = filename + else: + file = open(filename, 'rb') try: - pickle_main = _inspect_pickle(file, main_is_none=True) - main = _import_module(pickle_main) - main_globals = vars(main).copy() - vars(main).clear() - for attr in ('__builtins__', '__loader__', '__name__'): - # Required by load_session(). 
- if attr in main_globals: - setattr(main, attr, main_globals[attr]) + file = _make_peekable(file) + main_name = _identify_session_module(file, main_is_none=True) + old_main = sys.modules.get(main_name) + main = ModuleType(main_name) + if update: + vars(main).update(vars(old_main)) + main.__builtins__ = __builtin__ + sys.modules[main_name] = main load_session(file, **kwds) - module = ModuleType(main.__name__) - vars(module).update(vars(main)) + main.__session__ = filename if isinstance(filename, str) else repr(filename) finally: if not hasattr(filename, 'read'): # if newly opened file file.close() try: - vars(main).clear() - vars(main).update(main_globals) - except NameError: + if old_main is None: + del sys.modules[main_name] + else: + sys.modules[main_name] = old_main + except NameError: # failed before setting old_main pass - vars(module).pop('__path__', None) # "don't treat this as a package" - module.__loader__ = module.__spec__ = None - module.__session__ = filename if isinstance(filename, str) else repr(filename) - return module + return vars(main) ### End: Pickle the Interpreter diff --git a/docs/source/conf.py b/docs/source/conf.py index 19171caf..caf7a7ca 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -65,12 +65,13 @@ # extension config github_project_url = "https://github.com/uqfoundation/dill" -autoclass_content= 'both' +autoclass_content = 'both' +autodoc_typehints = 'description' napoleon_include_init_with_doc = True napoleon_include_private_with_doc = False napoleon_include_special_with_doc = True -napoleon_use_param = False napoleon_use_ivar = True +napoleon_use_param = True # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the diff --git a/tests/test_session.py b/tests/test_session.py index a967f2a7..deb03e85 100644 --- a/tests/test_session.py +++ b/tests/test_session.py @@ -85,7 +85,6 @@ def weekdays(self): cal = CalendarSubclass() 
selfref = __main__ - # Setup global namespace for session saving tests. class TestNamespace: test_globals = globals().copy() @@ -101,7 +100,6 @@ def __exit__(self, *exc_info): globals().clear() globals().update(self.backup) - def _clean_up_cache(module): cached = module.__file__.split('.', 1)[0] + '.pyc' cached = module.__cached__ if hasattr(module, '__cached__') else cached @@ -114,7 +112,6 @@ def _clean_up_cache(module): atexit.register(_clean_up_cache, local_mod) - def _test_objects(main, globals_copy, byref): try: main_dict = vars(__main__) @@ -145,7 +142,6 @@ def _test_objects(main, globals_copy, byref): error.args = (_error_line(error, obj, byref),) raise - def test_session_main(byref): """test dump/load_session() for __main__, both in this process and in a subprocess""" extra_objects = {} @@ -174,7 +170,6 @@ def test_session_main(byref): dill.load_session(session_buffer) ns.backup['_test_objects'](__main__, ns.backup, byref) - def test_session_other(): """test dump/load_session() for a module other than __main__""" import test_classdef as module @@ -194,7 +189,6 @@ def test_session_other(): assert all(obj in module.__dict__ for obj in dict_objects) assert module.selfref is module - def test_runtime_module(): from types import ModuleType modname = '__runtime__' @@ -229,34 +223,32 @@ def test_runtime_module(): assert runtime.x == 42 assert runtime not in sys.modules.values() - -def test_session_copy(): +def test_load_vars(): with TestNamespace(): session_buffer = io.BytesIO() dill.dump_session(session_buffer) - global x, y, empty + global empty, names, x, y x = y = 0 # change x and create y del empty globals_state = globals().copy() session_buffer.seek(0) - main_copy = dill.load_session_copy(session_buffer) + main_vars = dill.load_vars(session_buffer) - assert main_copy is not __main__ - assert main_copy not in sys.modules.values() - assert vars(main_copy) is not globals() + assert main_vars is not globals() assert globals() == globals_state - assert 
main_copy.__name__ == '__main__' - assert x != main_copy.x - assert 'y' not in vars(main_copy) - assert 'empty' in vars(main_copy) - + assert main_vars['__name__'] == '__main__' + assert main_vars['names'] == names + assert main_vars['names'] is not names + assert main_vars['x'] != x + assert 'y' not in main_vars + assert 'empty' in main_vars if __name__ == '__main__': test_session_main(byref=False) test_session_main(byref=True) test_session_other() test_runtime_module() - test_session_copy() + test_load_vars() From a080ecadcedb4b76f1955f45165507f10118bbef Mon Sep 17 00:00:00 2001 From: Leonardo Gama Date: Tue, 5 Jul 2022 14:22:53 -0300 Subject: [PATCH 10/13] small fixes --- dill/_dill.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/dill/_dill.py b/dill/_dill.py index ff9ade0a..de03e8e0 100644 --- a/dill/_dill.py +++ b/dill/_dill.py @@ -480,6 +480,8 @@ def __init__(self, stream): self.stream = stream def read(self, n): return self.stream.read(n) + def readline(self): + return self.stream.readline() def tell(self): return self.stream.tell() def close(self): @@ -487,7 +489,7 @@ def close(self): def peek(self, n): stream = self.stream try: - if hasttr(stream, 'flush'): stream.flush() + if hasattr(stream, 'flush'): stream.flush() position = stream.tell() stream.seek(position) # assert seek() works before reading chunk = stream.read(n) From 0973b0bd47003aed82ec90c118482a17c1f90f8b Mon Sep 17 00:00:00 2001 From: Leonardo Gama Date: Wed, 6 Jul 2022 12:42:51 -0300 Subject: [PATCH 11/13] use __dict__ --- dill/_dill.py | 4 ++-- dill/tests/test_session.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/dill/_dill.py b/dill/_dill.py index de03e8e0..f8ba38db 100644 --- a/dill/_dill.py +++ b/dill/_dill.py @@ -700,7 +700,7 @@ def load_vars( old_main = sys.modules.get(main_name) main = ModuleType(main_name) if update: - vars(main).update(vars(old_main)) + main.__dict__.update(old_main.__dict__) main.__builtins__ = __builtin__ 
sys.modules[main_name] = main load_session(file, **kwds) @@ -715,7 +715,7 @@ def load_vars( sys.modules[main_name] = old_main except NameError: # failed before setting old_main pass - return vars(main) + return main.__dict__ ### End: Pickle the Interpreter diff --git a/dill/tests/test_session.py b/dill/tests/test_session.py index ceece465..67c14e2c 100644 --- a/dill/tests/test_session.py +++ b/dill/tests/test_session.py @@ -34,7 +34,7 @@ def test_modules(byref): # independent of the value of byref. Tried to run garbage collection just before loading the # session with no luck. It fails even when preceding them with 'import calendar'. Needed to # run these kinds of tests in a supbrocess. Failing test sample: - # assert globals()['day_name'] is vars(sys.modules['calendar'])['day_name'] + # assert globals()['day_name'] is sys.modules['calendar'].__dict__['day_name'] try: for obj in ('json', 'url', 'local_mod', 'sax', 'dom'): assert globals()[obj].__name__ in sys.modules @@ -119,7 +119,7 @@ def _clean_up_cache(module): def _test_objects(main, globals_copy, byref): try: - main_dict = vars(__main__) + main_dict = __main__.__dict__ global Person, person, Calendar, CalendarSubclass, cal, selfref for obj in ('json', 'url', 'local_mod', 'sax', 'dom'): From ba1b36f9141ff690c7fdcdcb62de4e2e93cab1b7 Mon Sep 17 00:00:00 2001 From: Leonardo Gama Date: Thu, 7 Jul 2022 21:18:48 -0300 Subject: [PATCH 12/13] naming changes --- dill/__init__.py | 12 ++--- dill/_dill.py | 100 ++++++++++++++++++++++++------------- dill/tests/test_session.py | 77 ++++++++++++++-------------- 3 files changed, 110 insertions(+), 79 deletions(-) diff --git a/dill/__init__.py b/dill/__init__.py index 2f95cf25..76419283 100644 --- a/dill/__init__.py +++ b/dill/__init__.py @@ -289,12 +289,12 @@ """ from ._dill import ( - dump, dumps, load, loads, dump_session, load_session, load_vars, - Pickler, Unpickler, register, copy, pickle, pickles, check, - HIGHEST_PROTOCOL, DEFAULT_PROTOCOL, PicklingError, 
UnpicklingError, - HANDLE_FMODE, CONTENTS_FMODE, FILE_FMODE, PickleError, PickleWarning, - PicklingWarning, UnpicklingWarning, - ) + dump, dumps, load, loads, dump_module, load_module, load_module_vars, + dump_session, load_session, Pickler, Unpickler, register, copy, pickle, + pickles, check, HIGHEST_PROTOCOL, DEFAULT_PROTOCOL, PicklingError, + UnpicklingError, HANDLE_FMODE, CONTENTS_FMODE, FILE_FMODE, PickleError, + PickleWarning, PicklingWarning, UnpicklingWarning, +) from . import source, temp, detect # get global settings diff --git a/dill/_dill.py b/dill/_dill.py index f8ba38db..1cdd6bcc 100644 --- a/dill/_dill.py +++ b/dill/_dill.py @@ -15,12 +15,14 @@ Test against "all" python types (Std. Lib. CH 1-15 @ 2.7) by mmckerns. Test against CH16+ Std. Lib. ... TBD. """ -__all__ = ['dump', 'dumps', 'load', 'loads', 'dump_session', 'load_session', - 'load_vars', 'Pickler', 'Unpickler', 'register', 'copy', - 'pickle', 'pickles', 'check', 'HIGHEST_PROTOCOL', 'DEFAULT_PROTOCOL', - 'PicklingError', 'UnpicklingError', 'HANDLE_FMODE', 'CONTENTS_FMODE', - 'FILE_FMODE', 'PickleError', 'PickleWarning', 'PicklingWarning', - 'UnpicklingWarning'] +__all__ = [ + 'dump', 'dumps', 'load', 'loads', 'dump_module', 'load_module', + 'load_module_vars', 'dump_session', 'load_session', 'Pickler', 'Unpickler', + 'register', 'copy', 'pickle', 'pickles', 'check', 'HIGHEST_PROTOCOL', + 'DEFAULT_PROTOCOL', 'PicklingError', 'UnpicklingError', 'HANDLE_FMODE', + 'CONTENTS_FMODE', 'FILE_FMODE', 'PickleError', 'PickleWarning', + 'PicklingWarning', 'UnpicklingWarning' +] __module__ = 'dill' @@ -403,17 +405,17 @@ def _restore_modules(unpickler, main_module): pass #NOTE: 06/03/15 renamed main_module to main -def dump_session( +def dump_module( filename = str(TEMPDIR/'session.pkl'), main: Optional[Union[ModuleType, str]] = None, - byref: bool = False, + imported_byref: bool = False, **kwds ) -> None: - """Pickle the current state of :py:mod:`__main__` to a file. 
+ """Pickle the current state of :py:mod:`__main__` or another module to a file. Save the interpreter session (the contents of the built-in module :py:mod:`__main__`) or the state of another module to a pickle file. This - can then be restored by calling the function :py:func:`load_session`. + can then be restored by calling the function :py:func:`load_module`. Runtime-created modules, like the ones constructed by :py:class:`~types.ModuleType`, can also be saved and restored thereafter. @@ -421,9 +423,9 @@ def dump_session( Parameters: filename: a path-like object or a writable stream main: a module object or an importable module name - byref: if `True`, imported objects in the module's namespace are saved - by reference. *Note:* this is different from the ``byref`` option - of other "dump" functions and is not affected by + imported_byref: if `True`, imported objects in the module's namespace + are saved by reference. *Note:* this is different from the ``byref`` + option of other "dump" functions and is not affected by ``settings['byref']``. 
**kwds: extra keyword arguments passed to :py:class:`Pickler()` @@ -434,13 +436,13 @@ def dump_session( - Save current session state: >>> import dill - >>> dill.dump_session() # save state of __main__ to /tmp/session.pkl + >>> dill.dump_module() # save state of __main__ to /tmp/session.pkl - Save the state of an imported/importable module: >>> import my_mod as m >>> m.var = 'new value' - >>> dill.dump_session('my_mod_session.pkl', main='my_mod') + >>> dill.dump_module('my_mod_session.pkl', main='my_mod') - Save the state of an non-importable, runtime-created module: @@ -448,8 +450,24 @@ def dump_session( >>> runtime = ModuleType('runtime') >>> runtime.food = ['bacon', 'eggs', 'spam'] >>> runtime.process_food = m.process_food - >>> dill.dump_session('runtime_session.pkl', main=runtime, byref=True) + >>> dill.dump_module('runtime_session.pkl', main=runtime, byref=True) + + *Changed in version 0.3.6:* the function ``dump_session()`` was renamed to + ``dump_module()``. + + *Changed in version 0.3.6:* the parameter ``byref`` was renamed to + ``imported_byref``. """ + if 'byref' in kwds: + warnings.warn( + "The parameter 'byref' was renamed to 'imported_byref', use this" + " instead. Note: the underlying dill.Pickler do accept a 'byref'" + " argument, but it has no effect on session saving.", + PendingDeprecationWarning + ) + if imported_byref: + raise ValueError("both 'imported_byref' and 'byref' arguments were used.") + imported_byref = kwds.pop('byref') from .settings import settings protocol = settings['protocol'] if main is None: main = _main_module @@ -460,7 +478,7 @@ def dump_session( try: pickler = Pickler(file, protocol, **kwds) pickler._original_main = main - if byref: + if imported_byref: main = _stash_modules(main) pickler._main = main #FIXME: dill.settings are disabled pickler._byref = False # disable pickling by name reference @@ -474,6 +492,12 @@ def dump_session( file.close() return +# Backward compatibility. 
+def dump_session(filename=str(TEMPDIR/'session.pkl'), main=None, byref=False, **kwds): + warnings.warn("dump_session() was renamed to dump_module().", PendingDeprecationWarning) + dump_module(filename, main, imported_byref=byref, **kwds) +dump_session.__doc__ = dump_module.__doc__ + class _PeekableReader: """lightweight stream wrapper that implements peek()""" def __init__(self, stream): @@ -510,7 +534,7 @@ def _make_peekable(stream): pass return _PeekableReader(stream) -def _identify_session_module(file, main_is_none): +def _identify_module(file, main=None): """identify the session file's module name""" from pickletools import genops UNICODE = {'UNICODE', 'BINUNICODE', 'SHORT_BINUNICODE'} @@ -528,21 +552,21 @@ def _identify_session_module(file, main_is_none): raise UnpicklingError("reached STOP without finding main module") except (NotImplementedError, ValueError) as error: # ValueError occours when the end of the chunk is reached (without a STOP). - if isinstance(error, NotImplementedError) and not main_is_none: + if isinstance(error, NotImplementedError) and main is not None: # file is not peekable, but we have main. return None raise UnpicklingError("unable to identify main module") from error -def load_session( +def load_module( filename = str(TEMPDIR/'session.pkl'), main: Union[ModuleType, str] = None, **kwds ) -> Optional[ModuleType]: - """Update the module :py:mod:`__main__` with the state from the session file. + """Update :py:mod:`__main__` or another module with the state from the session file. Restore the interpreter session (the built-in module :py:mod:`__main__`) or the state of another module from a pickle file created by the function - :py:func:`dump_session`. + :py:func:`dump_module`. If loading the state of a (non-importable) runtime-created module, a version of this module may be passed as the argument ``main``. 
Otherwise, a new @@ -564,13 +588,13 @@ def load_session( - Load a saved session state: >>> import dill, sys - >>> dill.load_session() # updates __main__ from /tmp/session.pkl + >>> dill.load_module() # updates __main__ from /tmp/session.pkl >>> restored_var - 'this variable was created/updated by load_session()' + 'this variable was created/updated by load_module()' - Load the saved state of an importable module: - >>> my_mod = dill.load_session('my_mod_session.pkl') + >>> my_mod = dill.load_module('my_mod_session.pkl') >>> my_mod.var 'new value' >>> my_mod in sys.modules.values() @@ -578,14 +602,14 @@ def load_session( - Load the saved state of a non-importable, runtime-created module: - >>> runtime = dill.load_session('runtime_session.pkl') + >>> runtime = dill.load_module('runtime_session.pkl') >>> runtime.process_food is my_mod.process_food # was saved by reference True >>> runtime in sys.modules.values() False See also: - :py:func:`load_vars` to load the contents of a saved session (from + :py:func:`load_module_vars` to load the contents of a saved session (from :py:mod:`__main__` or any importable module) into a dictionary. """ if hasattr(filename, 'read'): @@ -597,7 +621,7 @@ def load_session( #FIXME: dill.settings are disabled unpickler = Unpickler(file, **kwds) unpickler._session = True - pickle_main = _identify_session_module(file, main is None) + pickle_main = _identify_module(file, main) # Resolve unpickler._main if main is None and pickle_main is not None: @@ -648,7 +672,13 @@ def load_session( if module is not _main_module: return module -def load_vars( +# Backward compatibility. 
+def load_session(filename=str(TEMPDIR/'session.pkl'), main=None, **kwds): + warnings.warn("load_session() was renamed to load_module().", PendingDeprecationWarning) + load_module(filename, main, **kwds) +load_session.__doc__ = load_module.__doc__ + +def load_module_vars( filename = str(TEMPDIR/'session.pkl'), update: bool = False, **kwds @@ -661,7 +691,7 @@ def load_vars( Parameters: filename: a path-like object or a readable stream update: if `True`, the dictionary is updated with the current state of - module before loading variables from the session file + the module before loading variables from the session file **kwds: extra keyword arguments passed to :py:class:`Unpickler()` Raises: @@ -671,10 +701,10 @@ def load_vars( >>> import dill >>> alist = [1, 2, 3] >>> anum = 42 - >>> dill.dump_session() + >>> dill.dump_module() >>> anum = 0 >>> new_var = 'spam' - >>> main_vars = dill.load_vars() + >>> main_vars = dill.load_module_vars() >>> main_vars['__name__'], main_vars['__session__'] ('__main__', '/tmp/session.pkl') >>> main_vars is globals() # loaded objects don't reference current global variables @@ -689,21 +719,21 @@ def load_vars( False """ if 'main' in kwds: - raise TypeError("'main' is an invalid keyword argument for load_vars()") + raise TypeError("'main' is an invalid keyword argument for load_module_vars()") if hasattr(filename, 'read'): file = filename else: file = open(filename, 'rb') try: file = _make_peekable(file) - main_name = _identify_session_module(file, main_is_none=True) + main_name = _identify_module(file) old_main = sys.modules.get(main_name) main = ModuleType(main_name) if update: main.__dict__.update(old_main.__dict__) main.__builtins__ = __builtin__ sys.modules[main_name] = main - load_session(file, **kwds) + load_module(file, **kwds) main.__session__ = filename if isinstance(filename, str) else repr(filename) finally: if not hasattr(filename, 'read'): # if newly opened file diff --git a/dill/tests/test_session.py 
b/dill/tests/test_session.py index 67c14e2c..6c507034 100644 --- a/dill/tests/test_session.py +++ b/dill/tests/test_session.py @@ -19,21 +19,22 @@ # Child process # ################### -def _error_line(error, obj, byref): +def _error_line(error, obj, imported_byref): import traceback line = traceback.format_exc().splitlines()[-2].replace('[obj]', '['+repr(obj)+']') - return "while testing (with byref=%s): %s" % (byref, line.lstrip()) + return "while testing (with imported_byref=%s): %s" % (imported_byref, line.lstrip()) if __name__ == '__main__' and len(sys.argv) >= 3 and sys.argv[1] == '--child': # Test session loading in a fresh interpreter session. - byref = (sys.argv[2] == 'True') - dill.load_session(session_file % byref) - - def test_modules(byref): - # FIXME: In this test setting with CPython 3.7, 'calendar' is not included in sys.modules, - # independent of the value of byref. Tried to run garbage collection just before loading the - # session with no luck. It fails even when preceding them with 'import calendar'. Needed to - # run these kinds of tests in a supbrocess. Failing test sample: + imported_byref = (sys.argv[2] == 'True') + dill.load_module(session_file % imported_byref) + + def test_modules(imported_byref): + # FIXME: In this test setting with CPython 3.7, 'calendar' is not included + # in sys.modules, independent of the value of imported_byref. Tried to + # run garbage collection just before loading the session with no luck. It + # fails even when preceding them with 'import calendar'. Needed to run + # these kinds of tests in a supbrocess. 
Failing test sample: # assert globals()['day_name'] is sys.modules['calendar'].__dict__['day_name'] try: for obj in ('json', 'url', 'local_mod', 'sax', 'dom'): @@ -44,16 +45,16 @@ def test_modules(byref): for obj in ('Calendar', 'isleap'): assert globals()[obj] is sys.modules['calendar'].__dict__[obj] assert __main__.day_name.__module__ == 'calendar' - if byref: + if imported_byref: assert __main__.day_name is calendar.day_name assert __main__.complex_log is cmath.log except AssertionError as error: - error.args = (_error_line(error, obj, byref),) + error.args = (_error_line(error, obj, imported_byref),) raise - test_modules(byref) + test_modules(imported_byref) sys.exit() #################### @@ -117,7 +118,7 @@ def _clean_up_cache(module): atexit.register(_clean_up_cache, local_mod) -def _test_objects(main, globals_copy, byref): +def _test_objects(main, globals_copy, imported_byref): try: main_dict = __main__.__dict__ global Person, person, Calendar, CalendarSubclass, cal, selfref @@ -143,13 +144,13 @@ def _test_objects(main, globals_copy, byref): assert selfref is __main__ except AssertionError as error: - error.args = (_error_line(error, obj, byref),) + error.args = (_error_line(error, obj, imported_byref),) raise -def test_session_main(byref): - """test dump/load_session() for __main__, both in this process and in a subprocess""" +def test_session_main(imported_byref): + """test dump/load_module() for __main__, both in this process and in a subprocess""" extra_objects = {} - if byref: + if imported_byref: # Test unpickleable imported object in main. from sys import flags extra_objects['flags'] = flags @@ -157,38 +158,38 @@ def test_session_main(byref): with TestNamespace(**extra_objects) as ns: try: # Test session loading in a new session. 
- dill.dump_session(session_file % byref, byref=byref) + dill.dump_module(session_file % imported_byref, imported_byref=imported_byref) from dill.tests.__main__ import python, shell, sp - error = sp.call([python, __file__, '--child', str(byref)], shell=shell) + error = sp.call([python, __file__, '--child', str(imported_byref)], shell=shell) if error: sys.exit(error) finally: try: - os.remove(session_file % byref) + os.remove(session_file % imported_byref) except OSError: pass # Test session loading in the same session. session_buffer = BytesIO() - dill.dump_session(session_buffer, byref=byref) + dill.dump_module(session_buffer, imported_byref=imported_byref) session_buffer.seek(0) - dill.load_session(session_buffer) - ns.backup['_test_objects'](__main__, ns.backup, byref) + dill.load_module(session_buffer) + ns.backup['_test_objects'](__main__, ns.backup, imported_byref) def test_session_other(): - """test dump/load_session() for a module other than __main__""" + """test dump/load_module() for a module other than __main__""" import test_classdef as module atexit.register(_clean_up_cache, module) module.selfref = module dict_objects = [obj for obj in module.__dict__.keys() if not obj.startswith('__')] session_buffer = BytesIO() - dill.dump_session(session_buffer, main=module) + dill.dump_module(session_buffer, main=module) for obj in dict_objects: del module.__dict__[obj] session_buffer.seek(0) - dill.load_session(session_buffer) #, main=module) + dill.load_module(session_buffer) #, main=module) assert all(obj in module.__dict__ for obj in dict_objects) assert module.selfref is module @@ -205,32 +206,32 @@ def test_runtime_module(): mod.__dill_imported, mod.__dill_imported_as, mod.__dill_imported_top_level, file=sys.stderr) - # This is also for code coverage, tests the use case of dump_session(byref=True) + # This is also for code coverage, tests the use case of dump_module(imported_byref=True) # without imported objects in the namespace. 
It's a contrived example because # even dill can't be in it. This should work after fixing #462. session_buffer = BytesIO() - dill.dump_session(session_buffer, main=runtime, byref=True) + dill.dump_module(session_buffer, main=runtime, imported_byref=True) session_dump = session_buffer.getvalue() # Pass a new runtime created module with the same name. runtime = ModuleType(modname) # empty - returned_mod = dill.load_session(BytesIO(session_dump), main=runtime) + returned_mod = dill.load_module(BytesIO(session_dump), main=runtime) assert returned_mod is runtime assert runtime.__name__ == modname assert runtime.x == 42 assert runtime not in sys.modules.values() - # Pass nothing as main. load_session() must create it. + # Pass nothing as main. load_module() must create it. session_buffer.seek(0) - runtime = dill.load_session(BytesIO(session_dump)) + runtime = dill.load_module(BytesIO(session_dump)) assert runtime.__name__ == modname assert runtime.x == 42 assert runtime not in sys.modules.values() -def test_load_vars(): +def test_load_module_vars(): with TestNamespace(): session_buffer = BytesIO() - dill.dump_session(session_buffer) + dill.dump_module(session_buffer) global empty, names, x, y x = y = 0 # change x and create y @@ -238,7 +239,7 @@ def test_load_vars(): globals_state = globals().copy() session_buffer.seek(0) - main_vars = dill.load_vars(session_buffer) + main_vars = dill.load_module_vars(session_buffer) assert main_vars is not globals() assert globals() == globals_state @@ -251,8 +252,8 @@ def test_load_vars(): assert 'empty' in main_vars if __name__ == '__main__': - test_session_main(byref=False) - test_session_main(byref=True) + test_session_main(imported_byref=False) + test_session_main(imported_byref=True) test_session_other() test_runtime_module() - test_load_vars() + test_load_module_vars() From 285438c3fd827cff31e22dc9a3bc20f95f5d095f Mon Sep 17 00:00:00 2001 From: Leonardo Gama Date: Sat, 9 Jul 2022 14:20:00 -0300 Subject: [PATCH 13/13] review: 
final renaming and adjustments --- dill/__init__.py | 2 +- dill/_dill.py | 116 +++++++++++++++++++++++++------------ dill/tests/test_session.py | 56 +++++++++--------- docs/source/dill.rst | 2 +- 4 files changed, 108 insertions(+), 68 deletions(-) diff --git a/dill/__init__.py b/dill/__init__.py index 76419283..7f974b6c 100644 --- a/dill/__init__.py +++ b/dill/__init__.py @@ -289,7 +289,7 @@ """ from ._dill import ( - dump, dumps, load, loads, dump_module, load_module, load_module_vars, + dump, dumps, load, loads, dump_module, load_module, load_module_asdict, dump_session, load_session, Pickler, Unpickler, register, copy, pickle, pickles, check, HIGHEST_PROTOCOL, DEFAULT_PROTOCOL, PicklingError, UnpicklingError, HANDLE_FMODE, CONTENTS_FMODE, FILE_FMODE, PickleError, diff --git a/dill/_dill.py b/dill/_dill.py index 1cdd6bcc..80293399 100644 --- a/dill/_dill.py +++ b/dill/_dill.py @@ -17,7 +17,7 @@ """ __all__ = [ 'dump', 'dumps', 'load', 'loads', 'dump_module', 'load_module', - 'load_module_vars', 'dump_session', 'load_session', 'Pickler', 'Unpickler', + 'load_module_asdict', 'dump_session', 'load_session', 'Pickler', 'Unpickler', 'register', 'copy', 'pickle', 'pickles', 'check', 'HIGHEST_PROTOCOL', 'DEFAULT_PROTOCOL', 'PicklingError', 'UnpicklingError', 'HANDLE_FMODE', 'CONTENTS_FMODE', 'FILE_FMODE', 'PickleError', 'PickleWarning', @@ -408,7 +408,7 @@ def _restore_modules(unpickler, main_module): def dump_module( filename = str(TEMPDIR/'session.pkl'), main: Optional[Union[ModuleType, str]] = None, - imported_byref: bool = False, + refimported: bool = False, **kwds ) -> None: """Pickle the current state of :py:mod:`__main__` or another module to a file. @@ -421,16 +421,16 @@ def dump_module( :py:class:`~types.ModuleType`, can also be saved and restored thereafter. 
Parameters: - filename: a path-like object or a writable stream - main: a module object or an importable module name - imported_byref: if `True`, imported objects in the module's namespace + filename: a path-like object or a writable stream. + main: a module object or an importable module name. + refimported: if `True`, all imported objects in the module's namespace are saved by reference. *Note:* this is different from the ``byref`` option of other "dump" functions and is not affected by ``settings['byref']``. - **kwds: extra keyword arguments passed to :py:class:`Pickler()` + **kwds: extra keyword arguments passed to :py:class:`Pickler()`. Raises: - :py:exc:`PicklingError`: if pickling fails + :py:exc:`PicklingError`: if pickling fails. Examples: - Save current session state: @@ -444,30 +444,30 @@ def dump_module( >>> m.var = 'new value' >>> dill.dump_module('my_mod_session.pkl', main='my_mod') - - Save the state of an non-importable, runtime-created module: + - Save the state of a non-importable, runtime-created module: >>> from types import ModuleType >>> runtime = ModuleType('runtime') >>> runtime.food = ['bacon', 'eggs', 'spam'] >>> runtime.process_food = m.process_food - >>> dill.dump_module('runtime_session.pkl', main=runtime, byref=True) + >>> dill.dump_module('runtime_session.pkl', main=runtime, refimported=True) *Changed in version 0.3.6:* the function ``dump_session()`` was renamed to ``dump_module()``. *Changed in version 0.3.6:* the parameter ``byref`` was renamed to - ``imported_byref``. + ``refimported``. """ if 'byref' in kwds: warnings.warn( - "The parameter 'byref' was renamed to 'imported_byref', use this" + "The parameter 'byref' was renamed to 'refimported', use this" " instead. 
Note: the underlying dill.Pickler do accept a 'byref'" " argument, but it has no effect on session saving.", PendingDeprecationWarning ) - if imported_byref: - raise ValueError("both 'imported_byref' and 'byref' arguments were used.") - imported_byref = kwds.pop('byref') + if refimported: + raise ValueError("both 'refimported' and 'byref' arguments were used.") + refimported = kwds.pop('byref') from .settings import settings protocol = settings['protocol'] if main is None: main = _main_module @@ -478,7 +478,7 @@ def dump_module( try: pickler = Pickler(file, protocol, **kwds) pickler._original_main = main - if imported_byref: + if refimported: main = _stash_modules(main) pickler._main = main #FIXME: dill.settings are disabled pickler._byref = False # disable pickling by name reference @@ -494,8 +494,8 @@ def dump_module( # Backward compatibility. def dump_session(filename=str(TEMPDIR/'session.pkl'), main=None, byref=False, **kwds): - warnings.warn("dump_session() was renamed to dump_module().", PendingDeprecationWarning) - dump_module(filename, main, imported_byref=byref, **kwds) + warnings.warn("dump_session() was renamed to dump_module()", PendingDeprecationWarning) + dump_module(filename, main, refimported=byref, **kwds) dump_session.__doc__ = dump_module.__doc__ class _PeekableReader: @@ -562,7 +562,8 @@ def load_module( main: Union[ModuleType, str] = None, **kwds ) -> Optional[ModuleType]: - """Update :py:mod:`__main__` or another module with the state from the session file. + """Update :py:mod:`__main__` or another module with the state from the + session file. Restore the interpreter session (the built-in module :py:mod:`__main__`) or the state of another module from a pickle file created by the function @@ -574,15 +575,18 @@ def load_module( after it's updated. 
Parameters: - filename: a path-like object or a readable stream - main: an importable module name or a module object (optional) - **kwds: extra keyword arguments passed to :py:class:`Unpickler()` + filename: a path-like object or a readable stream. + main: an importable module name or a module object. + **kwds: extra keyword arguments passed to :py:class:`Unpickler()`. Raises: - :py:exc:`UnpicklingError`: if unpickling fails + :py:exc:`UnpicklingError`: if unpickling fails. + :py:exc:`ValueError`: if the ``main`` argument and the session file's + module are incompatible. Returns: - the restored module if different from :py:mod:`__main__` + The restored module if it's different from :py:mod:`__main__` and + wasn't passed as the ``main`` argument. Examples: - Load a saved session state: @@ -608,10 +612,23 @@ def load_module( >>> runtime in sys.modules.values() False + - Update the state of a non-importable, runtime-created module: + + >>> from types import ModuleType + >>> runtime = ModuleType('runtime') + >>> runtime.food = ['pizza', 'burger'] + >>> dill.load_module('runtime_session.pkl', main=runtime) + >>> runtime.food + ['bacon', 'eggs', 'spam'] + + *Changed in version 0.3.6:* the function ``load_session()`` was renamed to + ``load_module()``. + See also: - :py:func:`load_module_vars` to load the contents of a saved session (from - :py:mod:`__main__` or any importable module) into a dictionary. + :py:func:`load_module_asdict` to load the contents of a saved session + (from :py:mod:`__main__` or any importable module) into a dictionary. """ + main_arg = main if hasattr(filename, 'read'): file = filename else: @@ -636,7 +653,8 @@ def load_module( if not isinstance(main, ModuleType): raise ValueError("%r is not a module" % main) unpickler._main = main - main = unpickler._main + else: + main = unpickler._main # Check against the pickle's main. 
is_main_imported = _is_imported_module(main) @@ -645,14 +663,20 @@ def load_module( if is_runtime_mod: pickle_main = pickle_main.partition('.')[-1] if is_runtime_mod and is_main_imported: - raise UnpicklingError("can't restore non-imported module %r into an imported one" \ - % pickle_main) + raise ValueError( + "can't restore non-imported module %r into an imported one" + % pickle_main + ) if not is_runtime_mod and not is_main_imported: - raise UnpicklingError("can't restore imported module %r into a non-imported one" \ - % pickle_main) + raise ValueError( + "can't restore imported module %r into a non-imported one" + % pickle_main + ) if main.__name__ != pickle_main: - raise UnpicklingError("can't restore module %r into module %r" \ - % (pickle_main, main.__name__)) + raise ValueError( + "can't restore module %r into module %r" + % (pickle_main, main.__name__) + ) # This is for find_class() to be able to locate it. if not is_main_imported: @@ -669,7 +693,7 @@ def load_module( pass assert module is main _restore_modules(unpickler, module) - if module is not _main_module: + if not (module is _main_module or module is main_arg): return module # Backward compatibility. @@ -678,7 +702,7 @@ def load_session(filename=str(TEMPDIR/'session.pkl'), main=None, **kwds): load_module(filename, main, **kwds) load_session.__doc__ = load_module.__doc__ -def load_module_vars( +def load_module_asdict( filename = str(TEMPDIR/'session.pkl'), update: bool = False, **kwds @@ -686,6 +710,12 @@ def load_module_vars( """ Load the contents of a module from a session file into a dictionary. + ``load_module_asdict()`` does the equivalent of this function:: + + lambda filename: vars(load_module(filename)).copy() + + but without changing the original module. + The loaded module's origin is stored in the ``__session__`` attribute. Parameters: @@ -697,6 +727,13 @@ def load_module_vars( Raises: :py:exc:`UnpicklingError`: if unpickling fails + Returns: + A copy of the restored module's dictionary. 
+ + Note: + If the ``update`` option is used, the original module will be loaded if + it wasn't yet. + Example: >>> import dill >>> alist = [1, 2, 3] @@ -704,7 +741,7 @@ def load_module_vars( >>> dill.dump_module() >>> anum = 0 >>> new_var = 'spam' - >>> main_vars = dill.load_module_vars() + >>> main_vars = dill.load_module_asdict() >>> main_vars['__name__'], main_vars['__session__'] ('__main__', '/tmp/session.pkl') >>> main_vars is globals() # loaded objects don't reference current global variables @@ -719,7 +756,7 @@ def load_module_vars( False """ if 'main' in kwds: - raise TypeError("'main' is an invalid keyword argument for load_module_vars()") + raise TypeError("'main' is an invalid keyword argument for load_module_asdict()") if hasattr(filename, 'read'): file = filename else: @@ -730,11 +767,14 @@ def load_module_vars( old_main = sys.modules.get(main_name) main = ModuleType(main_name) if update: + if old_main is None: + old_main = _import_module(main_name) main.__dict__.update(old_main.__dict__) - main.__builtins__ = __builtin__ + else: + main.__builtins__ = __builtin__ sys.modules[main_name] = main load_module(file, **kwds) - main.__session__ = filename if isinstance(filename, str) else repr(filename) + main.__session__ = str(filename) finally: if not hasattr(filename, 'read'): # if newly opened file file.close() diff --git a/dill/tests/test_session.py b/dill/tests/test_session.py index 6c507034..8f687934 100644 --- a/dill/tests/test_session.py +++ b/dill/tests/test_session.py @@ -13,25 +13,25 @@ import dill -session_file = os.path.join(os.path.dirname(__file__), 'session-byref-%s.pkl') +session_file = os.path.join(os.path.dirname(__file__), 'session-refimported-%s.pkl') ################### # Child process # ################### -def _error_line(error, obj, imported_byref): +def _error_line(error, obj, refimported): import traceback line = traceback.format_exc().splitlines()[-2].replace('[obj]', '['+repr(obj)+']') - return "while testing (with 
imported_byref=%s): %s" % (imported_byref, line.lstrip()) + return "while testing (with refimported=%s): %s" % (refimported, line.lstrip()) if __name__ == '__main__' and len(sys.argv) >= 3 and sys.argv[1] == '--child': # Test session loading in a fresh interpreter session. - imported_byref = (sys.argv[2] == 'True') - dill.load_module(session_file % imported_byref) + refimported = (sys.argv[2] == 'True') + dill.load_module(session_file % refimported) - def test_modules(imported_byref): + def test_modules(refimported): # FIXME: In this test setting with CPython 3.7, 'calendar' is not included - # in sys.modules, independent of the value of imported_byref. Tried to + # in sys.modules, independent of the value of refimported. Tried to # run garbage collection just before loading the session with no luck. It # fails even when preceding them with 'import calendar'. Needed to run # these kinds of tests in a supbrocess. Failing test sample: @@ -45,16 +45,16 @@ def test_modules(imported_byref): for obj in ('Calendar', 'isleap'): assert globals()[obj] is sys.modules['calendar'].__dict__[obj] assert __main__.day_name.__module__ == 'calendar' - if imported_byref: + if refimported: assert __main__.day_name is calendar.day_name assert __main__.complex_log is cmath.log except AssertionError as error: - error.args = (_error_line(error, obj, imported_byref),) + error.args = (_error_line(error, obj, refimported),) raise - test_modules(imported_byref) + test_modules(refimported) sys.exit() #################### @@ -118,7 +118,7 @@ def _clean_up_cache(module): atexit.register(_clean_up_cache, local_mod) -def _test_objects(main, globals_copy, imported_byref): +def _test_objects(main, globals_copy, refimported): try: main_dict = __main__.__dict__ global Person, person, Calendar, CalendarSubclass, cal, selfref @@ -144,13 +144,13 @@ def _test_objects(main, globals_copy, imported_byref): assert selfref is __main__ except AssertionError as error: - error.args = (_error_line(error, obj, 
imported_byref),) + error.args = (_error_line(error, obj, refimported),) raise -def test_session_main(imported_byref): +def test_session_main(refimported): """test dump/load_module() for __main__, both in this process and in a subprocess""" extra_objects = {} - if imported_byref: + if refimported: # Test unpickleable imported object in main. from sys import flags extra_objects['flags'] = flags @@ -158,22 +158,22 @@ def test_session_main(imported_byref): with TestNamespace(**extra_objects) as ns: try: # Test session loading in a new session. - dill.dump_module(session_file % imported_byref, imported_byref=imported_byref) + dill.dump_module(session_file % refimported, refimported=refimported) from dill.tests.__main__ import python, shell, sp - error = sp.call([python, __file__, '--child', str(imported_byref)], shell=shell) + error = sp.call([python, __file__, '--child', str(refimported)], shell=shell) if error: sys.exit(error) finally: try: - os.remove(session_file % imported_byref) + os.remove(session_file % refimported) except OSError: pass # Test session loading in the same session. session_buffer = BytesIO() - dill.dump_module(session_buffer, imported_byref=imported_byref) + dill.dump_module(session_buffer, refimported=refimported) session_buffer.seek(0) dill.load_module(session_buffer) - ns.backup['_test_objects'](__main__, ns.backup, imported_byref) + ns.backup['_test_objects'](__main__, ns.backup, refimported) def test_session_other(): """test dump/load_module() for a module other than __main__""" @@ -206,17 +206,17 @@ def test_runtime_module(): mod.__dill_imported, mod.__dill_imported_as, mod.__dill_imported_top_level, file=sys.stderr) - # This is also for code coverage, tests the use case of dump_module(imported_byref=True) + # This is also for code coverage, tests the use case of dump_module(refimported=True) # without imported objects in the namespace. It's a contrived example because # even dill can't be in it. This should work after fixing #462. 
session_buffer = BytesIO() - dill.dump_module(session_buffer, main=runtime, imported_byref=True) + dill.dump_module(session_buffer, main=runtime, refimported=True) session_dump = session_buffer.getvalue() # Pass a new runtime created module with the same name. runtime = ModuleType(modname) # empty - returned_mod = dill.load_module(BytesIO(session_dump), main=runtime) - assert returned_mod is runtime + return_val = dill.load_module(BytesIO(session_dump), main=runtime) + assert return_val is None assert runtime.__name__ == modname assert runtime.x == 42 assert runtime not in sys.modules.values() @@ -228,7 +228,7 @@ def test_runtime_module(): assert runtime.x == 42 assert runtime not in sys.modules.values() -def test_load_module_vars(): +def test_load_module_asdict(): with TestNamespace(): session_buffer = BytesIO() dill.dump_module(session_buffer) @@ -239,7 +239,7 @@ def test_load_module_vars(): globals_state = globals().copy() session_buffer.seek(0) - main_vars = dill.load_module_vars(session_buffer) + main_vars = dill.load_module_asdict(session_buffer) assert main_vars is not globals() assert globals() == globals_state @@ -252,8 +252,8 @@ def test_load_module_vars(): assert 'empty' in main_vars if __name__ == '__main__': - test_session_main(imported_byref=False) - test_session_main(imported_byref=True) + test_session_main(refimported=False) + test_session_main(refimported=True) test_session_other() test_runtime_module() - test_load_module_vars() + test_load_module_asdict() diff --git a/docs/source/dill.rst b/docs/source/dill.rst index 9061863c..31d41c91 100644 --- a/docs/source/dill.rst +++ b/docs/source/dill.rst @@ -11,7 +11,7 @@ dill module :special-members: :show-inheritance: :imported-members: -.. :exclude-members: + :exclude-members: dump_session, load_session detect module -------------