diff --git a/.travis.yml b/.travis.yml
index 7661144e2..dc0fb07d4 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -13,8 +13,8 @@ matrix:
       dist: trusty
       python: "pypy3"
     - os: linux
-      if: commit_message =~ /(\[ci python-nightly\])/
       env: PYTHON_NIGHTLY=1
+      python: 3.7
     - os: linux
       python: 3.7
     - os: linux
@@ -91,8 +91,12 @@ install:
   - $PYTHON_EXE -m pip install .
   - $PYTHON_EXE -m pip install --upgrade -r dev-requirements.txt
   - $PYTHON_EXE -m pip install tornado
-  - if [[ $TRAVIS_PYTHON_VERSION != 'pypy'* && "$PYTHON_NIGHTLY" != 1 ]]; then
-        $PYTHON_EXE -m pip install numpy scipy;
+  - if [[ $TRAVIS_PYTHON_VERSION != 'pypy'* ]]; then
+        if [[ "$PYTHON_NIGHTLY" == "1" ]]; then
+            $PYTHON_EXE -m pip install git+https://github.com/cython/cython git+https://github.com/numpy/numpy;
+        else
+            $PYTHON_EXE -m pip install numpy scipy;
+        fi
     fi
   - if [[ $PROJECT != "" ]]; then
        $PYTHON_EXE -m pip install $TEST_REQUIREMENTS;
@@ -126,5 +130,6 @@ script:
       fi
     fi
 after_success:
+  - pip install coverage codecov
   - coverage combine --append
   - codecov
diff --git a/CHANGES.md b/CHANGES.md
index 14d989501..7f393af00 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -1,6 +1,11 @@
 1.2.0
 =====
 
+- Leverage the C-accelerated Pickler new subclassing API (available in Python
+  3.8) in cloudpickle. This allows cloudpickle to pickle Python objects up to
+  30 times faster.
+  ([issue #253](https://github.com/cloudpipe/cloudpickle/pull/253))
+
 - Support pickling of classmethod and staticmethod objects in python2.
   arguments.
   ([issue #262](https://github.com/cloudpipe/cloudpickle/pull/262))
diff --git a/cloudpickle/__init__.py b/cloudpickle/__init__.py
index 1af671683..2909cebb0 100644
--- a/cloudpickle/__init__.py
+++ b/cloudpickle/__init__.py
@@ -1,5 +1,11 @@
 from __future__ import absolute_import
 
+import sys
+import pickle
+
+
 from cloudpickle.cloudpickle import *
 
+if sys.version_info[:2] >= (3, 8):
+    from cloudpickle.cloudpickle_fast import CloudPickler, dumps, dump
 
 __version__ = '1.2.0.dev0'
diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py
index 563f1af55..44ace512b 100644
--- a/cloudpickle/cloudpickle.py
+++ b/cloudpickle/cloudpickle.py
@@ -102,6 +102,8 @@
     PY2 = False
     from importlib._bootstrap import _find_spec
 
+_extract_code_globals_cache = weakref.WeakKeyDictionary()
+
 
 def _ensure_tracking(class_def):
     with _DYNAMIC_CLASS_TRACKER_LOCK:
@@ -195,6 +197,78 @@ def _is_global(obj, name=None):
     return obj2 is obj
 
 
+def _extract_code_globals(co):
+    """
+    Find all global names read or written to by the code block co
+    """
+    out_names = _extract_code_globals_cache.get(co)
+    if out_names is None:
+        names = co.co_names
+        out_names = {names[oparg] for _, oparg in _walk_global_ops(co)}
+
+        # Declaring a function inside another one using the "def ..."
+        # syntax generates a constant code object corresponding to the
+        # nested function. As the nested function may itself need global
+        # variables, we need to introspect its code, extract its globals
+        # (i.e. look for code objects in its co_consts attribute) and add
+        # the result to code_globals
+        if co.co_consts:
+            for const in co.co_consts:
+                if isinstance(const, types.CodeType):
+                    out_names |= _extract_code_globals(const)
+
+        _extract_code_globals_cache[co] = out_names
+
+    return out_names
+
+
+def _find_imported_submodules(code, top_level_dependencies):
+    """
+    Find currently imported submodules used by a function.
+
+    Submodules used by a function need to be detected and referenced for the
+    function to work correctly at depickling time.
+    Because submodules can be referenced as attributes of their parent package
+    (``package.submodule``), we need a special introspection technique that
+    does not rely on GLOBAL-related opcodes to find references to them in a
+    code object.
+
+    Example:
+    ```
+    import concurrent.futures
+    import cloudpickle
+    def func():
+        x = concurrent.futures.ThreadPoolExecutor
+    if __name__ == '__main__':
+        cloudpickle.dumps(func)
+    ```
+    The globals extracted by cloudpickle in the function's state include the
+    concurrent package, but not its submodule (here, concurrent.futures), which
+    is the module used by func. _find_imported_submodules will detect the usage
+    of concurrent.futures. Saving this module alongside func will ensure that
+    calling func once depickled does not fail due to concurrent.futures not
+    being imported.
+    """
+
+    subimports = []
+    # check if any known dependency is an imported package
+    for x in top_level_dependencies:
+        if (isinstance(x, types.ModuleType) and
+                hasattr(x, '__package__') and x.__package__):
+            # check if the package has any currently loaded sub-imports
+            prefix = x.__name__ + '.'
+            # A concurrent thread could mutate sys.modules,
+            # make sure we iterate over a copy to avoid exceptions
+            for name in list(sys.modules):
+                # Older versions of pytest will add a "None" module to
+                # sys.modules.
+                if name is not None and name.startswith(prefix):
+                    # check whether the function can address the sub-module
+                    tokens = set(name[len(prefix):].split('.'))
+                    if not tokens - set(code.co_names):
+                        subimports.append(sys.modules[name])
+    return subimports
+
+
 def _make_cell_set_template_code():
     """Get the Python compiler to emit LOAD_FAST(arg); STORE_DEREF
@@ -493,54 +567,6 @@ def save_pypy_builtin_func(self, obj):
                          obj.__dict__)
         self.save_reduce(*rv, obj=obj)
 
-
-    def _save_subimports(self, code, top_level_dependencies):
-        """
-        Save submodules used by a function but not listed in its globals.
-
-        In the example below:
-
-        ```
-        import concurrent.futures
-        import cloudpickle
-
-
-        def func():
-            x = concurrent.futures.ThreadPoolExecutor
-
-
-        if __name__ == '__main__':
-            cloudpickle.dumps(func)
-        ```
-
-        the globals extracted by cloudpickle in the function's state include
-        the concurrent module, but not its submodule (here,
-        concurrent.futures), which is the module used by func.
-
-        To ensure that calling the depickled function does not raise an
-        AttributeError, this function looks for any currently loaded submodule
-        that the function uses and whose parent is present in the function
-        globals, and saves it before saving the function.
-        """
-
-        # check if any known dependency is an imported package
-        for x in top_level_dependencies:
-            if isinstance(x, types.ModuleType) and hasattr(x, '__package__') and x.__package__:
-                # check if the package has any currently loaded sub-imports
-                prefix = x.__name__ + '.'
-                # A concurrent thread could mutate sys.modules,
-                # make sure we iterate over a copy to avoid exceptions
-                for name in list(sys.modules):
-                    # Older versions of pytest will add a "None" module to sys.modules.
-                    if name is not None and name.startswith(prefix):
-                        # check whether the function can address the sub-module
-                        tokens = set(name[len(prefix):].split('.'))
-                        if not tokens - set(code.co_names):
-                            # ensure unpickler executes this import
-                            self.save(sys.modules[name])
-                            # then discards the reference to it
-                            self.write(pickle.POP)
-
     def _save_dynamic_enum(self, obj, clsdict):
         """Special handling for dynamic Enum subclasses
@@ -676,7 +702,12 @@ def save_function_tuple(self, func):
         save(_fill_function)  # skeleton function updater
         write(pickle.MARK)  # beginning of tuple that _fill_function expects
 
-        self._save_subimports(
+        # Extract currently-imported submodules used by func. Storing these
+        # modules in a dedicated _cloudpickle_submodules attribute of the
+        # object's state will trigger the side effect of importing these
+        # modules at unpickling time (which is necessary for func to work
+        # correctly once depickled)
+        submodules = _find_imported_submodules(
             code,
             itertools.chain(f_globals.values(), closure_values or ()),
         )
@@ -700,6 +731,7 @@ def save_function_tuple(self, func):
             'module': func.__module__,
             'name': func.__name__,
             'doc': func.__doc__,
+            '_cloudpickle_submodules': submodules
         }
         if hasattr(func, '__annotations__') and sys.version_info >= (3, 4):
             state['annotations'] = func.__annotations__
@@ -711,28 +743,6 @@ def save_function_tuple(self, func):
         write(pickle.TUPLE)
         write(pickle.REDUCE)  # applies _fill_function on the tuple
 
-    _extract_code_globals_cache = weakref.WeakKeyDictionary()
-
-    @classmethod
-    def extract_code_globals(cls, co):
-        """
-        Find all globals names read or written to by codeblock co
-        """
-        out_names = cls._extract_code_globals_cache.get(co)
-        if out_names is None:
-            names = co.co_names
-            out_names = {names[oparg] for _, oparg in _walk_global_ops(co)}
-
-            # see if nested function have any global refs
-            if co.co_consts:
-                for const in co.co_consts:
-                    if isinstance(const, types.CodeType):
-                        out_names |= cls.extract_code_globals(const)
-
-            cls._extract_code_globals_cache[co] = out_names
-
-        return out_names
-
     def extract_func_data(self, func):
         """
         Turn the function into a tuple of data necessary to recreate it:
@@ -741,7 +751,7 @@ def extract_func_data(self, func):
         code = func.__code__
 
         # extract all global ref's
-        func_global_refs = self.extract_code_globals(code)
+        func_global_refs = _extract_code_globals(code)
 
         # process all variables referenced by global environment
         f_globals = {}
@@ -1202,6 +1212,13 @@ def _fill_function(*args):
         func.__qualname__ = state['qualname']
     if 'kwdefaults' in state:
         func.__kwdefaults__ = state['kwdefaults']
+    # _cloudpickle_submodules is a set of submodules that must be loaded for
+    # the pickled function to work correctly at unpickling time. Now that these
+    # submodules are depickled (hence imported), they can be removed from the
+    # object's state (the object state only served as a reference holder to
+    # these submodules)
+    if '_cloudpickle_submodules' in state:
+        state.pop('_cloudpickle_submodules')
 
     cells = func.__closure__
     if cells is not None:
diff --git a/cloudpickle/cloudpickle_fast.py b/cloudpickle/cloudpickle_fast.py
new file mode 100644
index 000000000..54421edb8
--- /dev/null
+++ b/cloudpickle/cloudpickle_fast.py
@@ -0,0 +1,542 @@
+"""
+New, fast version of the CloudPickler.
+
+This new CloudPickler class can now extend the fast C Pickler instead of the
+previous Python implementation of the Pickler class. Because this functionality
+is only available for Python versions 3.8+, a lot of backward-compatibility
+code is also removed.
+
+Note that the C Pickler subclassing API is CPython-specific. Therefore, some
+guards present in cloudpickle.py that were written to handle PyPy
+specificities are not present in cloudpickle_fast.py.
+"""
+import abc
+import copyreg
+import io
+import itertools
+import logging
+import _pickle
+import pickle
+import sys
+import types
+import weakref
+
+from _pickle import Pickler
+
+from .cloudpickle import (
+    _is_dynamic, _extract_code_globals, _BUILTIN_TYPE_NAMES, DEFAULT_PROTOCOL,
+    _find_imported_submodules, _get_cell_contents, _is_global, _builtin_type,
+    Enum, _ensure_tracking, _make_skeleton_class, _make_skeleton_enum,
+    _extract_class_dict, string_types, dynamic_subimport, subimport
+)
+
+load, loads = _pickle.load, _pickle.loads
+
+
+# Shorthands similar to pickle.dump/pickle.dumps
+def dump(obj, file, protocol=None):
+    """Serialize obj as bytes streamed into file
+
+    protocol defaults to cloudpickle.DEFAULT_PROTOCOL which is an alias to
+    pickle.HIGHEST_PROTOCOL. This setting favors maximum communication speed
+    between processes running the same Python version.
+
+    Set protocol=pickle.DEFAULT_PROTOCOL instead if you need to ensure
+    compatibility with older versions of Python.
+    """
+    CloudPickler(file, protocol=protocol).dump(obj)
+
+
+def dumps(obj, protocol=None):
+    """Serialize obj as a string of bytes allocated in memory
+
+    protocol defaults to cloudpickle.DEFAULT_PROTOCOL which is an alias to
+    pickle.HIGHEST_PROTOCOL. This setting favors maximum communication speed
+    between processes running the same Python version.
+
+    Set protocol=pickle.DEFAULT_PROTOCOL instead if you need to ensure
+    compatibility with older versions of Python.
+    """
+    with io.BytesIO() as file:
+        cp = CloudPickler(file, protocol=protocol)
+        cp.dump(obj)
+        return file.getvalue()
+
+
+# COLLECTION OF OBJECTS __getnewargs__-LIKE METHODS
+# -------------------------------------------------
+
+def _class_getnewargs(obj):
+    type_kwargs = {}
+    if hasattr(obj, "__slots__"):
+        type_kwargs["__slots__"] = obj.__slots__
+
+    __dict__ = obj.__dict__.get('__dict__', None)
+    if isinstance(__dict__, property):
+        type_kwargs['__dict__'] = __dict__
+
+    return (type(obj), obj.__name__, obj.__bases__, type_kwargs,
+            _ensure_tracking(obj), None)
+
+
+def _enum_getnewargs(obj):
+    members = dict((e.name, e.value) for e in obj)
+    return (obj.__bases__, obj.__name__, obj.__qualname__, members,
+            obj.__module__, _ensure_tracking(obj), None)
+
+
+# COLLECTION OF OBJECTS RECONSTRUCTORS
+# ------------------------------------
+def _file_reconstructor(retval):
+    return retval
+
+
+# COLLECTION OF OBJECTS STATE GETTERS
+# -----------------------------------
+def _function_getstate(func):
+    # - Put func's dynamic attributes (stored in func.__dict__) in state.
+    #   These attributes will be restored at unpickling time using
+    #   f.__dict__.update(state)
+    # - Put func's members into slotstate. Such attributes will be restored
+    #   at unpickling time by iterating over slotstate and calling
+    #   setattr(func, slotname, slotvalue)
+    slotstate = {
+        "__name__": func.__name__,
+        "__qualname__": func.__qualname__,
+        "__annotations__": func.__annotations__,
+        "__kwdefaults__": func.__kwdefaults__,
+        "__defaults__": func.__defaults__,
+        "__module__": func.__module__,
+        "__doc__": func.__doc__,
+        "__closure__": func.__closure__,
+    }
+
+    f_globals_ref = _extract_code_globals(func.__code__)
+    f_globals = {k: func.__globals__[k] for k in f_globals_ref if k in
+                 func.__globals__}
+
+    closure_values = (
+        list(map(_get_cell_contents, func.__closure__))
+        if func.__closure__ is not None else ()
+    )
+
+    # Extract currently-imported submodules used by func. Storing these
+    # modules in a dedicated _cloudpickle_submodules entry of the object's
+    # state will trigger the side effect of importing these modules at
+    # unpickling time (which is necessary for func to work correctly once
+    # depickled)
+    slotstate["_cloudpickle_submodules"] = _find_imported_submodules(
+        func.__code__, itertools.chain(f_globals.values(), closure_values))
+    slotstate["__globals__"] = f_globals
+
+    state = func.__dict__
+    return state, slotstate
+
+
+def _class_getstate(obj):
+    clsdict = _extract_class_dict(obj)
+    clsdict.pop('__weakref__', None)
+
+    # For ABCMeta in python3.7+, remove _abc_impl as it is not picklable.
+    # This fix breaks the ABC cache, but only makes the first calls to
+    # issubclass slower.
+    if "_abc_impl" in clsdict:
+        (registry, _, _, _) = abc._get_dump(obj)
+        clsdict["_abc_impl"] = [subclass_weakref()
+                                for subclass_weakref in registry]
+    if hasattr(obj, "__slots__"):
+        # pickle string length optimization: member descriptors of obj are
+        # created automatically from obj's __slots__ attribute, no need to
+        # save them in obj's state
+        if isinstance(obj.__slots__, string_types):
+            clsdict.pop(obj.__slots__)
+        else:
+            for k in obj.__slots__:
+                clsdict.pop(k, None)
+
+    clsdict.pop('__dict__', None)  # unpicklable property object
+
+    return (clsdict, {})
+
+
+def _enum_getstate(obj):
+    clsdict, slotstate = _class_getstate(obj)
+
+    members = dict((e.name, e.value) for e in obj)
+    # Cleanup the clsdict that will be passed to _rehydrate_skeleton_class:
+    # Those attributes are already handled by the metaclass.
+    for attrname in ["_generate_next_value_", "_member_names_",
+                     "_member_map_", "_member_type_",
+                     "_value2member_map_"]:
+        clsdict.pop(attrname, None)
+    for member in members:
+        clsdict.pop(member)
+        # Special handling of Enum subclasses
+    return clsdict, slotstate
+
+
+# COLLECTIONS OF OBJECTS REDUCERS
+# -------------------------------
+# A reducer is a function that takes a single argument (obj) and returns a
+# tuple with all the necessary data to re-construct obj. Apart from a few
+# exceptions (list, dict, bytes, int, etc.), a reducer is necessary to
+# correctly pickle an object.
+# While many built-in objects (exception objects, instances of the "object"
+# class, etc.) are shipped with their own built-in reducer (invoked using
+# obj.__reduce__), some are not. The following methods were created to "fill
+# these holes".
+ +def _code_reduce(obj): + """codeobject reducer""" + args = ( + obj.co_argcount, obj.co_posonlyargcount, + obj.co_kwonlyargcount, obj.co_nlocals, obj.co_stacksize, + obj.co_flags, obj.co_code, obj.co_consts, obj.co_names, + obj.co_varnames, obj.co_filename, obj.co_name, + obj.co_firstlineno, obj.co_lnotab, obj.co_freevars, + obj.co_cellvars + ) + return types.CodeType, args + + +def _cell_reduce(obj): + """Cell (containing values of a function's free variables) reducer""" + try: + obj.cell_contents + except ValueError: # cell is empty + return types.CellType, () + else: + return types.CellType, (obj.cell_contents,) + + +def _classmethod_reduce(obj): + orig_func = obj.__func__ + return type(obj), (orig_func,) + + +def _file_reduce(obj): + """Save a file""" + import io + + if not hasattr(obj, "name") or not hasattr(obj, "mode"): + raise pickle.PicklingError( + "Cannot pickle files that do not map to an actual file" + ) + if obj is sys.stdout: + return getattr, (sys, "stdout") + if obj is sys.stderr: + return getattr, (sys, "stderr") + if obj is sys.stdin: + raise pickle.PicklingError("Cannot pickle standard input") + if obj.closed: + raise pickle.PicklingError("Cannot pickle closed files") + if hasattr(obj, "isatty") and obj.isatty(): + raise pickle.PicklingError( + "Cannot pickle files that map to tty objects" + ) + if "r" not in obj.mode and "+" not in obj.mode: + raise pickle.PicklingError( + "Cannot pickle files that are not opened for reading: %s" + % obj.mode + ) + + name = obj.name + + retval = io.StringIO() + + try: + # Read the whole file + curloc = obj.tell() + obj.seek(0) + contents = obj.read() + obj.seek(curloc) + except IOError: + raise pickle.PicklingError( + "Cannot pickle file %s as it cannot be read" % name + ) + retval.write(contents) + retval.seek(curloc) + + retval.name = name + return _file_reconstructor, (retval,) + + +def _mappingproxy_reduce(obj): + return types.MappingProxyType, (dict(obj),) + + +def _memoryview_reduce(obj): + return bytes, (obj.tobytes(),) + + +def _module_reduce(obj): + if _is_dynamic(obj): + return dynamic_subimport, (obj.__name__, vars(obj)) + else: + return subimport, (obj.__name__,) + + +def _method_reduce(obj): + return (types.MethodType, (obj.__func__, obj.__self__)) + + +def _logger_reduce(obj): + return logging.getLogger, (obj.name,) + + +def _root_logger_reduce(obj): + return logging.getLogger, () + + +def _weakset_reduce(obj): + return weakref.WeakSet, (list(obj),) + + +def _dynamic_class_reduce(obj): + """ + Save a class that can't be stored as module global. + + This method is used to serialize classes that are defined inside + functions, or that otherwise can't be serialized as attribute lookups + from global modules. 
+    """
+    if Enum is not None and issubclass(obj, Enum):
+        return (
+            _make_skeleton_enum, _enum_getnewargs(obj), _enum_getstate(obj),
+            None, None, _class_setstate
+        )
+    else:
+        return (
+            _make_skeleton_class, _class_getnewargs(obj), _class_getstate(obj),
+            None, None, _class_setstate
+        )
+
+
+def _class_reduce(obj):
+    """Select the reducer depending on the dynamic nature of the class obj"""
+    if obj is type(None):  # noqa
+        return type, (None,)
+    elif obj is type(Ellipsis):
+        return type, (Ellipsis,)
+    elif obj is type(NotImplemented):
+        return type, (NotImplemented,)
+    elif obj in _BUILTIN_TYPE_NAMES:
+        return _builtin_type, (_BUILTIN_TYPE_NAMES[obj],)
+    elif not _is_global(obj):
+        return _dynamic_class_reduce(obj)
+    return NotImplemented
+
+
+# COLLECTIONS OF OBJECTS STATE SETTERS
+# ------------------------------------
+# state setters are called at unpickling time, once the object is created and
+# it has to be updated to how it was at pickling time.
+
+
+def _function_setstate(obj, state):
+    """Update the state of a dynamic function.
+
+    As __closure__ and __globals__ are readonly attributes of a function, we
+    cannot rely on the native setstate routine of pickle.load_build, which
+    calls setattr on items of the slotstate. Instead, we have to modify them
+    inplace.
+    """
+    state, slotstate = state
+    obj.__dict__.update(state)
+
+    obj_globals = slotstate.pop("__globals__")
+    obj_closure = slotstate.pop("__closure__")
+    # _cloudpickle_submodules is a set of submodules that must be loaded for
+    # the pickled function to work correctly at unpickling time. Now that these
+    # submodules are depickled (hence imported), they can be removed from the
+    # object's state (the object state only served as a reference holder to
+    # these submodules)
+    slotstate.pop("_cloudpickle_submodules")
+
+    obj.__globals__.update(obj_globals)
+    obj.__globals__["__builtins__"] = __builtins__
+
+    if obj_closure is not None:
+        for i, cell in enumerate(obj_closure):
+            try:
+                value = cell.cell_contents
+            except ValueError:  # cell is empty
+                continue
+            obj.__closure__[i].cell_contents = value
+
+    for k, v in slotstate.items():
+        setattr(obj, k, v)
+
+
+def _class_setstate(obj, state):
+    state, slotstate = state
+    registry = None
+    for attrname, attr in state.items():
+        if attrname == "_abc_impl":
+            registry = attr
+        else:
+            setattr(obj, attrname, attr)
+    if registry is not None:
+        for subclass in registry:
+            obj.register(subclass)
+
+    return obj
+
+
+class CloudPickler(Pickler):
+    """Fast C Pickler extension with additional reducing routines.
+
+    CloudPickler's extensions exist in two forms:
+
+    * its dispatch_table containing reducers that are called only if ALL
+      built-in saving functions were previously discarded.
+    * a special callback named "reducer_override", invoked before the standard
+      function/class builtin-saving method (save_global), to serialize dynamic
+      functions
+    """
+
+    # cloudpickle's own dispatch_table, containing the additional set of
+    # objects (compared to the standard library pickle) that cloudpickle can
+    # serialize.
+    dispatch = {}
+    dispatch[classmethod] = _classmethod_reduce
+    dispatch[io.TextIOWrapper] = _file_reduce
+    dispatch[logging.Logger] = _logger_reduce
+    dispatch[logging.RootLogger] = _root_logger_reduce
+    dispatch[memoryview] = _memoryview_reduce
+    dispatch[staticmethod] = _classmethod_reduce
+    dispatch[types.CellType] = _cell_reduce
+    dispatch[types.CodeType] = _code_reduce
+    dispatch[types.ModuleType] = _module_reduce
+    dispatch[types.MethodType] = _method_reduce
+    dispatch[types.MappingProxyType] = _mappingproxy_reduce
+    dispatch[weakref.WeakSet] = _weakset_reduce
+
+    def __init__(self, file, protocol=None):
+        if protocol is None:
+            protocol = DEFAULT_PROTOCOL
+        Pickler.__init__(self, file, protocol=protocol)
+        # map functions' __globals__ attribute ids to ensure that functions
+        # sharing the same global namespace at pickling time also share their
+        # global namespace at unpickling time.
+        self.globals_ref = {}
+
+        # Take into account potential custom reducers registered by external
+        # modules
+        self.dispatch_table = copyreg.dispatch_table.copy()
+        self.dispatch_table.update(self.dispatch)
+        self.proto = int(protocol)
+
+    def reducer_override(self, obj):
+        """Type-agnostic reducing callback for function and classes.
+
+        For performance reasons, subclasses of the C _pickle.Pickler class
+        cannot register custom reducers for functions and classes in the
+        dispatch_table. Reducers for such types must instead be implemented
+        in the special reducer_override method.
+
+        Note that this method will be called for any object except a few
+        builtin types (int, lists, dicts, etc.), which differs from reducers
+        in the Pickler's dispatch_table, each of them being invoked for
+        objects of a specific type only.
+
+        This property comes in handy for classes: although most classes are
+        instances of the ``type`` metaclass, some of them can be instances of
+        other custom metaclasses (such as enum.EnumMeta for example). In
+        particular, the metaclass will likely not be known in advance, and
+        thus cannot be special-cased using an entry in the dispatch_table.
+        reducer_override, among other things, allows us to register a reducer
+        that will be called for any class, independently of its type.
+
+
+        Notes:
+
+        * reducer_override has priority over dispatch_table-registered
+          reducers.
+        * reducer_override can be used to fix other limitations of cloudpickle
+          for other types that suffered from type-specific reducers, such as
+          Exceptions. See https://github.com/cloudpipe/cloudpickle/issues/248
+        """
+        t = type(obj)
+        try:
+            is_anyclass = issubclass(t, type)
+        except TypeError:  # t is not a class (old Boost; see SF #502085)
+            is_anyclass = False
+
+        if is_anyclass:
+            return _class_reduce(obj)
+        elif isinstance(obj, types.FunctionType):
+            return self._function_reduce(obj)
+        else:
+            # fallback to save_global, including the Pickler's dispatch_table
+            return NotImplemented
+
+    # function reducers are defined as instance methods of CloudPickler
+    # objects, as they rely on a CloudPickler attribute (globals_ref)
+    def _dynamic_function_reduce(self, func):
+        """Reduce a function that is not pickleable via attribute lookup."""
+        newargs = self._function_getnewargs(func)
+        state = _function_getstate(func)
+        return (types.FunctionType, newargs, state, None, None,
+                _function_setstate)
+
+    def _function_reduce(self, obj):
+        """Reducer for function objects.
+
+        If obj is a top-level attribute of a file-backed module, this
+        reducer returns NotImplemented, making the CloudPickler fall back to
+        traditional _pickle.Pickler routines to save obj. Otherwise, it
+        reduces obj using a custom cloudpickle reducer designed specifically
+        to handle dynamic functions.
+
+        As opposed to cloudpickle.py, there is no special handling for
+        builtin PyPy functions, because cloudpickle_fast is CPython-specific.
+        """
+        if _is_global(obj):
+            return NotImplemented
+        else:
+            return self._dynamic_function_reduce(obj)
+
+    def _function_getnewargs(self, func):
+        code = func.__code__
+
+        # base_globals represents the future global namespace of func at
+        # unpickling time. Looking it up and storing it in
+        # CloudPickler.globals_ref allows functions sharing the same globals
+        # at pickling time to also share them once unpickled, on one
+        # condition: since globals_ref is an attribute of a CloudPickler
+        # instance, and a new CloudPickler is created each time
+        # cloudpickle.dump or cloudpickle.dumps is called, functions also need
+        # to be saved within the same invocation of
+        # cloudpickle.dump/cloudpickle.dumps (for example:
+        # cloudpickle.dumps([f1, f2])). There is no such limitation when using
+        # CloudPickler.dump, as long as the multiple invocations are bound to
+        # the same CloudPickler.
+        base_globals = self.globals_ref.setdefault(id(func.__globals__), {})
+
+        if base_globals == {}:
+            # Add module attributes used to resolve relative imports
+            # instructions inside func.
+            for k in ["__package__", "__name__", "__path__", "__file__"]:
+                if k in func.__globals__:
+                    base_globals[k] = func.__globals__[k]
+
+        # Do not bind the free variables before the function is created to
+        # avoid infinite recursion.
+        if func.__closure__ is None:
+            closure = None
+        else:
+            closure = tuple(
+                types.CellType() for _ in range(len(code.co_freevars)))
+
+        return code, base_globals, None, None, closure
+
+    def dump(self, obj):
+        try:
+            return Pickler.dump(self, obj)
+        except RuntimeError as e:
+            if "recursion" in e.args[0]:
+                msg = (
+                    "Could not pickle object as excessively deep recursion "
+                    "required."
+                )
+                raise pickle.PicklingError(msg)
+            else:
+                raise
diff --git a/dev-requirements.txt b/dev-requirements.txt
index faff1709e..1849f20d5 100644
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -6,3 +6,4 @@ psutil
 futures; python_version < '3.4'
 # Code coverage uploader for Travis:
 codecov
+coverage
diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py
index e260a35ec..615b6977b 100644
--- a/tests/cloudpickle_test.py
+++ b/tests/cloudpickle_test.py
@@ -961,7 +961,7 @@ def check_logger(self, name):
             logger = cloudpickle.loads(base64.b32decode(b'{}'))
             logger.info('hello')
         """.format(base64.b32encode(dumped).decode('ascii'))
-        proc = subprocess.Popen([sys.executable, "-c", code],
+        proc = subprocess.Popen([sys.executable, "-W ignore", "-c", code],
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.STDOUT)
         out, _ = proc.communicate()
@@ -1076,7 +1076,13 @@ def f():
         # some setup is required to allow pytest apimodules to be correctly
         # serializable.
from cloudpickle import CloudPickler - CloudPickler.dispatch[type(py.builtin)] = CloudPickler.save_module + if sys.version_info[:2] >= (3, 8): + from cloudpickle import cloudpickle_fast as cp_fast + CloudPickler.dispatch[ + type(py.builtin)] = cp_fast._module_reduce + else: + CloudPickler.dispatch[type(py.builtin)] = CloudPickler.save_module + g = cloudpickle.loads(cloudpickle.dumps(f, protocol=self.protocol)) result = g() @@ -1823,7 +1829,7 @@ def inner_function(): return _TEST_GLOBAL_VARIABLE return inner_function - globals_ = cloudpickle.CloudPickler.extract_code_globals( + globals_ = cloudpickle.cloudpickle._extract_code_globals( function_factory.__code__) assert globals_ == {'_TEST_GLOBAL_VARIABLE'} @@ -1832,6 +1838,15 @@ def inner_function(): inner_func = depickled_factory() assert inner_func() == _TEST_GLOBAL_VARIABLE + def test_recursion_during_pickling(self): + class A: + def __getattr__(self, name): + return getattr(self, name) + + a = A() + with pytest.raises(pickle.PicklingError, match='recursion'): + cloudpickle.dumps(a) + class Protocol2CloudPickleTest(CloudPickleTest): diff --git a/tests/testutils.py b/tests/testutils.py index 3ad1eb98e..e26849758 100644 --- a/tests/testutils.py +++ b/tests/testutils.py @@ -72,7 +72,11 @@ def subprocess_pickle_echo(input_data, protocol=None, timeout=TIMEOUT): """ # run then pickle_echo(protocol=protocol) in __main__: - cmd = [sys.executable, __file__, "--protocol", str(protocol)] + + # Protect stderr from any warning, as we will assume an error will happen + # if it is not empty. A concrete example is pytest using the imp module, + # which is deprecated in python 3.8 + cmd = [sys.executable, '-W ignore', __file__, "--protocol", str(protocol)] cwd, env = _make_cwd_env() proc = Popen(cmd, stdin=PIPE, stdout=PIPE, stderr=PIPE, cwd=cwd, env=env, bufsize=4096) @@ -186,7 +190,7 @@ def assert_run_python_script(source_code, timeout=TIMEOUT): try: with open(source_file, 'wb') as f: f.write(source_code.encode('utf-8')) - cmd = [sys.executable, source_file] + cmd = [sys.executable, '-W ignore', source_file] cwd, env = _make_cwd_env() kwargs = { 'cwd': cwd,
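---

For reviewers, here is a minimal usage sketch (not part of the diff) of the behavior this PR introduces, assuming a cloudpickle build with this change installed. On CPython 3.8+ the `cloudpickle` namespace re-exports the `cloudpickle_fast` implementations of `CloudPickler`, `dump` and `dumps`, and `_find_imported_submodules` keeps submodule references working across a round trip; the helper name `make_task` below is mine, not from the PR:

```python
import io
import pickle
import sys

import concurrent.futures  # submodule used by the dynamic function below

import cloudpickle


def make_task():
    # A nested function is not importable by name, so the stock pickle
    # module cannot serialize it; cloudpickle pickles it by value instead.
    def task():
        # Uses concurrent.futures through its parent package: the case
        # _find_imported_submodules is designed to detect.
        return concurrent.futures.ThreadPoolExecutor
    return task


payload = cloudpickle.dumps(make_task())
# cloudpickle emits standard pickle opcodes, so plain pickle can load them.
restored = pickle.loads(payload)
assert restored() is concurrent.futures.ThreadPoolExecutor

if sys.version_info[:2] >= (3, 8):
    # On 3.8+, CloudPickler is the C-Pickler subclass from cloudpickle_fast.
    buf = io.BytesIO()
    cloudpickle.CloudPickler(buf).dump(make_task())
    assert pickle.loads(buf.getvalue())() is concurrent.futures.ThreadPoolExecutor
```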
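The new `test_recursion_during_pickling` exercises the `RuntimeError`-to-`PicklingError` translation in `CloudPickler.dump`. A standalone illustration of the same behavior, under the same assumptions as above (the class name `Loop` is hypothetical, not from the PR):

```python
import pickle

import cloudpickle


class Loop:
    def __getattr__(self, name):
        # Recurses on any missing-attribute lookup, including the
        # __getstate__ probe performed during pickling, so dumping an
        # instance exceeds the recursion limit.
        return getattr(self, name)


try:
    cloudpickle.dumps(Loop())
except pickle.PicklingError as exc:
    # CloudPickler.dump converts the "maximum recursion depth exceeded"
    # RuntimeError into a PicklingError mentioning recursion.
    assert "recursion" in str(exc)
```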