Skip to content

Commit 0c0b8d4

Browse files
authored
Improve dynamic module recognition (#357)
1 parent a0c3509 commit 0c0b8d4

File tree

10 files changed

+220
-106
lines changed

10 files changed

+220
-106
lines changed

CHANGES.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,10 @@
11
1.5.0 (in development)
22
======================
33

4+
- Fix a bug causing cloudpickle to crash when pickling dynamically created,
5+
importable modules.
6+
([issue #360](https://github.com/cloudpipe/cloudpickle/issues/354))
7+
48

59
1.4.1
610
=====

cloudpickle/cloudpickle.py

Lines changed: 25 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -163,9 +163,24 @@ def _whichmodule(obj, name):
163163
return None
164164

165165

166-
def _is_importable_by_name(obj, name=None):
167-
"""Determine if obj can be pickled as attribute of a file-backed module"""
168-
return _lookup_module_and_qualname(obj, name=name) is not None
166+
def _is_importable(obj, name=None):
167+
"""Dispatcher utility to test the importability of various constructs."""
168+
if isinstance(obj, types.FunctionType):
169+
return _lookup_module_and_qualname(obj, name=name) is not None
170+
elif issubclass(type(obj), type):
171+
return _lookup_module_and_qualname(obj, name=name) is not None
172+
elif isinstance(obj, types.ModuleType):
173+
# We assume that sys.modules is primarily used as a cache mechanism for
174+
# the Python import machinery. Checking if a module has been added in
175+
# is sys.modules therefore a cheap and simple heuristic to tell us whether
176+
# we can assume that a given module could be imported by name in
177+
# another Python process.
178+
return obj.__name__ in sys.modules
179+
else:
180+
raise TypeError(
181+
"cannot check importability of {} instances".format(
182+
type(obj).__name__)
183+
)
169184

170185

171186
def _lookup_module_and_qualname(obj, name=None):
@@ -187,6 +202,8 @@ def _lookup_module_and_qualname(obj, name=None):
187202
if module_name == "__main__":
188203
return None
189204

205+
# Note: if module_name is in sys.modules, the corresponding module is
206+
# assumed importable at unpickling time. See #357
190207
module = sys.modules.get(module_name, None)
191208
if module is None:
192209
# The main reason why obj's module would not be imported is that this
@@ -196,10 +213,6 @@ def _lookup_module_and_qualname(obj, name=None):
196213
# supported, as the standard pickle does not support it either.
197214
return None
198215

199-
# module has been added to sys.modules, but it can still be dynamic.
200-
if _is_dynamic(module):
201-
return None
202-
203216
try:
204217
obj2, parent = _getattribute(module, name)
205218
except AttributeError:
@@ -496,12 +509,12 @@ def save_module(self, obj):
496509
"""
497510
Save a module as an import
498511
"""
499-
if _is_dynamic(obj):
512+
if _is_importable(obj):
513+
self.save_reduce(subimport, (obj.__name__,), obj=obj)
514+
else:
500515
obj.__dict__.pop('__builtins__', None)
501516
self.save_reduce(dynamic_subimport, (obj.__name__, vars(obj)),
502517
obj=obj)
503-
else:
504-
self.save_reduce(subimport, (obj.__name__,), obj=obj)
505518

506519
dispatch[types.ModuleType] = save_module
507520

@@ -536,7 +549,7 @@ def save_function(self, obj, name=None):
536549
Determines what kind of function obj is (e.g. lambda, defined at
537550
interactive prompt, etc) and handles the pickling appropriately.
538551
"""
539-
if _is_importable_by_name(obj, name=name):
552+
if _is_importable(obj, name=name):
540553
return Pickler.save_global(self, obj, name=name)
541554
elif PYPY and isinstance(obj.__code__, builtin_code_type):
542555
return self.save_pypy_builtin_func(obj)
@@ -840,7 +853,7 @@ def save_global(self, obj, name=None, pack=struct.pack):
840853
self._save_parametrized_type_hint(obj)
841854
elif name is not None:
842855
Pickler.save_global(self, obj, name=name)
843-
elif not _is_importable_by_name(obj, name=name):
856+
elif not _is_importable(obj, name=name):
844857
self.save_dynamic_class(obj)
845858
else:
846859
Pickler.save_global(self, obj, name=name)
@@ -1307,39 +1320,6 @@ class id will also reuse this enum definition.
13071320
return _lookup_class_or_track(class_tracker_id, enum_class)
13081321

13091322

1310-
def _is_dynamic(module):
1311-
"""
1312-
Return True if the module is special module that cannot be imported by its
1313-
name.
1314-
"""
1315-
# Quick check: module that have __file__ attribute are not dynamic modules.
1316-
if hasattr(module, '__file__'):
1317-
return False
1318-
1319-
if module.__spec__ is not None:
1320-
return False
1321-
1322-
# In PyPy, Some built-in modules such as _codecs can have their
1323-
# __spec__ attribute set to None despite being imported. For such
1324-
# modules, the ``_find_spec`` utility of the standard library is used.
1325-
parent_name = module.__name__.rpartition('.')[0]
1326-
if parent_name: # pragma: no cover
1327-
# This code handles the case where an imported package (and not
1328-
# module) remains with __spec__ set to None. It is however untested
1329-
# as no package in the PyPy stdlib has __spec__ set to None after
1330-
# it is imported.
1331-
try:
1332-
parent = sys.modules[parent_name]
1333-
except KeyError:
1334-
msg = "parent {!r} not in sys.modules"
1335-
raise ImportError(msg.format(parent_name))
1336-
else:
1337-
pkgpath = parent.__path__
1338-
else:
1339-
pkgpath = None
1340-
return _find_spec(module.__name__, pkgpath, module) is None
1341-
1342-
13431323
def _make_typevar(name, bound, constraints, covariant, contravariant,
13441324
class_tracker_id):
13451325
tv = typing.TypeVar(

cloudpickle/cloudpickle_fast.py

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -25,10 +25,11 @@
2525
from _pickle import Pickler
2626

2727
from .cloudpickle import (
28-
_is_dynamic, _extract_code_globals, _BUILTIN_TYPE_NAMES, DEFAULT_PROTOCOL,
29-
_find_imported_submodules, _get_cell_contents, _is_importable_by_name, _builtin_type,
30-
Enum, _get_or_create_tracker_id, _make_skeleton_class, _make_skeleton_enum,
31-
_extract_class_dict, dynamic_subimport, subimport, _typevar_reduce, _get_bases,
28+
_extract_code_globals, _BUILTIN_TYPE_NAMES, DEFAULT_PROTOCOL,
29+
_find_imported_submodules, _get_cell_contents, _is_importable,
30+
_builtin_type, Enum, _get_or_create_tracker_id, _make_skeleton_class,
31+
_make_skeleton_enum, _extract_class_dict, dynamic_subimport, subimport,
32+
_typevar_reduce, _get_bases,
3233
)
3334

3435
load, loads = _pickle.load, _pickle.loads
@@ -276,11 +277,11 @@ def _memoryview_reduce(obj):
276277

277278

278279
def _module_reduce(obj):
279-
if _is_dynamic(obj):
280+
if _is_importable(obj):
281+
return subimport, (obj.__name__,)
282+
else:
280283
obj.__dict__.pop('__builtins__', None)
281284
return dynamic_subimport, (obj.__name__, vars(obj))
282-
else:
283-
return subimport, (obj.__name__,)
284285

285286

286287
def _method_reduce(obj):
@@ -333,7 +334,7 @@ def _class_reduce(obj):
333334
return type, (NotImplemented,)
334335
elif obj in _BUILTIN_TYPE_NAMES:
335336
return _builtin_type, (_BUILTIN_TYPE_NAMES[obj],)
336-
elif not _is_importable_by_name(obj):
337+
elif not _is_importable(obj):
337338
return _dynamic_class_reduce(obj)
338339
return NotImplemented
339340

@@ -505,7 +506,7 @@ def _function_reduce(self, obj):
505506
As opposed to cloudpickle.py, There no special handling for builtin
506507
pypy functions because cloudpickle_fast is CPython-specific.
507508
"""
508-
if _is_importable_by_name(obj):
509+
if _is_importable(obj):
509510
return NotImplemented
510511
else:
511512
return self._dynamic_function_reduce(obj)

dev-requirements.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,3 +11,5 @@ numpy; python_version <= '3.8'
1111
# Code coverage uploader for Travis:
1212
codecov
1313
coverage
14+
# Utility package used when running the cloudpickle test suite
15+
./tests/cloudpickle_testpkg

tests/cloudpickle_test.py

Lines changed: 64 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@
4343
tornado = None
4444

4545
import cloudpickle
46-
from cloudpickle.cloudpickle import _is_dynamic
46+
from cloudpickle.cloudpickle import _is_importable
4747
from cloudpickle.cloudpickle import _make_empty_cell, cell_set
4848
from cloudpickle.cloudpickle import _extract_class_dict, _whichmodule
4949
from cloudpickle.cloudpickle import _lookup_module_and_qualname
@@ -52,6 +52,8 @@
5252
from .testutils import assert_run_python_script
5353
from .testutils import subprocess_worker
5454

55+
from _cloudpickle_testpkg import relative_imports_factory
56+
5557

5658
_TEST_GLOBAL_VARIABLE = "default_value"
5759

@@ -678,25 +680,62 @@ def my_small_function(x, y):
678680
assert b'unwanted_function' not in b
679681
assert b'math' not in b
680682

681-
def test_is_dynamic_module(self):
683+
def test_module_importability(self):
682684
import pickle # decouple this test from global imports
683685
import os.path
684686
import distutils
685687
import distutils.ccompiler
686688

687-
assert not _is_dynamic(pickle)
688-
assert not _is_dynamic(os.path) # fake (aliased) module
689-
assert not _is_dynamic(distutils) # package
690-
assert not _is_dynamic(distutils.ccompiler) # module in package
689+
assert _is_importable(pickle)
690+
assert _is_importable(os.path) # fake (aliased) module
691+
assert _is_importable(distutils) # package
692+
assert _is_importable(distutils.ccompiler) # module in package
691693

692-
# user-created module without using the import machinery are also
693-
# dynamic
694694
dynamic_module = types.ModuleType('dynamic_module')
695-
assert _is_dynamic(dynamic_module)
695+
assert not _is_importable(dynamic_module)
696696

697697
if platform.python_implementation() == 'PyPy':
698698
import _codecs
699-
assert not _is_dynamic(_codecs)
699+
assert _is_importable(_codecs)
700+
701+
# #354: Check that modules created dynamically during the import of
702+
# their parent modules are considered importable by cloudpickle.
703+
# See the mod_with_dynamic_submodule documentation for more
704+
# details of this use case.
705+
import _cloudpickle_testpkg.mod.dynamic_submodule as m
706+
assert _is_importable(m)
707+
assert pickle_depickle(m, protocol=self.protocol) is m
708+
709+
# Check for similar behavior for a module that cannot be imported by
710+
# attribute lookup.
711+
from _cloudpickle_testpkg.mod import dynamic_submodule_two as m2
712+
# Note: import _cloudpickle_testpkg.mod.dynamic_submodule_two as m2
713+
# works only for Python 3.7+
714+
assert _is_importable(m2)
715+
assert pickle_depickle(m2, protocol=self.protocol) is m2
716+
717+
# Submodule_three is a dynamic module only importable via module lookup
718+
with pytest.raises(ImportError):
719+
import _cloudpickle_testpkg.mod.submodule_three # noqa
720+
from _cloudpickle_testpkg.mod import submodule_three as m3
721+
assert not _is_importable(m3)
722+
723+
# This module cannot be pickled using attribute lookup (as it does not
724+
# have a `__module__` attribute like classes and functions.
725+
assert not hasattr(m3, '__module__')
726+
depickled_m3 = pickle_depickle(m3, protocol=self.protocol)
727+
assert depickled_m3 is not m3
728+
assert m3.f(1) == depickled_m3.f(1)
729+
730+
# Do the same for an importable dynamic submodule inside a dynamic
731+
# module inside a file-backed module.
732+
import _cloudpickle_testpkg.mod.dynamic_submodule.dynamic_subsubmodule as sm # noqa
733+
assert _is_importable(sm)
734+
assert pickle_depickle(sm, protocol=self.protocol) is sm
735+
736+
expected = "cannot check importability of object instances"
737+
with pytest.raises(TypeError, match=expected):
738+
_is_importable(object())
700739

701740
def test_Ellipsis(self):
702741
self.assertEqual(Ellipsis,
@@ -1181,11 +1220,14 @@ def func(x):
11811220
func.__module__ = None
11821221

11831222
class NonModuleObject(object):
1223+
def __ini__(self):
1224+
self.some_attr = None
1225+
11841226
def __getattr__(self, name):
1185-
# We whitelist func so that a _whichmodule(func, None) call returns
1186-
# the NonModuleObject instance if a type check on the entries
1187-
# of sys.modules is not carried out, but manipulating this
1188-
# instance thinking it really is a module later on in the
1227+
# We whitelist func so that a _whichmodule(func, None) call
1228+
# returns the NonModuleObject instance if a type check on the
1229+
# entries of sys.modules is not carried out, but manipulating
1230+
# this instance thinking it really is a module later on in the
11891231
# pickling process of func errors out
11901232
if name == 'func':
11911233
return func
@@ -1200,7 +1242,7 @@ def __getattr__(self, name):
12001242
# Any manipulation of non_module_object relying on attribute access
12011243
# will raise an Exception
12021244
with pytest.raises(AttributeError):
1203-
_is_dynamic(non_module_object)
1245+
_ = non_module_object.some_attr
12041246

12051247
try:
12061248
sys.modules['NonModuleObject'] = non_module_object
@@ -1966,20 +2008,9 @@ def check_positive(x):
19662008

19672009
def test_relative_import_inside_function(self):
19682010
# Make sure relative imports inside round-tripped functions is not
1969-
# broken.This was a bug in cloudpickle versions <= 0.5.3 and was
2011+
# broken. This was a bug in cloudpickle versions <= 0.5.3 and was
19702012
# re-introduced in 0.8.0.
1971-
1972-
# Both functions living inside modules and packages are tested.
1973-
def f():
1974-
# module_function belongs to mypkg.mod1, which is a module
1975-
from .mypkg import module_function
1976-
return module_function()
1977-
1978-
def g():
1979-
# package_function belongs to mypkg, which is a package
1980-
from .mypkg import package_function
1981-
return package_function()
1982-
2013+
f, g = relative_imports_factory()
19832014
for func, source in zip([f, g], ["module", "package"]):
19842015
# Make sure relative imports are initially working
19852016
assert func() == "hello from a {}!".format(source)
@@ -2028,7 +2059,7 @@ def f(a, /, b=1):
20282059
def test___reduce___returns_string(self):
20292060
# Non regression test for objects with a __reduce__ method returning a
20302061
# string, meaning "save by attribute using save_global"
2031-
from .mypkg import some_singleton
2062+
from _cloudpickle_testpkg import some_singleton
20322063
assert some_singleton.__reduce__() == "some_singleton"
20332064
depickled_singleton = pickle_depickle(
20342065
some_singleton, protocol=self.protocol)
@@ -2100,7 +2131,7 @@ def test_pickle_dynamic_typevar_memoization(self):
21002131
assert depickled_T1 is depickled_T2
21012132

21022133
def test_pickle_importable_typevar(self):
2103-
from .mypkg import T
2134+
from _cloudpickle_testpkg import T
21042135
T1 = pickle_depickle(T, protocol=self.protocol)
21052136
assert T1 is T
21062137

@@ -2230,12 +2261,12 @@ def test_lookup_module_and_qualname_dynamic_typevar():
22302261

22312262

22322263
def test_lookup_module_and_qualname_importable_typevar():
2233-
from . import mypkg
2234-
T = mypkg.T
2264+
import _cloudpickle_testpkg
2265+
T = _cloudpickle_testpkg.T
22352266
module_and_name = _lookup_module_and_qualname(T, name=T.__name__)
22362267
assert module_and_name is not None
22372268
module, name = module_and_name
2238-
assert module is mypkg
2269+
assert module is _cloudpickle_testpkg
22392270
assert name == 'T'
22402271

22412272

0 commit comments

Comments
 (0)