10 changes: 0 additions & 10 deletions python/pyspark/__init__.py
@@ -37,16 +37,6 @@

 """

-# The following block allows us to import python's random instead of mllib.random for scripts in
-# mllib that depend on top level pyspark packages, which transitively depend on python's random.
-# Since Python's import logic looks for modules in the current package first, we eliminate
-# mllib.random as a candidate for C{import random} by removing the first search path, the script's
-# location, in order to force the loader to look in Python's top-level modules for C{random}.
-import sys
-s = sys.path.pop(0)
-import random
-sys.path.insert(0, s)
-
 from pyspark.conf import SparkConf
 from pyspark.context import SparkContext
 from pyspark.rdd import RDD
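The deleted block above papered over a real shadowing problem: under Python 2's implicit relative imports, "import random" executed by code inside pyspark/mllib resolved to mllib's own random.py rather than the standard library. A minimal sketch of the same failure mode and of the pop/restore workaround, reproduced here with a sys.path entry so it runs on Python 3 (the shadow module and its WHICH attribute are invented for the demo):

import os
import sys
import tempfile

# Create a module that shadows the stdlib's random, and put its directory
# first on sys.path, the same position a script's own directory occupies.
shadow_dir = tempfile.mkdtemp()
with open(os.path.join(shadow_dir, "random.py"), "w") as f:
    f.write("WHICH = 'shadow'\n")
sys.path.insert(0, shadow_dir)

sys.modules.pop("random", None)  # forget any cached stdlib import
import random
assert random.WHICH == 'shadow'  # the shadow won

# The deleted block's trick: pop the offending entry, import, restore it.
sys.modules.pop("random", None)
entry = sys.path.pop(0)
import random
assert not hasattr(random, "WHICH")  # the stdlib module this time
sys.path.insert(0, entry)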
34 changes: 34 additions & 0 deletions python/pyspark/mllib/__init__.py
@@ -24,3 +24,37 @@
 import numpy
 if numpy.version.version < '1.4':
     raise Exception("MLlib requires NumPy 1.4+")
+
+__all__ = ['classification', 'clustering', 'feature', 'linalg', 'random',
+           'recommendation', 'regression', 'stat', 'tree', 'util']
+
+import sys
+import rand as random
+random.__name__ = 'random'
+random.RandomRDDs.__module__ = __name__ + '.random'
+
+
+class RandomModuleHook(object):
+    """
+    Hook to import pyspark.mllib.random
+    """
+    fullname = __name__ + '.random'
+
+    def find_module(self, name, path=None):
+        # skip all other modules
+        if not name.startswith(self.fullname):
+            return
+        return self
+
+    def load_module(self, name):
+        if name == self.fullname:
+            return random
+
+        cname = name.rsplit('.', 1)[-1]
+        try:
+            return getattr(random, cname)
+        except AttributeError:
+            raise ImportError
+
+
+sys.meta_path.append(RandomModuleHook())
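RandomModuleHook speaks the legacy PEP 302 meta-path protocol: find_module claims only pyspark.mllib.random and names below it, and load_module answers with the renamed rand module or one of its attributes, so existing "import pyspark.mllib.random" code keeps working after the rename. The __name__ and __module__ fix-ups presumably keep docs and pickled references pointing at the public name. Below is a self-contained sketch of the same aliasing idea written against the modern importlib API (find_spec) rather than the legacy pair; AliasLoader, AliasFinder, and the mathalias name are all invented for the demo:

import importlib.abc
import importlib.util
import math
import sys


class AliasLoader(importlib.abc.Loader):
    """Loader that hands back an existing, already-initialized module."""

    def __init__(self, module):
        self._module = module

    def create_module(self, spec):
        return self._module  # reuse the live module object

    def exec_module(self, module):
        pass  # nothing to run; the module is already initialized


class AliasFinder(importlib.abc.MetaPathFinder):
    """Serve the stdlib math module under the invented name 'mathalias'."""

    def find_spec(self, fullname, path=None, target=None):
        if fullname != "mathalias":
            return None  # decline everything else, like the PR's hook
        return importlib.util.spec_from_loader(fullname, AliasLoader(math))


sys.meta_path.append(AliasFinder())

import mathalias  # intercepted by AliasFinder
assert mathalias.sqrt is math.sqrt
assert sys.modules["mathalias"] is math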
8 changes: 3 additions & 5 deletions python/pyspark/mllib/feature.py
@@ -18,8 +18,11 @@
 """
 Python package for feature in MLlib.
 """
+from __future__ import absolute_import
+
 import sys
 import warnings
+import random

 from py4j.protocol import Py4JJavaError

@@ -341,8 +344,6 @@ def __init__(self):
         """
         Construct Word2Vec instance
         """
-        import random  # this can't be on the top because of mllib.random
-
         self.vectorSize = 100
         self.learningRate = 0.025
         self.numPartitions = 1
@@ -411,8 +412,5 @@ def _test():
         exit(-1)

 if __name__ == "__main__":
-    # remove current path from list of search paths to avoid importing mllib.random
-    # for C{import random}, which is done in an external dependency of pyspark during doctests.
-    import sys
-    sys.path.pop(0)
     _test()
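With absolute_import in effect, the module-level import of random added above is unambiguous even on Python 2: it can only name the top-level stdlib module, never a sibling of feature.py. That is what makes the lazy import inside Word2Vec.__init__ and the sys.path surgery in the doctest runner safe to delete. A short sketch of the distinction (the explicit-relative spelling is shown as a comment because it only works inside a package):

from __future__ import absolute_import  # no-op on Python 3, where absolute imports are the default

import random                   # always the top-level stdlib module now
# from . import rand as random  # explicit-relative spelling for the sibling module
assert hasattr(random, "randint")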
4 changes: 0 additions & 4 deletions python/pyspark/mllib/linalg.py
@@ -614,8 +614,4 @@ def _test():
         exit(-1)

 if __name__ == "__main__":
-    # remove current path from list of search paths to avoid importing mllib.random
-    # for C{import random}, which is done in an external dependency of pyspark during doctests.
-    import sys
-    sys.path.pop(0)
     _test()
python/pyspark/mllib/random.py → python/pyspark/mllib/rand.py: File renamed without changes.
2 changes: 1 addition & 1 deletion python/run-tests
@@ -72,7 +72,7 @@ function run_mllib_tests() {
     run_test "pyspark/mllib/clustering.py"
     run_test "pyspark/mllib/feature.py"
     run_test "pyspark/mllib/linalg.py"
-    run_test "pyspark/mllib/random.py"
+    run_test "pyspark/mllib/rand.py"
     run_test "pyspark/mllib/recommendation.py"
     run_test "pyspark/mllib/regression.py"
     run_test "pyspark/mllib/stat.py"