51
51
. " $FWDIR " /bin/load-spark-env.sh
52
52
53
53
# In Spark <= 1.1, setting IPYTHON=1 would cause the driver to be launched using the `ipython`
54
- # executable, while the worker would still be launched using PYSPARK_PYTHON. This allowed users to
55
- # launch IPython notebooks with PySpark without requiring IPython to be installed on the workers.
56
- # Unfortunately, this approach had a few drawbacks:
54
+ # executable, while the worker would still be launched using PYSPARK_PYTHON.
57
55
#
58
- # - It wasn't easy to use a custom IPython executable (SPARK-3265).
59
- # - There was a risk that the `ipython` and `PYSPARK_PYTHON` executables might run different
60
- # Python versions (e.g. 2.6 on driver and 2.7 on the workers), which might lead to issues
61
- # when using certain Python serializers that are incompatible across releases (e.g. marshal).
62
- #
63
- # In Spark 1.2, we removed the documentation of the IPYTHON and IPYTHON_OPTS variables, since
64
- # we've made the necessary changes to allow `ipython` to be used on the workers, too. Now,
65
- # users can simply set PYSPARK_PYTHON=ipython to use IPython and set PYSPARK_DRIVER_PYTHON_OPTS to
66
- # pass options when starting the Python driver (e.g. PYSPARK_DRIVER_PYTHON_OPTS='notebook').
56
+ # In Spark 1.2, we removed the documentation of the IPYTHON and IPYTHON_OPTS variables and added
57
+ # PYSPARK_DRIVER_PYTHON and PYSPARK_DRIVER_PYTHON_OPTS to allow IPython to be used for the driver.
58
+ # Now, users can simply set PYSPARK_DRIVER_PYTHON=ipython to use IPython and set
59
+ # PYSPARK_DRIVER_PYTHON_OPTS to pass options when starting the Python driver
60
+ # (e.g. PYSPARK_DRIVER_PYTHON_OPTS='notebook'). This supports full customization of the IPython
61
+ # and executor Python executables.
67
62
#
68
63
# For backwards-compatibility, we retain the old IPYTHON and IPYTHON_OPTS variables.
69
64
70
- # If IPython options are specified, assume user wants to run IPython (for backwards-compatibility)
71
- if [[ -n " $IPYTHON_OPTS " ]]; then
72
- IPYTHON=1
73
- # For backwards-compatibility:
65
+ # Determine the Python executable to use if PYSPARK_PYTHON or PYSPARK_DRIVER_PYTHON isn't set:
66
+ if hash python2.7 2> /dev/null; then
67
+ # Attempt to use Python 2.7, if installed:
68
+ DEFAULT_PYTHON=" python2.7"
69
+ else
70
+ DEFAULT_PYTHON=" python"
71
+ fi
72
+
73
+ # Determine the Python executable to use for the driver:
74
+ if [[ -n " $IPYTHON_OPTS " || " $IPYTHON " == " 1" ]]; then
75
+ # If IPython options are specified, assume user wants to run IPython
76
+ # (for backwards-compatibility)
74
77
PYSPARK_DRIVER_PYTHON_OPTS=" $PYSPARK_DRIVER_PYTHON_OPTS $IPYTHON_OPTS "
78
+ PYSPARK_DRIVER_PYTHON=" ipython"
79
+ elif [[ -z " $PYSPARK_DRIVER_PYTHON " ]]; then
80
+ PYSPARK_DRIVER_PYTHON=" ${PYSPARK_PYTHON:- " $DEFAULT_PYTHON " } "
75
81
fi
76
82
77
- # Figure out which Python executable to use.
78
- # If we're not running in the legacy IPYTHON mode, then use a default PYSPARK_PYTHON
79
- if [[ " $IPYTHON " != " 1" && -z " $PYSPARK_PYTHON " ]]; then
80
- PYSPARK_PYTHON=" python"
83
+ # Determine the Python executable to use for the executors:
84
+ if [[ -z " $PYSPARK_PYTHON " ]]; then
85
+ if [[ $PYSPARK_DRIVER_PYTHON == * ipython* && $DEFAULT_PYTHON != " python2.7" ]]; then
86
+ echo " IPython requires Python 2.7+; please install python2.7 or set PYSPARK_PYTHON" 1>&2
87
+ exit 1
88
+ else
89
+ PYSPARK_PYTHON=" $DEFAULT_PYTHON "
90
+ fi
81
91
fi
82
92
export PYSPARK_PYTHON
83
93
@@ -108,9 +118,9 @@ if [[ -n "$SPARK_TESTING" ]]; then
108
118
unset YARN_CONF_DIR
109
119
unset HADOOP_CONF_DIR
110
120
if [[ -n " $PYSPARK_DOC_TEST " ]]; then
111
- exec " $PYSPARK_PYTHON " -m doctest $1
121
+ exec " $PYSPARK_DRIVER_PYTHON " -m doctest $1
112
122
else
113
- exec " $PYSPARK_PYTHON " $1
123
+ exec " $PYSPARK_DRIVER_PYTHON " $1
114
124
fi
115
125
exit
116
126
fi
@@ -126,9 +136,5 @@ if [[ "$1" =~ \.py$ ]]; then
126
136
else
127
137
# PySpark shell requires special handling downstream
128
138
export PYSPARK_SHELL=1
129
- if [[ " $IPYTHON " = " 1" ]]; then
130
- exec " ${PYSPARK_PYTHON:- ipython} " $PYSPARK_DRIVER_PYTHON_OPTS
131
- else
132
- exec " $PYSPARK_PYTHON " $PYSPARK_DRIVER_PYTHON_OPTS
133
- fi
139
+ exec " $PYSPARK_DRIVER_PYTHON " $PYSPARK_DRIVER_PYTHON_OPTS
134
140
fi
0 commit comments