
Commit f551898

jyotiskamateiz authored and committed
[SPARK-972] Added detailed callsite info for ValueError in context.py (resubmitted)
Author: jyotiska <[email protected]>

Closes #34 from jyotiska/pyspark_code and squashes the following commits:

c9439be [jyotiska] replaced dict with namedtuple
a6bf4cd [jyotiska] added callsite info for context.py
1 parent e1e09e0 commit f551898
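
The heart of the change, per the squashed commits, is replacing a preformatted traceback string (and an intermediate dict) with a namedtuple carrying the callsite's function name, file name, and line number. Below is a minimal standalone sketch of that pattern, assuming nothing beyond the Python standard library; the sample values are invented:

from collections import namedtuple

# Field names mirror the diffs below; the sample values are invented.
Callsite = namedtuple("Callsite", "function file linenum")

site = Callsite(function="__init__", file="my_job.py", linenum=42)

# Consumers format the fields as needed, e.g. the "fun at file:line"
# style used by _JavaStackTrace in rdd.py:
print("%s at %s:%s" % (site.function, site.file, site.linenum))
# -> __init__ at my_job.py:42

Because the callsite travels as a structured record rather than a string, each consumer below decides for itself how to render or report it.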

File tree (2 files changed: +29, −8 lines)

python/pyspark/context.py
python/pyspark/rdd.py

python/pyspark/context.py

Lines changed: 15 additions & 1 deletion
@@ -20,6 +20,7 @@
 import sys
 from threading import Lock
 from tempfile import NamedTemporaryFile
+from collections import namedtuple
 
 from pyspark import accumulators
 from pyspark.accumulators import Accumulator
@@ -29,6 +30,7 @@
 from pyspark.java_gateway import launch_gateway
 from pyspark.serializers import PickleSerializer, BatchedSerializer, UTF8Deserializer
 from pyspark.storagelevel import StorageLevel
+from pyspark import rdd
 from pyspark.rdd import RDD
 
 from py4j.java_collections import ListConverter
@@ -83,6 +85,11 @@ def __init__(self, master=None, appName=None, sparkHome=None, pyFiles=None,
             ...
         ValueError:...
         """
+        if rdd._extract_concise_traceback() is not None:
+            self._callsite = rdd._extract_concise_traceback()
+        else:
+            tempNamedTuple = namedtuple("Callsite", "function file linenum")
+            self._callsite = tempNamedTuple(function=None, file=None, linenum=None)
         SparkContext._ensure_initialized(self, gateway=gateway)
 
         self.environment = environment or {}
@@ -169,7 +176,14 @@ def _ensure_initialized(cls, instance=None, gateway=None):
 
             if instance:
                 if SparkContext._active_spark_context and SparkContext._active_spark_context != instance:
-                    raise ValueError("Cannot run multiple SparkContexts at once")
+                    currentMaster = SparkContext._active_spark_context.master
+                    currentAppName = SparkContext._active_spark_context.appName
+                    callsite = SparkContext._active_spark_context._callsite
+
+                    # Raise error if there is already a running Spark context
+                    raise ValueError("Cannot run multiple SparkContexts at once; existing SparkContext(app=%s, master=%s)" \
+                        " created by %s at %s:%s " \
+                        % (currentAppName, currentMaster, callsite.function, callsite.file, callsite.linenum))
                 else:
                     SparkContext._active_spark_context = instance
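
To see what this new error path produces end to end, here is a hedged, self-contained sketch of the guard above. The `_Context` class and its fields are invented stand-ins for `SparkContext`; only the message format comes from the diff:

from collections import namedtuple

Callsite = namedtuple("Callsite", "function file linenum")

class _Context(object):
    # Hypothetical stand-in for SparkContext; illustration only.
    _active = None

    def __init__(self, master, appName, callsite):
        if _Context._active is not None:
            existing = _Context._active
            # Same message format as the diff above: report where the
            # already-running context was created.
            raise ValueError("Cannot run multiple SparkContexts at once; "
                "existing SparkContext(app=%s, master=%s) created by %s at %s:%s "
                % (existing.appName, existing.master,
                   existing._callsite.function,
                   existing._callsite.file,
                   existing._callsite.linenum))
        self.master = master
        self.appName = appName
        self._callsite = callsite
        _Context._active = self

_Context("local", "first_app", Callsite("__init__", "job.py", 3))
try:
    _Context("local", "second_app", Callsite("__init__", "job.py", 9))
except ValueError as e:
    print(e)

Running this prints the existing context's app name, master, and creation site, which is exactly the diagnostic the old one-line ValueError lacked.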

python/pyspark/rdd.py

Lines changed: 14 additions & 7 deletions
@@ -18,6 +18,7 @@
 from base64 import standard_b64encode as b64enc
 import copy
 from collections import defaultdict
+from collections import namedtuple
 from itertools import chain, ifilter, imap
 import operator
 import os
@@ -42,12 +43,14 @@
 __all__ = ["RDD"]
 
 def _extract_concise_traceback():
+    """
+    This function returns the traceback info for a callsite, returns a dict
+    with function name, file name and line number
+    """
     tb = traceback.extract_stack()
+    callsite = namedtuple("Callsite", "function file linenum")
     if len(tb) == 0:
-        return "I'm lost!"
-    # HACK: This function is in a file called 'rdd.py' in the top level of
-    # everything PySpark. Just trim off the directory name and assume
-    # everything in that tree is PySpark guts.
+        return None
     file, line, module, what = tb[len(tb) - 1]
     sparkpath = os.path.dirname(file)
     first_spark_frame = len(tb) - 1
@@ -58,16 +61,20 @@ def _extract_concise_traceback():
             break
     if first_spark_frame == 0:
         file, line, fun, what = tb[0]
-        return "%s at %s:%d" % (fun, file, line)
+        return callsite(function=fun, file=file, linenum=line)
     sfile, sline, sfun, swhat = tb[first_spark_frame]
     ufile, uline, ufun, uwhat = tb[first_spark_frame-1]
-    return "%s at %s:%d" % (sfun, ufile, uline)
+    return callsite(function=sfun, file=ufile, linenum=uline)
 
 _spark_stack_depth = 0
 
 class _JavaStackTrace(object):
     def __init__(self, sc):
-        self._traceback = _extract_concise_traceback()
+        tb = _extract_concise_traceback()
+        if tb is not None:
+            self._traceback = "%s at %s:%s" % (tb.function, tb.file, tb.linenum)
+        else:
+            self._traceback = "Error! Could not extract traceback info"
         self._context = sc
 
     def __enter__(self):
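
For intuition about what `_extract_concise_traceback` does, here is a hedged, simplified sketch of the same stack-walking idea: take the current stack, treat every frame under this file's directory as library code, and report the first user frame that called into it. `extract_callsite` is an invented name; the real function above does the equivalent work with PySpark's own paths:

import os
import traceback
from collections import namedtuple

Callsite = namedtuple("Callsite", "function file linenum")

def extract_callsite():
    # Invented helper mirroring _extract_concise_traceback above.
    tb = traceback.extract_stack()
    if len(tb) == 0:
        return None
    # Treat everything under this file's directory as "library" frames.
    libpath = os.path.dirname(tb[-1][0])
    first_lib_frame = len(tb) - 1
    for i, (file, line, fun, what) in enumerate(tb):
        if file.startswith(libpath):
            first_lib_frame = i
            break
    if first_lib_frame == 0:
        # The whole stack is library code; fall back to the outermost frame.
        file, line, fun, what = tb[0]
        return Callsite(function=fun, file=file, linenum=line)
    # Report the library entry point, attributed to the user frame above it.
    sfile, sline, sfun, swhat = tb[first_lib_frame]
    ufile, uline, ufun, uwhat = tb[first_lib_frame - 1]
    return Callsite(function=sfun, file=ufile, linenum=uline)

site = extract_callsite()
if site is not None:
    # Same formatting _JavaStackTrace.__init__ applies above.
    print("%s at %s:%s" % (site.function, site.file, site.linenum))
else:
    print("Error! Could not extract traceback info")

Returning None on an empty stack (instead of the old "I'm lost!" string) lets both callers in this commit choose their own fallback: a namedtuple of Nones in context.py, an error string in _JavaStackTrace.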
