Skip to content

Commit eff9714

Browse files
ScrapCodes authored and mateiz committed
[SPARK-2014] Make PySpark store RDDs in MEMORY_ONLY_SER with compression by default
Author: Prashant Sharma <[email protected]>

Closes apache#1051 from ScrapCodes/SPARK-2014/pyspark-cache and squashes the following commits:

f192df7 [Prashant Sharma] Code Review
2a2f43f [Prashant Sharma] [SPARK-2014] Make PySpark store RDDs in MEMORY_ONLY_SER with compression by default
1 parent a45d548 commit eff9714

File tree

3 files changed

+9
-3
lines changed

3 files changed

+9
-3
lines changed

python/pyspark/conf.py

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -100,6 +100,12 @@ def set(self, key, value):
100100
self._jconf.set(key, unicode(value))
101101
return self
102102

103+
def setIfMissing(self, key, value):
104+
"""Set a configuration property, if not already set."""
105+
if self.get(key) is None:
106+
self.set(key, value)
107+
return self
108+
103109
def setMaster(self, value):
104110
"""Set master URL to connect to."""
105111
self._jconf.setMaster(value)

python/pyspark/context.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -101,7 +101,7 @@ def __init__(self, master=None, appName=None, sparkHome=None, pyFiles=None,
101101
else:
102102
self.serializer = BatchedSerializer(self._unbatched_serializer,
103103
batchSize)
104-
104+
self._conf.setIfMissing("spark.rdd.compress", "true")
105105
# Set any parameters passed directly to us on the conf
106106
if master:
107107
self._conf.setMaster(master)

python/pyspark/rdd.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -231,10 +231,10 @@ def context(self):
231231

232232
def cache(self):
233233
"""
234-
Persist this RDD with the default storage level (C{MEMORY_ONLY}).
234+
Persist this RDD with the default storage level (C{MEMORY_ONLY_SER}).
235235
"""
236236
self.is_cached = True
237-
self._jrdd.cache()
237+
self.persist(StorageLevel.MEMORY_ONLY_SER)
238238
return self
239239

240240
def persist(self, storageLevel):

0 commit comments

Comments
 (0)