+<<<<<<< HEAD
+=======
+__author__ = 'ktakagiw'
+
+
+>>>>>>> initial commit for pySparkStreaming
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# limitations under the License.
#

+<<<<<<< HEAD
import sys
from signal import signal, SIGTERM, SIGINT
@@ -29,12 +36,43 @@ class StreamingContext(object):
    """
    Main entry point for Spark Streaming functionality. A StreamingContext represents the
    connection to a Spark cluster, and can be used to create L{DStream}s and
+=======
+import os
+import shutil
+import sys
+from threading import Lock
+from tempfile import NamedTemporaryFile
+
+from pyspark import accumulators
+from pyspark.accumulators import Accumulator
+from pyspark.broadcast import Broadcast
+from pyspark.conf import SparkConf
+from pyspark.files import SparkFiles
+from pyspark.java_gateway import launch_gateway
+from pyspark.serializers import PickleSerializer, BatchedSerializer, UTF8Deserializer
+from pyspark.storagelevel import StorageLevel
+from pyspark.rdd import RDD
+from pyspark.context import SparkContext
+
+from py4j.java_collections import ListConverter
+
+from pyspark.streaming.dstream import DStream
+
+class StreamingContext(object):
+    """
+    Main entry point for Spark functionality. A StreamingContext represents the
+    connection to a Spark cluster, and can be used to create L{RDD}s and
+>>>>>>> initial commit for pySparkStreaming
    broadcast variables on that cluster.
    """

    def __init__(self, master=None, appName=None, sparkHome=None, pyFiles=None,
                 environment=None, batchSize=1024, serializer=PickleSerializer(), conf=None,
+<<<<<<< HEAD
                 gateway=None, sparkContext=None, duration=None):
+=======
+                 gateway=None, duration=None):
+>>>>>>> initial commit for pySparkStreaming
        """
        Create a new StreamingContext. At least the master and app name and duration
        should be set, either through the named parameters here or through C{conf}.
@@ -55,6 +93,7 @@ def __init__(self, master=None, appName=None, sparkHome=None, pyFiles=None,
        @param conf: A L{SparkConf} object setting Spark properties.
        @param gateway: Use an existing gateway and JVM, otherwise a new JVM
               will be instantiated.
+<<<<<<< HEAD
        @param sparkContext: L{SparkContext} object.
        @param duration: A L{Duration} object for SparkStreaming.
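A minimal usage sketch of the constructor described above (not part of the diff): it assumes the HEAD signature and a Duration wrapper importable as pyspark.streaming.duration.Duration, which is an assumption here; the master, app name, and batch interval are illustrative values.

    # Hypothetical example: build a StreamingContext with a 1000 ms batch duration
    from pyspark.streaming.context import StreamingContext
    from pyspark.streaming.duration import Duration  # assumed module path for the Duration wrapper

    ssc = StreamingContext(master="local[2]", appName="NetworkWordCount",
                           duration=Duration(1000))  # duration drives the batch interval, per the docstring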
@@ -73,13 +112,23 @@ def __init__(self, master=None, appName=None, sparkHome=None, pyFiles=None,
        # is started in StreamingContext.
        SparkContext._gateway.restart_callback_server()
        self._clean_up_trigger()
+=======
+        @param duration: A L{Duration} Duration for SparkStreaming
+
+        """
+        # Create the Python SparkContext
+        self._sc = SparkContext(master=master, appName=appName, sparkHome=sparkHome,
+                                pyFiles=pyFiles, environment=environment, batchSize=batchSize,
+                                serializer=serializer, conf=conf, gateway=gateway)
+>>>>>>> initial commit for pySparkStreaming
        self._jvm = self._sc._jvm
        self._jssc = self._initialize_context(self._sc._jsc, duration._jduration)

    # Initialize StreamingContext in function to allow subclass specific initialization
    def _initialize_context(self, jspark_context, jduration):
        return self._jvm.JavaStreamingContext(jspark_context, jduration)

+<<<<<<< HEAD
    def _clean_up_trigger(self):
        """Kill py4j callback server properly using signal lib"""
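Because _initialize_context is factored into its own method, a subclass can customize how the JVM-side context is created without repeating the rest of __init__. A hypothetical sketch (the subclass name is an assumption; it reuses only calls already visible in this diff):

    class LoggingStreamingContext(StreamingContext):
        # Illustrative subclass: hook JVM-side context creation, then delegate as the parent does
        def _initialize_context(self, jspark_context, jduration):
            print("creating JavaStreamingContext with batch duration %s" % jduration)
            return self._jvm.JavaStreamingContext(jspark_context, jduration)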
@@ -156,3 +205,53 @@ def _testInputStream(self, test_inputs, numSlices=None):
        jinput_stream = self._jvm.PythonTestInputStream(self._jssc, jtest_rdds).asJavaDStream()

        return DStream(jinput_stream, self, test_rdd_deserializers[0])
+=======
+    def actorStream(self, props, name, storageLevel, supervisorStrategy):
+        raise NotImplementedError
+
+    def addStreamingListener(self, streamingListener):
+        raise NotImplementedError
+
+    def awaitTermination(self, timeout=None):
+        if timeout:
+            self._jssc.awaitTermination(timeout)
+        else:
+            self._jssc.awaitTermination()
+
+    def checkpoint(self, directory):
+        raise NotImplementedError
+
+    def fileStream(self, directory, filter=None, newFilesOnly=None):
+        raise NotImplementedError
+
+    def networkStream(self, receiver):
+        raise NotImplementedError
+
+    def queueStream(self, queue, oneAtATime=True, defaultRDD=None):
+        raise NotImplementedError
+
+    def rawSocketStream(self, hostname, port, storagelevel):
+        raise NotImplementedError
+
+    def remember(self, duration):
+        raise NotImplementedError
+
+    def socketStream(self, hostname, port, converter, storageLevel):
+        raise NotImplementedError
+
+    def start(self):
+        self._jssc.start()
+
+    def stop(self, stopSparkContext=True):
+        raise NotImplementedError
+
+    def textFileStream(self, directory):
+        return DStream(self._jssc.textFileStream(directory), self, UTF8Deserializer())
+
+    def transform(self, seq):
+        raise NotImplementedError
+
+    def union(self, seq):
+        raise NotImplementedError
+
+>>>>>>> initial commit for pySparkStreaming
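Taken together, the stubs above mirror the Scala/Java StreamingContext API, with only a few methods wired through to the JVM so far. A rough driver sketch using just the implemented pieces (textFileStream, start, awaitTermination); the Duration import and the input directory are assumptions, not part of this commit:

    # Hypothetical driver using only the methods implemented in this diff
    from pyspark.streaming.context import StreamingContext
    from pyspark.streaming.duration import Duration  # assumed module path

    ssc = StreamingContext(master="local[2]", appName="FileWordCount", duration=Duration(1000))
    lines = ssc.textFileStream("/tmp/streaming-input")  # DStream of UTF-8 decoded lines
    ssc.start()                   # starts the underlying JavaStreamingContext
    ssc.awaitTermination(60000)   # timeout forwarded to the JVM awaitTermination (milliseconds assumed)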