@@ -351,6 +351,8 @@ def createDataFrame(self, data, schema=None, samplingRatio=None):
351
351
:return: a DataFrame
352
352
353
353
>>> l = [('Alice', 1)]
354
+ >>> sqlCtx.createDataFrame(l).collect()
355
+ [Row(_1=u'Alice', _2=1)]
354
356
>>> sqlCtx.createDataFrame(l, ['name', 'age']).collect()
355
357
[Row(name=u'Alice', age=1)]
356
358
@@ -359,6 +361,8 @@ def createDataFrame(self, data, schema=None, samplingRatio=None):
359
361
[Row(age=1, name=u'Alice')]
360
362
361
363
>>> rdd = sc.parallelize(l)
364
+ >>> sqlCtx.createDataFrame(rdd).collect()
365
+ [Row(_1=u'Alice', _2=1)]
362
366
>>> df = sqlCtx.createDataFrame(rdd, ['name', 'age'])
363
367
>>> df.collect()
364
368
[Row(name=u'Alice', age=1)]
@@ -377,14 +381,17 @@ def createDataFrame(self, data, schema=None, samplingRatio=None):
377
381
>>> df3 = sqlCtx.createDataFrame(rdd, schema)
378
382
>>> df3.collect()
379
383
[Row(name=u'Alice', age=1)]
384
+
385
+ >>> sqlCtx.createDataFrame(df.toPandas()).collect() # doctest: +SKIP
386
+ [Row(name=u'Alice', age=1)]
380
387
"""
381
388
if isinstance (data , DataFrame ):
382
389
raise TypeError ("data is already a DataFrame" )
383
390
384
391
if has_pandas and isinstance (data , pandas .DataFrame ):
385
- data = self ._sc .parallelize (data .to_records (index = False ))
386
392
if schema is None :
387
393
schema = list (data .columns )
394
+ data = [r .tolist () for r in data .to_records (index = False )]
388
395
389
396
if not isinstance (data , RDD ):
390
397
try :
@@ -399,7 +406,8 @@ def createDataFrame(self, data, schema=None, samplingRatio=None):
399
406
if isinstance (schema , (list , tuple )):
400
407
first = data .first ()
401
408
if not isinstance (first , (list , tuple )):
402
- raise ValueError ("each row in `rdd` should be list or tuple" )
409
+ raise ValueError ("each row in `rdd` should be list or tuple, "
410
+ "but got %r" % type (first ))
403
411
row_cls = Row (* schema )
404
412
schema = self ._inferSchema (data .map (lambda r : row_cls (* r )), samplingRatio )
405
413
0 commit comments