Skip to content

Commit 21de653

Browse files
committed
return the string in json format
1 parent 15b1fe3 commit 21de653

File tree

2 files changed

+16
-14
lines changed

2 files changed

+16
-14
lines changed

examples/src/main/python/hbase_inputformat.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
#
1717

1818
import sys
19-
import ast
19+
import simplejson as json
2020

2121
from pyspark import SparkContext
2222

@@ -75,8 +75,8 @@
7575
keyConverter=keyConv,
7676
valueConverter=valueConv,
7777
conf=conf)
78-
# hbase_rdd is a RDD[dict]
79-
hbase_rdd = hbase_rdd.flatMapValues(lambda v: v.split(" ")).mapValues(ast.literal_eval)
78+
# hbase_rdd is an RDD of dict
79+
hbase_rdd = hbase_rdd.flatMapValues(lambda v: v).mapValues(json.loads)
8080

8181
output = hbase_rdd.collect()
8282
for (k, v) in output:

examples/src/main/scala/org/apache/spark/examples/pythonconverters/HBaseConverters.scala

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
package org.apache.spark.examples.pythonconverters
1919

2020
import scala.collection.JavaConversions._
21+
import scala.util.parsing.json._
2122

2223
import org.apache.spark.api.python.Converter
2324
import org.apache.hadoop.hbase.client.{Put, Result}
@@ -28,22 +29,23 @@ import org.apache.hadoop.hbase.CellUtil
2829

2930
/**
3031
* Implementation of [[org.apache.spark.api.python.Converter]] that converts all
31-
* the records in an HBase Result to a String
32+
* the records in an HBase Result to an Array[String]
3233
*/
33-
class HBaseResultToStringConverter extends Converter[Any, String] {
34-
override def convert(obj: Any): String = {
34+
class HBaseResultToStringConverter extends Converter[Any, Array[String]] {
35+
override def convert(obj: Any): Array[String] = {
3536
import collection.JavaConverters._
3637
val result = obj.asInstanceOf[Result]
3738
val output = result.listCells.asScala.map(cell =>
38-
"{'columnFamily':'%s','qualifier':'%s','timestamp':'%s','type':'%s','value':'%s'}".format(
39-
Bytes.toStringBinary(CellUtil.cloneFamily(cell)),
40-
Bytes.toStringBinary(CellUtil.cloneQualifier(cell)),
41-
cell.getTimestamp.toString,
42-
Type.codeToType(cell.getTypeByte),
43-
Bytes.toStringBinary(CellUtil.cloneValue(cell))
39+
Map(
40+
"row" -> Bytes.toStringBinary(CellUtil.cloneRow(cell)),
41+
"columnFamily" -> Bytes.toStringBinary(CellUtil.cloneFamily(cell)),
42+
"qualifier" -> Bytes.toStringBinary(CellUtil.cloneQualifier(cell)),
43+
"timestamp" -> cell.getTimestamp.toString,
44+
"type" -> Type.codeToType(cell.getTypeByte).toString,
45+
"value" -> Bytes.toStringBinary(CellUtil.cloneValue(cell))
4446
)
45-
)
46-
output.mkString(" ")
47+
)
48+
output.map(JSONObject(_).toString()).toArray
4749
}
4850
}
4951

0 commit comments

Comments
 (0)