@@ -128,15 +128,23 @@ class SQLContext(@transient val sparkContext: SparkContext)
128
128
*
129
129
* @group userf
130
130
*/
131
def jsonFile(path: String): SchemaRDD = {
  // No schema is supplied, so the three-argument overload falls back to inferring one,
  // using a sampling ratio of 1.0.
  val noSchema: Option[StructType] = None
  jsonFile(path, 1.0, noSchema)
}
132
+
133
/**
 * Reads the JSON file at `path` (one object per line) and returns it as a [[SchemaRDD]]
 * that uses the caller-supplied `schema`.
 *
 * The schema is wrapped with `Option(...)`, so a `null` schema becomes `None` and the
 * three-argument overload is called without a schema.
 *
 * @param path   location of the JSON text file
 * @param schema schema to apply to the parsed records
 * @group userf
 */
def jsonFile(path: String, schema: StructType): SchemaRDD = {
  val suppliedSchema: Option[StructType] = Option(schema)
  jsonFile(path, 1.0, suppliedSchema)
}
132
140
133
141
/**
 * :: Experimental ::
 * Reads `path` as a text file through `sparkContext.textFile` and hands the resulting
 * lines to `jsonRDD` together with `samplingRatio` and `schema`.
 *
 * @param path          location of the JSON text file
 * @param samplingRatio forwarded unchanged to `jsonRDD`
 * @param schema        schema to apply, or `None`; forwarded unchanged to `jsonRDD`
 * @return the loaded data as a [[SchemaRDD]]
 */
@Experimental
def jsonFile(path: String, samplingRatio: Double, schema: Option[StructType]): SchemaRDD = {
  val json = sparkContext.textFile(path)
  jsonRDD(json, samplingRatio, schema)
}

/**
 * :: Experimental ::
 * Two-argument form kept so that existing callers of `jsonFile(path, samplingRatio)`
 * continue to compile; it delegates to the three-argument overload with no schema.
 *
 * @param path          location of the JSON text file
 * @param samplingRatio forwarded unchanged to the three-argument overload
 * @return the loaded data as a [[SchemaRDD]]
 */
@Experimental
def jsonFile(path: String, samplingRatio: Double): SchemaRDD =
  jsonFile(path, samplingRatio, None)
141
149
142
150
/**
@@ -146,15 +154,28 @@ class SQLContext(@transient val sparkContext: SparkContext)
146
154
*
147
155
* @group userf
148
156
*/
149
def jsonRDD(json: RDD[String]): SchemaRDD = {
  // No schema is supplied, so the three-argument overload falls back to inferring one,
  // using a sampling ratio of 1.0.
  val noSchema: Option[StructType] = None
  jsonRDD(json, 1.0, noSchema)
}
158
+
159
/**
 * Converts an RDD[String] of JSON objects (one object per record) into a [[SchemaRDD]]
 * that uses the caller-supplied `schema`.
 *
 * The schema is wrapped with `Option(...)`, so a `null` schema becomes `None` and the
 * three-argument overload falls back to inferring a schema from the data.
 *
 * @param json   records to parse, one JSON object per element
 * @param schema schema to apply to the parsed records
 * @group userf
 */
def jsonRDD(json: RDD[String], schema: StructType): SchemaRDD = {
  val suppliedSchema: Option[StructType] = Option(schema)
  jsonRDD(json, 1.0, suppliedSchema)
}
150
166
151
167
/**
 * :: Experimental ::
 * Converts an RDD[String] of JSON records into a [[SchemaRDD]].
 *
 * When `schema` is defined it is applied as given and `samplingRatio` is not used.
 * Otherwise the schema is obtained from `JsonRDD.inferSchema(json, samplingRatio)` and
 * passed through `JsonRDD.nullTypeToStringType` before being applied.
 *
 * @param json          records to parse
 * @param samplingRatio passed to `JsonRDD.inferSchema`; only consulted when `schema` is `None`
 * @param schema        schema to apply, or `None` to infer one from `json`
 * @return the parsed records as a [[SchemaRDD]]
 */
@Experimental
def jsonRDD(json: RDD[String], samplingRatio: Double, schema: Option[StructType]): SchemaRDD = {
  // getOrElse takes its argument by name, so inference only runs when no schema was supplied.
  val appliedSchema =
    schema.getOrElse(JsonRDD.nullTypeToStringType(JsonRDD.inferSchema(json, samplingRatio)))

  applySchemaToPartitions(
    json,
    appliedSchema,
    JsonRDD.jsonStringToRow(appliedSchema, _: Iterator[String]))
}

/**
 * :: Experimental ::
 * Two-argument form kept so that existing callers of `jsonRDD(json, samplingRatio)`
 * continue to compile; it delegates to the three-argument overload with no schema,
 * so the schema is always inferred.
 *
 * @param json          records to parse
 * @param samplingRatio forwarded unchanged to the three-argument overload
 * @return the parsed records as a [[SchemaRDD]]
 */
@Experimental
def jsonRDD(json: RDD[String], samplingRatio: Double): SchemaRDD =
  jsonRDD(json, samplingRatio, None)
159
180
160
181
/**
0 commit comments