@@ -365,6 +365,39 @@ abstract class OrcSuite extends OrcTest with BeforeAndAfterAll {
365
365
}
366
366
}
367
367
368
// Regression test for SPARK-26859 (see the test name for the bug being covered).
//
// Two ORC datasets with different physical schemas are written into sibling
// partition directories (`part=one`, `part=two`) and then read back together
// with the wider schema of `df2`, with the vectorized ORC reader disabled.
// The files under `part=one` do not contain `col4`, and their column order
// differs from the requested schema, so the read must return null for `col4`
// and still place `col2` / `col3` in the right positions.
test("SPARK-26859 Fix field writer index bug in non-vectorized ORC deserializer") {
  // Turn the vectorized reader off for the duration of this test only.
  withSQLConf(SQLConf.ORC_VECTORIZED_READER_ENABLED.key -> "false") {
    withTempPath { dir =>
      val path = dir.getCanonicalPath

      // Narrow schema: (col1, col2, col3).
      val df1 = Seq((1, 2, "abc"), (4, 5, "def"), (8, 9, null)).toDF("col1", "col2", "col3")
      // Wider schema with an extra `col4` inserted between col1 and col2.
      val df2 = Seq((10, null, 20, null), (40, "uvw", 50, "xyz"), (80, null, 90, null))
        .toDF("col1", "col4", "col2", "col3")

      // `part=<value>` directory names; the expected rows below carry the
      // value as a trailing `part` column.
      val dir1 = s"$path${File.separator}part=one"
      val dir2 = s"$path${File.separator}part=two"

      val format = "orc"

      df1.write.format(format).save(dir1)
      df2.write.format(format).save(dir2)

      // Read both directories at once, forcing the wider schema on every file.
      val df = spark.read
        .schema(df2.schema)
        .format(format)
        .load(path)

      checkAnswer(df, Seq(
        // Rows written from df1: `col4` is absent in the files, so it must be null.
        Row(1, null, 2, "abc", "one"),
        Row(4, null, 5, "def", "one"),
        Row(8, null, 9, null, "one"),
        // Rows written from df2: returned as written.
        Row(10, null, 20, null, "two"),
        Row(40, "uvw", 50, "xyz", "two"),
        Row(80, null, 90, null, "two")))
    }
  }
}
400
+
368
401
test(" Write Spark version into ORC file metadata" ) {
369
402
withTempPath { path =>
370
403
spark.range(1 ).repartition(1 ).write.orc(path.getCanonicalPath)
0 commit comments