@@ -43,14 +43,8 @@ abstract class OrcSuite extends QueryTest with BeforeAndAfterAll {
43
43
orcTableDir.mkdir()
44
44
import org .apache .spark .sql .hive .test .TestHive .implicits ._
45
45
46
- // Originally we were using a 10-row RDD for testing. However, when default parallelism is
47
- // greater than 10 (e.g., running on a node with 32 cores), this RDD contains empty partitions,
48
- // which result in empty ORC files. Unfortunately, ORC doesn't handle empty files properly and
49
- // causes build failure on Jenkins, which happens to have 32 cores. Please refer to SPARK-8501
50
- // for more details. To workaround this issue before fixing SPARK-8501, we simply increase row
51
- // number in this RDD to avoid empty partitions.
52
46
sparkContext
53
- .makeRDD(1 to 100 )
47
+ .makeRDD(1 to 10 )
54
48
.map(i => OrcData (i, s " part- $i" ))
55
49
.toDF()
56
50
.registerTempTable(s " orc_temp_table " )
@@ -76,43 +70,43 @@ abstract class OrcSuite extends QueryTest with BeforeAndAfterAll {
76
70
}
77
71
78
72
test(" create temporary orc table" ) {
79
- checkAnswer(sql(" SELECT COUNT(*) FROM normal_orc_source" ), Row (100 ))
73
+ checkAnswer(sql(" SELECT COUNT(*) FROM normal_orc_source" ), Row (10 ))
80
74
81
75
checkAnswer(
82
76
sql(" SELECT * FROM normal_orc_source" ),
83
- (1 to 100 ).map(i => Row (i, s " part- $i" )))
77
+ (1 to 10 ).map(i => Row (i, s " part- $i" )))
84
78
85
79
checkAnswer(
86
80
sql(" SELECT * FROM normal_orc_source where intField > 5" ),
87
- (6 to 100 ).map(i => Row (i, s " part- $i" )))
81
+ (6 to 10 ).map(i => Row (i, s " part- $i" )))
88
82
89
83
checkAnswer(
90
84
sql(" SELECT COUNT(intField), stringField FROM normal_orc_source GROUP BY stringField" ),
91
- (1 to 100 ).map(i => Row (1 , s " part- $i" )))
85
+ (1 to 10 ).map(i => Row (1 , s " part- $i" )))
92
86
}
93
87
94
88
test(" create temporary orc table as" ) {
95
- checkAnswer(sql(" SELECT COUNT(*) FROM normal_orc_as_source" ), Row (100 ))
89
+ checkAnswer(sql(" SELECT COUNT(*) FROM normal_orc_as_source" ), Row (10 ))
96
90
97
91
checkAnswer(
98
92
sql(" SELECT * FROM normal_orc_source" ),
99
- (1 to 100 ).map(i => Row (i, s " part- $i" )))
93
+ (1 to 10 ).map(i => Row (i, s " part- $i" )))
100
94
101
95
checkAnswer(
102
96
sql(" SELECT * FROM normal_orc_source WHERE intField > 5" ),
103
- (6 to 100 ).map(i => Row (i, s " part- $i" )))
97
+ (6 to 10 ).map(i => Row (i, s " part- $i" )))
104
98
105
99
checkAnswer(
106
100
sql(" SELECT COUNT(intField), stringField FROM normal_orc_source GROUP BY stringField" ),
107
- (1 to 100 ).map(i => Row (1 , s " part- $i" )))
101
+ (1 to 10 ).map(i => Row (1 , s " part- $i" )))
108
102
}
109
103
110
104
test(" appending insert" ) {
111
105
sql(" INSERT INTO TABLE normal_orc_source SELECT * FROM orc_temp_table WHERE intField > 5" )
112
106
113
107
checkAnswer(
114
108
sql(" SELECT * FROM normal_orc_source" ),
115
- (1 to 5 ).map(i => Row (i, s " part- $i" )) ++ (6 to 100 ).flatMap { i =>
109
+ (1 to 5 ).map(i => Row (i, s " part- $i" )) ++ (6 to 10 ).flatMap { i =>
116
110
Seq .fill(2 )(Row (i, s " part- $i" ))
117
111
})
118
112
}
@@ -125,7 +119,7 @@ abstract class OrcSuite extends QueryTest with BeforeAndAfterAll {
125
119
126
120
checkAnswer(
127
121
sql(" SELECT * FROM normal_orc_as_source" ),
128
- (6 to 100 ).map(i => Row (i, s " part- $i" )))
122
+ (6 to 10 ).map(i => Row (i, s " part- $i" )))
129
123
}
130
124
}
131
125
0 commit comments