@@ -27,6 +27,7 @@ import org.apache.arrow.vector.file.json.JsonFileReader
 import org.apache.arrow.vector.util.Validator
 
 import org.apache.spark.sql.test.SharedSQLContext
+import org.apache.spark.unsafe.types.CalendarInterval
 
 
 // NOTE - nullable type can be declared as Option[*] or java.lang.*
@@ -88,25 +89,16 @@ class ArrowSuite extends SharedSQLContext {
   test("string type conversion") {
     collectAndValidate(upperCaseData, "test-data/arrow/uppercase-strings.json")
     collectAndValidate(lowerCaseData, "test-data/arrow/lowercase-strings.json")
+    val nullStringsColOnly = nullStrings.select(nullStrings.columns(1))
+    collectAndValidate(nullStringsColOnly, "test-data/arrow/null-strings.json")
   }
 
   ignore("date conversion") {
-    val sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS", Locale.US)
-    val d1 = new Date(sdf.parse("2015-04-08 13:10:15.000").getTime)
-    val d2 = new Date(sdf.parse("2015-04-08 13:10:15.000").getTime)
-    val ts1 = new Timestamp(sdf.parse("2013-04-08 01:10:15.567").getTime)
-    val ts2 = new Timestamp(sdf.parse("2013-04-08 13:10:10.789").getTime)
-    val dateTimeData = Seq((d1, sdf.format(d1), ts1), (d2, sdf.format(d2), ts2))
-      .toDF("a_date", "b_string", "c_timestamp")
     collectAndValidate(dateTimeData, "test-data/arrow/datetimeData-strings.json")
   }
 
   test("timestamp conversion") {
-    val sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS z", Locale.US)
-    val ts1 = new Timestamp(sdf.parse("2013-04-08 01:10:15.567 UTC").getTime)
-    val ts2 = new Timestamp(sdf.parse("2013-04-08 13:10:10.789 UTC").getTime)
-    val dateTimeData = Seq((ts1), (ts2)).toDF("a_timestamp")
-    collectAndValidate(dateTimeData, "test-data/arrow/timestampData.json")
+    collectAndValidate(dateTimeData.select($"c_timestamp"), "test-data/arrow/timestampData.json")
   }
 
   // Arrow json reader doesn't support binary data
@@ -120,24 +112,15 @@ class ArrowSuite extends SharedSQLContext {
 
   test("mapped type conversion") { }
 
-  test("other type conversion") {
-    // half-precision
-    // byte type, or binary
-    // allNulls
+  test("floating-point NaN") {
+    val nanData = Seq((1, 1.2F, Double.NaN), (2, Float.NaN, 1.23)).toDF("i", "NaN_f", "NaN_d")
+    collectAndValidate(nanData, "test-data/arrow/nanData-floating_point.json")
   }
 
-  test("floating-point NaN") { }
-
-  test("other null conversion") { }
-
   test("convert int column with null to arrow") {
     collectAndValidate(nullInts, "test-data/arrow/null-ints.json")
     collectAndValidate(testData3, "test-data/arrow/null-ints-mixed.json")
-  }
-
-  test("convert string column with null to arrow") {
-    val nullStringsColOnly = nullStrings.select(nullStrings.columns(1))
-    collectAndValidate(nullStringsColOnly, "test-data/arrow/null-strings.json")
+    collectAndValidate(allNulls, "test-data/arrow/allNulls-ints.json")
   }
 
   test("empty frame collect") {
@@ -146,7 +129,14 @@ class ArrowSuite extends SharedSQLContext {
   }
 
   test("unsupported types") {
-    intercept[UnsupportedOperationException] {
+    def runUnsupported(block: => Unit): Unit = {
+      val msg = intercept[UnsupportedOperationException] {
+        block
+      }
+      assert(msg.getMessage.contains("Unsupported data type"))
+    }
+
+    runUnsupported {
       collectAndValidate(decimalData, "test-data/arrow/decimalData-BigDecimal.json")
     }
   }
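
The hunk above folds the repeated `intercept` calls into a `runUnsupported` helper that also pins the exception message. A minimal standalone sketch of the same pattern, runnable without Spark (the suite name and the throwing body are hypothetical; only `intercept` and `assert` come from ScalaTest):

```scala
import org.scalatest.funsuite.AnyFunSuite

class RunUnsupportedSketch extends AnyFunSuite {
  // Capture the exception and check its message, so a green test proves the
  // operation failed for the expected reason rather than some unrelated error.
  def runUnsupported(block: => Unit): Unit = {
    val e = intercept[UnsupportedOperationException] { block }
    assert(e.getMessage.contains("Unsupported data type"))
  }

  test("decimal conversion is rejected") {
    runUnsupported {
      // Hypothetical stand-in for collectAndValidate(decimalData, ...)
      throw new UnsupportedOperationException("Unsupported data type: decimal(38,18)")
    }
  }
}
```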
@@ -180,7 +170,7 @@ class ArrowSuite extends SharedSQLContext {
     val jsonSchema = jsonReader.start()
     Validator.compareSchemas(arrowSchema, jsonSchema)
 
-    val arrowRecordBatch = df.collectAsArrow(allocator)
+    val arrowRecordBatch = df.collectAsArrow(Some(allocator))
     val arrowRoot = new VectorSchemaRoot(arrowSchema, allocator)
     val vectorLoader = new VectorLoader(arrowRoot)
     vectorLoader.load(arrowRecordBatch)
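
The call site now wraps the allocator in `Some(...)`, which suggests `collectAsArrow` takes an `Option[BufferAllocator]` so non-test callers can omit allocator wiring. That signature is inferred from this diff, not shown in it; a hypothetical sketch of the underlying pattern, using Arrow's real allocator API:

```scala
import org.apache.arrow.memory.{BufferAllocator, RootAllocator}

object AllocatorOptionSketch {
  // Hypothetical helper: with None, create and later close a private allocator;
  // with Some(alloc), borrow the caller's and leave its lifecycle alone.
  def withAllocator[T](allocator: Option[BufferAllocator] = None)(body: BufferAllocator => T): T = {
    val owned = allocator.isEmpty
    val alloc = allocator.getOrElse(new RootAllocator(Long.MaxValue))
    try body(alloc)
    finally if (owned) alloc.close()
  }
}
```

Passing `Some(allocator)` in the test keeps the record batch on the suite's own allocator, so the `VectorSchemaRoot` built on the next line can share it.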
@@ -240,4 +230,14 @@ class ArrowSuite extends SharedSQLContext {
       DoubleData(5, 0.0001, None) ::
       DoubleData(6, 20000.0, Some(3.3)) :: Nil).toDF()
   }
+
+  protected lazy val dateTimeData: DataFrame = {
+    val sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS z", Locale.US)
+    val d1 = new Date(sdf.parse("2015-04-08 13:10:15.000 UTC").getTime)
+    val d2 = new Date(sdf.parse("2015-04-08 13:10:15.000 UTC").getTime)
+    val ts1 = new Timestamp(sdf.parse("2013-04-08 01:10:15.567 UTC").getTime)
+    val ts2 = new Timestamp(sdf.parse("2013-04-08 13:10:10.789 UTC").getTime)
+    Seq((d1, sdf.format(d1), ts1), (d2, sdf.format(d2), ts2))
+      .toDF("a_date", "b_string", "c_timestamp")
+  }
 }
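
One detail worth noting in the extracted fixture: the removed date test parsed with `"yyyy-MM-dd HH:mm:ss.SSS"` and no zone, while the shared `dateTimeData` uses `"yyyy-MM-dd HH:mm:ss.SSS z"` with explicit `UTC`, so the fixture's epoch millis no longer depend on the JVM's default time zone. A standalone sketch of that parsing (object name hypothetical, stdlib only):

```scala
import java.sql.{Date, Timestamp}
import java.text.SimpleDateFormat
import java.util.Locale

object DateTimeFixtureSketch extends App {
  // An explicit zone in the pattern pins the underlying epoch millis on any JVM;
  // toString still renders in the local zone, but getTime is stable everywhere.
  val sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS z", Locale.US)
  val ts = new Timestamp(sdf.parse("2013-04-08 01:10:15.567 UTC").getTime)
  val d = new Date(sdf.parse("2015-04-08 13:10:15.000 UTC").getTime)
  println(s"ts millis = ${ts.getTime}, date millis = ${d.getTime}")
}
```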