You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
[SPARK-28270][SQL][FOLLOW-UP] Explicitly cast into int/long/decimal in udf-aggregates_part1.sql to avoid Python float limitation
## What changes were proposed in this pull request?
The tests added at #25069 seem flaky in some environments. See #25069 (comment)
Python's string representation of floats can make the tests flaky. See https://docs.python.org/3/tutorial/floatingpoint.html.
I think it's safer to add an explicit cast everywhere a UDF returns a float (or a double). (Note that we're not targeting the Python <> Scala value conversions here - there are inevitable differences between Python and Scala; therefore, UDFs written in different languages cannot be guaranteed to produce the same results.)
This PR proposes to explicitly cast such results to long, integer, and decimal to make the test cases robust.
<details><summary>Diff comparing to 'pgSQL/aggregates_part1.sql'</summary>
<p>
```diff
diff --git a/sql/core/src/test/resources/sql-tests/results/pgSQL/aggregates_part1.sql.out b/sql/core/src/test/resources/sql-tests/results/udf/pgSQL/udf-aggregates_part1.sql.out
index 51ca1d5..734634b7388 100644
--- a/sql/core/src/test/resources/sql-tests/results/pgSQL/aggregates_part1.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/udf/pgSQL/udf-aggregates_part1.sql.out
@@ -3,23 +3,23 @@
-- !query 0
-SELECT avg(four) AS avg_1 FROM onek
+SELECT CAST(avg(udf(four)) AS decimal(10,3)) AS avg_1 FROM onek
-- !query 0 schema
-struct<avg_1:double>
+struct<avg_1:decimal(10,3)>
-- !query 0 output
1.5
-- !query 1
-SELECT avg(a) AS avg_32 FROM aggtest WHERE a < 100
+SELECT CAST(udf(avg(a)) AS decimal(10,3)) AS avg_32 FROM aggtest WHERE a < 100
-- !query 1 schema
-struct<avg_32:double>
+struct<avg_32:decimal(10,3)>
-- !query 1 output
-32.666666666666664
+32.667
-- !query 2
-select CAST(avg(b) AS Decimal(10,3)) AS avg_107_943 FROM aggtest
+select CAST(avg(udf(b)) AS Decimal(10,3)) AS avg_107_943 FROM aggtest
-- !query 2 schema
struct<avg_107_943:decimal(10,3)>
-- !query 2 output
@@ -27,39 +27,39 @@ struct<avg_107_943:decimal(10,3)>
-- !query 3
-SELECT sum(four) AS sum_1500 FROM onek
+SELECT CAST(sum(udf(four)) AS int) AS sum_1500 FROM onek
-- !query 3 schema
-struct<sum_1500:bigint>
+struct<sum_1500:int>
-- !query 3 output
1500
-- !query 4
-SELECT sum(a) AS sum_198 FROM aggtest
+SELECT udf(sum(a)) AS sum_198 FROM aggtest
-- !query 4 schema
-struct<sum_198:bigint>
+struct<sum_198:string>
-- !query 4 output
198
-- !query 5
-SELECT sum(b) AS avg_431_773 FROM aggtest
+SELECT CAST(udf(udf(sum(b))) AS decimal(10,3)) AS avg_431_773 FROM aggtest
-- !query 5 schema
-struct<avg_431_773:double>
+struct<avg_431_773:decimal(10,3)>
-- !query 5 output
-431.77260909229517
+431.773
-- !query 6
-SELECT max(four) AS max_3 FROM onek
+SELECT udf(max(four)) AS max_3 FROM onek
-- !query 6 schema
-struct<max_3:int>
+struct<max_3:string>
-- !query 6 output
3
-- !query 7
-SELECT max(a) AS max_100 FROM aggtest
+SELECT max(CAST(udf(a) AS int)) AS max_100 FROM aggtest
-- !query 7 schema
struct<max_100:int>
-- !query 7 output
@@ -67,245 +67,246 @@ struct<max_100:int>
-- !query 8
-SELECT max(aggtest.b) AS max_324_78 FROM aggtest
+SELECT CAST(udf(udf(max(aggtest.b))) AS decimal(10,3)) AS max_324_78 FROM aggtest
-- !query 8 schema
-struct<max_324_78:float>
+struct<max_324_78:decimal(10,3)>
-- !query 8 output
324.78
-- !query 9
-SELECT stddev_pop(b) FROM aggtest
+SELECT CAST(stddev_pop(udf(b)) AS decimal(10,3)) FROM aggtest
-- !query 9 schema
-struct<stddev_pop(CAST(b AS DOUBLE)):double>
+struct<CAST(stddev_pop(CAST(udf(b) AS DOUBLE)) AS DECIMAL(10,3)):decimal(10,3)>
-- !query 9 output
-131.10703231895047
+131.107
-- !query 10
-SELECT stddev_samp(b) FROM aggtest
+SELECT CAST(udf(stddev_samp(b)) AS decimal(10,3)) FROM aggtest
-- !query 10 schema
-struct<stddev_samp(CAST(b AS DOUBLE)):double>
+struct<CAST(udf(stddev_samp(cast(b as double))) AS DECIMAL(10,3)):decimal(10,3)>
-- !query 10 output
-151.38936080399804
+151.389
-- !query 11
-SELECT var_pop(b) FROM aggtest
+SELECT CAST(var_pop(udf(b)) AS decimal(10,3)) FROM aggtest
-- !query 11 schema
-struct<var_pop(CAST(b AS DOUBLE)):double>
+struct<CAST(var_pop(CAST(udf(b) AS DOUBLE)) AS DECIMAL(10,3)):decimal(10,3)>
-- !query 11 output
-17189.053923482323
+17189.054
-- !query 12
-SELECT var_samp(b) FROM aggtest
+SELECT CAST(udf(var_samp(b)) AS decimal(10,3)) FROM aggtest
-- !query 12 schema
-struct<var_samp(CAST(b AS DOUBLE)):double>
+struct<CAST(udf(var_samp(cast(b as double))) AS DECIMAL(10,3)):decimal(10,3)>
-- !query 12 output
-22918.738564643096
+22918.739
-- !query 13
-SELECT stddev_pop(CAST(b AS Decimal(38,0))) FROM aggtest
+SELECT CAST(udf(stddev_pop(CAST(b AS Decimal(38,0)))) AS decimal(10,3)) FROM aggtest
-- !query 13 schema
-struct<stddev_pop(CAST(CAST(b AS DECIMAL(38,0)) AS DOUBLE)):double>
+struct<CAST(udf(stddev_pop(cast(cast(b as decimal(38,0)) as double))) AS DECIMAL(10,3)):decimal(10,3)>
-- !query 13 output
-131.18117242958306
+131.181
-- !query 14
-SELECT stddev_samp(CAST(b AS Decimal(38,0))) FROM aggtest
+SELECT CAST(stddev_samp(CAST(udf(b) AS Decimal(38,0))) AS decimal(10,3)) FROM aggtest
-- !query 14 schema
-struct<stddev_samp(CAST(CAST(b AS DECIMAL(38,0)) AS DOUBLE)):double>
+struct<CAST(stddev_samp(CAST(CAST(udf(b) AS DECIMAL(38,0)) AS DOUBLE)) AS DECIMAL(10,3)):decimal(10,3)>
-- !query 14 output
-151.47497042966097
+151.475
-- !query 15
-SELECT var_pop(CAST(b AS Decimal(38,0))) FROM aggtest
+SELECT CAST(udf(var_pop(CAST(b AS Decimal(38,0)))) AS decimal(10,3)) FROM aggtest
-- !query 15 schema
-struct<var_pop(CAST(CAST(b AS DECIMAL(38,0)) AS DOUBLE)):double>
+struct<CAST(udf(var_pop(cast(cast(b as decimal(38,0)) as double))) AS DECIMAL(10,3)):decimal(10,3)>
-- !query 15 output
17208.5
-- !query 16
-SELECT var_samp(CAST(b AS Decimal(38,0))) FROM aggtest
+SELECT CAST(var_samp(udf(CAST(b AS Decimal(38,0)))) AS decimal(10,3)) FROM aggtest
-- !query 16 schema
-struct<var_samp(CAST(CAST(b AS DECIMAL(38,0)) AS DOUBLE)):double>
+struct<CAST(var_samp(CAST(udf(cast(b as decimal(38,0))) AS DOUBLE)) AS DECIMAL(10,3)):decimal(10,3)>
-- !query 16 output
-22944.666666666668
+22944.667
-- !query 17
-SELECT var_pop(1.0), var_samp(2.0)
+SELECT CAST(udf(var_pop(1.0)) AS int), var_samp(udf(2.0))
-- !query 17 schema
-struct<var_pop(CAST(1.0 AS DOUBLE)):double,var_samp(CAST(2.0 AS DOUBLE)):double>
+struct<CAST(udf(var_pop(cast(1.0 as double))) AS INT):int,var_samp(CAST(udf(2.0) AS DOUBLE)):double>
-- !query 17 output
-0.0 NaN
+0 NaN
-- !query 18
-SELECT stddev_pop(CAST(3.0 AS Decimal(38,0))), stddev_samp(CAST(4.0 AS Decimal(38,0)))
+SELECT CAST(stddev_pop(udf(CAST(3.0 AS Decimal(38,0)))) AS int), stddev_samp(CAST(udf(4.0) AS Decimal(38,0)))
-- !query 18 schema
-struct<stddev_pop(CAST(CAST(3.0 AS DECIMAL(38,0)) AS DOUBLE)):double,stddev_samp(CAST(CAST(4.0 AS DECIMAL(38,0)) AS DOUBLE)):double>
+struct<CAST(stddev_pop(CAST(udf(cast(3.0 as decimal(38,0))) AS DOUBLE)) AS INT):int,stddev_samp(CAST(CAST(udf(4.0) AS DECIMAL(38,0)) AS DOUBLE)):double>
-- !query 18 output
-0.0 NaN
+0 NaN
-- !query 19
-select sum(CAST(null AS int)) from range(1,4)
+select sum(udf(CAST(null AS int))) from range(1,4)
-- !query 19 schema
-struct<sum(CAST(NULL AS INT)):bigint>
+struct<sum(CAST(udf(cast(null as int)) AS DOUBLE)):double>
-- !query 19 output
NULL
-- !query 20
-select sum(CAST(null AS long)) from range(1,4)
+select sum(udf(CAST(null AS long))) from range(1,4)
-- !query 20 schema
-struct<sum(CAST(NULL AS BIGINT)):bigint>
+struct<sum(CAST(udf(cast(null as bigint)) AS DOUBLE)):double>
-- !query 20 output
NULL
-- !query 21
-select sum(CAST(null AS Decimal(38,0))) from range(1,4)
+select sum(udf(CAST(null AS Decimal(38,0)))) from range(1,4)
-- !query 21 schema
-struct<sum(CAST(NULL AS DECIMAL(38,0))):decimal(38,0)>
+struct<sum(CAST(udf(cast(null as decimal(38,0))) AS DOUBLE)):double>
-- !query 21 output
NULL
-- !query 22
-select sum(CAST(null AS DOUBLE)) from range(1,4)
+select sum(udf(CAST(null AS DOUBLE))) from range(1,4)
-- !query 22 schema
-struct<sum(CAST(NULL AS DOUBLE)):double>
+struct<sum(CAST(udf(cast(null as double)) AS DOUBLE)):double>
-- !query 22 output
NULL
-- !query 23
-select avg(CAST(null AS int)) from range(1,4)
+select avg(udf(CAST(null AS int))) from range(1,4)
-- !query 23 schema
-struct<avg(CAST(NULL AS INT)):double>
+struct<avg(CAST(udf(cast(null as int)) AS DOUBLE)):double>
-- !query 23 output
NULL
-- !query 24
-select avg(CAST(null AS long)) from range(1,4)
+select avg(udf(CAST(null AS long))) from range(1,4)
-- !query 24 schema
-struct<avg(CAST(NULL AS BIGINT)):double>
+struct<avg(CAST(udf(cast(null as bigint)) AS DOUBLE)):double>
-- !query 24 output
NULL
-- !query 25
-select avg(CAST(null AS Decimal(38,0))) from range(1,4)
+select avg(udf(CAST(null AS Decimal(38,0)))) from range(1,4)
-- !query 25 schema
-struct<avg(CAST(NULL AS DECIMAL(38,0))):decimal(38,4)>
+struct<avg(CAST(udf(cast(null as decimal(38,0))) AS DOUBLE)):double>
-- !query 25 output
NULL
-- !query 26
-select avg(CAST(null AS DOUBLE)) from range(1,4)
+select avg(udf(CAST(null AS DOUBLE))) from range(1,4)
-- !query 26 schema
-struct<avg(CAST(NULL AS DOUBLE)):double>
+struct<avg(CAST(udf(cast(null as double)) AS DOUBLE)):double>
-- !query 26 output
NULL
-- !query 27
-select sum(CAST('NaN' AS DOUBLE)) from range(1,4)
+select sum(CAST(udf('NaN') AS DOUBLE)) from range(1,4)
-- !query 27 schema
-struct<sum(CAST(NaN AS DOUBLE)):double>
+struct<sum(CAST(udf(NaN) AS DOUBLE)):double>
-- !query 27 output
NaN
-- !query 28
-select avg(CAST('NaN' AS DOUBLE)) from range(1,4)
+select avg(CAST(udf('NaN') AS DOUBLE)) from range(1,4)
-- !query 28 schema
-struct<avg(CAST(NaN AS DOUBLE)):double>
+struct<avg(CAST(udf(NaN) AS DOUBLE)):double>
-- !query 28 output
NaN
-- !query 30
-SELECT avg(CAST(x AS DOUBLE)), var_pop(CAST(x AS DOUBLE))
+SELECT avg(CAST(udf(x) AS DOUBLE)), var_pop(CAST(udf(x) AS DOUBLE))
FROM (VALUES ('Infinity'), ('1')) v(x)
-- !query 30 schema
-struct<avg(CAST(x AS DOUBLE)):double,var_pop(CAST(x AS DOUBLE)):double>
+struct<avg(CAST(udf(x) AS DOUBLE)):double,var_pop(CAST(udf(x) AS DOUBLE)):double>
-- !query 30 output
Infinity NaN
-- !query 31
-SELECT avg(CAST(x AS DOUBLE)), var_pop(CAST(x AS DOUBLE))
+SELECT avg(CAST(udf(x) AS DOUBLE)), var_pop(CAST(udf(x) AS DOUBLE))
FROM (VALUES ('Infinity'), ('Infinity')) v(x)
-- !query 31 schema
-struct<avg(CAST(x AS DOUBLE)):double,var_pop(CAST(x AS DOUBLE)):double>
+struct<avg(CAST(udf(x) AS DOUBLE)):double,var_pop(CAST(udf(x) AS DOUBLE)):double>
-- !query 31 output
Infinity NaN
-- !query 32
-SELECT avg(CAST(x AS DOUBLE)), var_pop(CAST(x AS DOUBLE))
+SELECT avg(CAST(udf(x) AS DOUBLE)), var_pop(CAST(udf(x) AS DOUBLE))
FROM (VALUES ('-Infinity'), ('Infinity')) v(x)
-- !query 32 schema
-struct<avg(CAST(x AS DOUBLE)):double,var_pop(CAST(x AS DOUBLE)):double>
+struct<avg(CAST(udf(x) AS DOUBLE)):double,var_pop(CAST(udf(x) AS DOUBLE)):double>
-- !query 32 output
NaN NaN
-- !query 33
-SELECT avg(CAST(x AS DOUBLE)), var_pop(CAST(x AS DOUBLE))
+SELECT CAST(avg(udf(CAST(x AS DOUBLE))) AS int), CAST(udf(var_pop(CAST(x AS DOUBLE))) AS decimal(10,3))
FROM (VALUES (100000003), (100000004), (100000006), (100000007)) v(x)
-- !query 33 schema
-struct<avg(CAST(x AS DOUBLE)):double,var_pop(CAST(x AS DOUBLE)):double>
+struct<CAST(avg(CAST(udf(cast(x as double)) AS DOUBLE)) AS INT):int,CAST(udf(var_pop(cast(x as double))) AS DECIMAL(10,3)):decimal(10,3)>
-- !query 33 output
-1.00000005E8 2.5
+100000005 2.5
-- !query 34
-SELECT avg(CAST(x AS DOUBLE)), var_pop(CAST(x AS DOUBLE))
+SELECT CAST(avg(udf(CAST(x AS DOUBLE))) AS long), CAST(udf(var_pop(CAST(x AS DOUBLE))) AS decimal(10,3))
FROM (VALUES (7000000000005), (7000000000007)) v(x)
-- !query 34 schema
-struct<avg(CAST(x AS DOUBLE)):double,var_pop(CAST(x AS DOUBLE)):double>
+struct<CAST(avg(CAST(udf(cast(x as double)) AS DOUBLE)) AS BIGINT):bigint,CAST(udf(var_pop(cast(x as double))) AS DECIMAL(10,3)):decimal(10,3)>
-- !query 34 output
-7.000000000006E12 1.0
+7000000000006 1
-- !query 35
-SELECT covar_pop(b, a), covar_samp(b, a) FROM aggtest
+SELECT CAST(udf(covar_pop(b, udf(a))) AS decimal(10,3)), CAST(covar_samp(udf(b), a) as decimal(10,3)) FROM aggtest
-- !query 35 schema
-struct<covar_pop(CAST(b AS DOUBLE), CAST(a AS DOUBLE)):double,covar_samp(CAST(b AS DOUBLE), CAST(a AS DOUBLE)):double>
+struct<CAST(udf(covar_pop(cast(b as double), cast(udf(a) as double))) AS DECIMAL(10,3)):decimal(10,3),CAST(covar_samp(CAST(udf(b) AS DOUBLE), CAST(a AS DOUBLE)) AS DECIMAL(10,3)):decimal(10,3)>
-- !query 35 output
-653.6289553875104 871.5052738500139
+653.629 871.505
-- !query 36
-SELECT corr(b, a) FROM aggtest
+SELECT CAST(corr(b, udf(a)) AS decimal(10,3)) FROM aggtest
-- !query 36 schema
-struct<corr(CAST(b AS DOUBLE), CAST(a AS DOUBLE)):double>
+struct<CAST(corr(CAST(b AS DOUBLE), CAST(udf(a) AS DOUBLE)) AS DECIMAL(10,3)):decimal(10,3)>
-- !query 36 output
-0.1396345165178734
+0.14
-- !query 37
-SELECT count(four) AS cnt_1000 FROM onek
+SELECT count(udf(four)) AS cnt_1000 FROM onek
-- !query 37 schema
struct<cnt_1000:bigint>
-- !query 37 output
@@ -313,18 +314,18 @@ struct<cnt_1000:bigint>
-- !query 38
-SELECT count(DISTINCT four) AS cnt_4 FROM onek
+SELECT udf(count(DISTINCT four)) AS cnt_4 FROM onek
-- !query 38 schema
-struct<cnt_4:bigint>
+struct<cnt_4:string>
-- !query 38 output
4
-- !query 39
-select ten, count(*), sum(four) from onek
+select ten, udf(count(*)), CAST(sum(udf(four)) AS int) from onek
group by ten order by ten
-- !query 39 schema
-struct<ten:int,count(1):bigint,sum(four):bigint>
+struct<ten:int,udf(count(1)):string,CAST(sum(CAST(udf(four) AS DOUBLE)) AS INT):int>
-- !query 39 output
0 100 100
1 100 200
@@ -339,10 +340,10 @@ struct<ten:int,count(1):bigint,sum(four):bigint>
-- !query 40
-select ten, count(four), sum(DISTINCT four) from onek
+select ten, count(udf(four)), udf(sum(DISTINCT four)) from onek
group by ten order by ten
-- !query 40 schema
-struct<ten:int,count(four):bigint,sum(DISTINCT four):bigint>
+struct<ten:int,count(udf(four)):bigint,udf(sum(distinct cast(four as bigint))):string>
-- !query 40 output
0 100 2
1 100 4
@@ -357,11 +358,11 @@ struct<ten:int,count(four):bigint,sum(DISTINCT four):bigint>
-- !query 41
-select ten, sum(distinct four) from onek a
+select ten, udf(sum(distinct four)) from onek a
group by ten
-having exists (select 1 from onek b where sum(distinct a.four) = b.four)
+having exists (select 1 from onek b where udf(sum(distinct a.four)) = b.four)
-- !query 41 schema
-struct<ten:int,sum(DISTINCT four):bigint>
+struct<ten:int,udf(sum(distinct cast(four as bigint))):string>
-- !query 41 output
0 2
2 2
@@ -374,23 +375,23 @@ struct<ten:int,sum(DISTINCT four):bigint>
select ten, sum(distinct four) from onek a
group by ten
having exists (select 1 from onek b
- where sum(distinct a.four + b.four) = b.four)
+ where sum(distinct a.four + b.four) = udf(b.four))
-- !query 42 schema
struct<>
-- !query 42 output
org.apache.spark.sql.AnalysisException
Aggregate/Window/Generate expressions are not valid in where clause of the query.
-Expression in where clause: [(sum(DISTINCT CAST((outer() + b.`four`) AS BIGINT)) = CAST(b.`four` AS BIGINT))]
+Expression in where clause: [(sum(DISTINCT CAST((outer() + b.`four`) AS BIGINT)) = CAST(udf(four) AS BIGINT))]
Invalid expressions: [sum(DISTINCT CAST((outer() + b.`four`) AS BIGINT))];
-- !query 43
select
- (select max((select i.unique2 from tenk1 i where i.unique1 = o.unique1)))
+ (select udf(max((select i.unique2 from tenk1 i where i.unique1 = o.unique1))))
from tenk1 o
-- !query 43 schema
struct<>
-- !query 43 output
org.apache.spark.sql.AnalysisException
-cannot resolve '`o.unique1`' given input columns: [i.even, i.fivethous, i.four, i.hundred, i.odd, i.string4, i.stringu1, i.stringu2, i.ten, i.tenthous, i.thousand, i.twenty, i.two, i.twothousand, i.unique1, i.unique2]; line 2 pos 63
+cannot resolve '`o.unique1`' given input columns: [i.even, i.fivethous, i.four, i.hundred, i.odd, i.string4, i.stringu1, i.stringu2, i.ten, i.tenthous, i.thousand, i.twenty, i.two, i.twothousand, i.unique1, i.unique2]; line 2 pos 67
```
</p>
</details>
## How was this patch tested?
Manually tested locally.
Also, with JDK 11:
```
Using /.../jdk-11.0.3.jdk/Contents/Home as default JAVA_HOME.
Note, this will be overridden by -java-home if it is set.
[info] Loading project definition from /.../spark/project
[info] Updating {file:/.../spark/project/}spark-build...
...
[info] SQLQueryTestSuite:
...
[info] - udf/pgSQL/udf-aggregates_part1.sql - Scala UDF (17 seconds, 228 milliseconds)
[info] - udf/pgSQL/udf-aggregates_part1.sql - Regular Python UDF (36 seconds, 170 milliseconds)
[info] - udf/pgSQL/udf-aggregates_part1.sql - Scalar Pandas UDF (41 seconds, 132 milliseconds)
...
```
Closes #25110 from HyukjinKwon/SPARK-28270-1.
Authored-by: HyukjinKwon <[email protected]>
Signed-off-by: HyukjinKwon <[email protected]>
SELECT CAST(sum(udf(four)) AS int) AS sum_1500 FROM onek
31
31
-- !query 3 schema
32
-
struct<sum_1500:double>
32
+
struct<sum_1500:int>
33
33
-- !query 3 output
34
-
1500.0
34
+
1500
35
35
36
36
37
37
-- !query 4
@@ -43,11 +43,11 @@ struct<sum_198:string>
43
43
44
44
45
45
-- !query 5
46
-
SELECT udf(udf(sum(b))) AS avg_431_773 FROM aggtest
46
+
SELECT CAST(udf(udf(sum(b))) AS decimal(10,3)) AS avg_431_773 FROM aggtest
47
47
-- !query 5 schema
48
-
struct<avg_431_773:string>
48
+
struct<avg_431_773:decimal(10,3)>
49
49
-- !query 5 output
50
-
431.77260909229517
50
+
431.773
51
51
52
52
53
53
-- !query 6
@@ -59,99 +59,99 @@ struct<max_3:string>
59
59
60
60
61
61
-- !query 7
62
-
SELECT max(udf(a)) AS max_100 FROM aggtest
62
+
SELECT max(CAST(udf(a) AS int)) AS max_100 FROM aggtest
63
63
-- !query 7 schema
64
-
struct<max_100:string>
64
+
struct<max_100:int>
65
65
-- !query 7 output
66
-
56
66
+
100
67
67
68
68
69
69
-- !query 8
70
-
SELECT CAST(udf(udf(max(aggtest.b))) AS int) AS max_324_78 FROM aggtest
70
+
SELECT CAST(udf(udf(max(aggtest.b))) AS decimal(10,3)) AS max_324_78 FROM aggtest
71
71
-- !query 8 schema
72
-
struct<max_324_78:int>
72
+
struct<max_324_78:decimal(10,3)>
73
73
-- !query 8 output
74
-
324
74
+
324.78
75
75
76
76
77
77
-- !query 9
78
-
SELECT CAST(stddev_pop(udf(b)) AS int) FROM aggtest
78
+
SELECT CAST(stddev_pop(udf(b)) AS decimal(10,3)) FROM aggtest
79
79
-- !query 9 schema
80
-
struct<CAST(stddev_pop(CAST(udf(b) AS DOUBLE)) AS INT):int>
80
+
struct<CAST(stddev_pop(CAST(udf(b) AS DOUBLE)) AS DECIMAL(10,3)):decimal(10,3)>
81
81
-- !query 9 output
82
-
131
82
+
131.107
83
83
84
84
85
85
-- !query 10
86
-
SELECT udf(stddev_samp(b)) FROM aggtest
86
+
SELECT CAST(udf(stddev_samp(b)) AS decimal(10,3)) FROM aggtest
87
87
-- !query 10 schema
88
-
struct<udf(stddev_samp(cast(b as double))):string>
88
+
struct<CAST(udf(stddev_samp(cast(b as double))) AS DECIMAL(10,3)):decimal(10,3)>
89
89
-- !query 10 output
90
-
151.38936080399804
90
+
151.389
91
91
92
92
93
93
-- !query 11
94
-
SELECT CAST(var_pop(udf(b)) as int) FROM aggtest
94
+
SELECT CAST(var_pop(udf(b)) AS decimal(10,3)) FROM aggtest
95
95
-- !query 11 schema
96
-
struct<CAST(var_pop(CAST(udf(b) AS DOUBLE)) AS INT):int>
96
+
struct<CAST(var_pop(CAST(udf(b) AS DOUBLE)) AS DECIMAL(10,3)):decimal(10,3)>
97
97
-- !query 11 output
98
-
17189
98
+
17189.054
99
99
100
100
101
101
-- !query 12
102
-
SELECT udf(var_samp(b)) FROM aggtest
102
+
SELECT CAST(udf(var_samp(b)) AS decimal(10,3)) FROM aggtest
103
103
-- !query 12 schema
104
-
struct<udf(var_samp(cast(b as double))):string>
104
+
struct<CAST(udf(var_samp(cast(b as double))) AS DECIMAL(10,3)):decimal(10,3)>
105
105
-- !query 12 output
106
-
22918.738564643096
106
+
22918.739
107
107
108
108
109
109
-- !query 13
110
-
SELECT udf(stddev_pop(CAST(b AS Decimal(38,0)))) FROM aggtest
110
+
SELECT CAST(udf(stddev_pop(CAST(b AS Decimal(38,0)))) AS decimal(10,3)) FROM aggtest
111
111
-- !query 13 schema
112
-
struct<udf(stddev_pop(cast(cast(b as decimal(38,0)) as double))):string>
112
+
struct<CAST(udf(stddev_pop(cast(cast(b as decimal(38,0)) as double))) AS DECIMAL(10,3)):decimal(10,3)>
113
113
-- !query 13 output
114
-
131.18117242958306
114
+
131.181
115
115
116
116
117
117
-- !query 14
118
-
SELECT stddev_samp(CAST(udf(b) AS Decimal(38,0))) FROM aggtest
118
+
SELECT CAST(stddev_samp(CAST(udf(b) AS Decimal(38,0))) AS decimal(10,3)) FROM aggtest
119
119
-- !query 14 schema
120
-
struct<stddev_samp(CAST(CAST(udf(b) AS DECIMAL(38,0)) AS DOUBLE)):double>
120
+
struct<CAST(stddev_samp(CAST(CAST(udf(b) AS DECIMAL(38,0)) AS DOUBLE)) AS DECIMAL(10,3)):decimal(10,3)>
121
121
-- !query 14 output
122
-
151.47497042966097
122
+
151.475
123
123
124
124
125
125
-- !query 15
126
-
SELECT udf(var_pop(CAST(b AS Decimal(38,0)))) FROM aggtest
126
+
SELECT CAST(udf(var_pop(CAST(b AS Decimal(38,0)))) AS decimal(10,3)) FROM aggtest
127
127
-- !query 15 schema
128
-
struct<udf(var_pop(cast(cast(b as decimal(38,0)) as double))):string>
128
+
struct<CAST(udf(var_pop(cast(cast(b as decimal(38,0)) as double))) AS DECIMAL(10,3)):decimal(10,3)>
129
129
-- !query 15 output
130
130
17208.5
131
131
132
132
133
133
-- !query 16
134
-
SELECT var_samp(udf(CAST(b AS Decimal(38,0)))) FROM aggtest
134
+
SELECT CAST(var_samp(udf(CAST(b AS Decimal(38,0)))) AS decimal(10,3)) FROM aggtest
135
135
-- !query 16 schema
136
-
struct<var_samp(CAST(udf(cast(b as decimal(38,0))) AS DOUBLE)):double>
136
+
struct<CAST(var_samp(CAST(udf(cast(b as decimal(38,0))) AS DOUBLE)) AS DECIMAL(10,3)):decimal(10,3)>
137
137
-- !query 16 output
138
-
22944.666666666668
138
+
22944.667
139
139
140
140
141
141
-- !query 17
142
-
SELECT udf(var_pop(1.0)), var_samp(udf(2.0))
142
+
SELECT CAST(udf(var_pop(1.0)) AS int), var_samp(udf(2.0))
143
143
-- !query 17 schema
144
-
struct<udf(var_pop(cast(1.0 as double))):string,var_samp(CAST(udf(2.0) AS DOUBLE)):double>
144
+
struct<CAST(udf(var_pop(cast(1.0 as double))) AS INT):int,var_samp(CAST(udf(2.0) AS DOUBLE)):double>
145
145
-- !query 17 output
146
-
0.0 NaN
146
+
0 NaN
147
147
148
148
149
149
-- !query 18
150
-
SELECT stddev_pop(udf(CAST(3.0 AS Decimal(38,0)))), stddev_samp(CAST(udf(4.0) AS Decimal(38,0)))
150
+
SELECT CAST(stddev_pop(udf(CAST(3.0 AS Decimal(38,0)))) AS int), stddev_samp(CAST(udf(4.0) AS Decimal(38,0)))
151
151
-- !query 18 schema
152
-
struct<stddev_pop(CAST(udf(cast(3.0 as decimal(38,0))) AS DOUBLE)):double,stddev_samp(CAST(CAST(udf(4.0) AS DECIMAL(38,0)) AS DOUBLE)):double>
152
+
struct<CAST(stddev_pop(CAST(udf(cast(3.0 as decimal(38,0))) AS DOUBLE)) AS INT):int,stddev_samp(CAST(CAST(udf(4.0) AS DECIMAL(38,0)) AS DOUBLE)):double>
153
153
-- !query 18 output
154
-
0.0 NaN
154
+
0 NaN
155
155
156
156
157
157
-- !query 19
@@ -262,37 +262,37 @@ NaN NaN
262
262
263
263
264
264
-- !query 32
265
-
SELECT avg(udf(CAST(x AS DOUBLE))), udf(var_pop(CAST(x AS DOUBLE)))
265
+
SELECT CAST(avg(udf(CAST(x AS DOUBLE))) AS int), CAST(udf(var_pop(CAST(x AS DOUBLE))) AS decimal(10,3))
266
266
FROM (VALUES (100000003), (100000004), (100000006), (100000007)) v(x)
267
267
-- !query 32 schema
268
-
struct<avg(CAST(udf(cast(x as double)) AS DOUBLE)):double,udf(var_pop(cast(x as double))):string>
268
+
struct<CAST(avg(CAST(udf(cast(x as double)) AS DOUBLE)) AS INT):int,CAST(udf(var_pop(cast(x as double))) AS DECIMAL(10,3)):decimal(10,3)>
269
269
-- !query 32 output
270
-
1.00000005E8 2.5
270
+
100000005 2.5
271
271
272
272
273
273
-- !query 33
274
-
SELECT avg(udf(CAST(x AS DOUBLE))), udf(var_pop(CAST(x AS DOUBLE)))
274
+
SELECT CAST(avg(udf(CAST(x AS DOUBLE))) AS long), CAST(udf(var_pop(CAST(x AS DOUBLE))) AS decimal(10,3))
275
275
FROM (VALUES (7000000000005), (7000000000007)) v(x)
276
276
-- !query 33 schema
277
-
struct<avg(CAST(udf(cast(x as double)) AS DOUBLE)):double,udf(var_pop(cast(x as double))):string>
277
+
struct<CAST(avg(CAST(udf(cast(x as double)) AS DOUBLE)) AS BIGINT):bigint,CAST(udf(var_pop(cast(x as double))) AS DECIMAL(10,3)):decimal(10,3)>
278
278
-- !query 33 output
279
-
7.000000000006E12 1.0
279
+
7000000000006 1
280
280
281
281
282
282
-- !query 34
283
-
SELECT CAST(udf(covar_pop(b, udf(a))) AS int), CAST(covar_samp(udf(b), a) as int) FROM aggtest
283
+
SELECT CAST(udf(covar_pop(b, udf(a))) AS decimal(10,3)), CAST(covar_samp(udf(b), a) as decimal(10,3)) FROM aggtest
284
284
-- !query 34 schema
285
-
struct<CAST(udf(covar_pop(cast(b as double), cast(udf(a) as double))) AS INT):int,CAST(covar_samp(CAST(udf(b) AS DOUBLE), CAST(a AS DOUBLE)) AS INT):int>
285
+
struct<CAST(udf(covar_pop(cast(b as double), cast(udf(a) as double))) AS DECIMAL(10,3)):decimal(10,3),CAST(covar_samp(CAST(udf(b) AS DOUBLE), CAST(a AS DOUBLE)) AS DECIMAL(10,3)):decimal(10,3)>
286
286
-- !query 34 output
287
-
653 871
287
+
653.629 871.505
288
288
289
289
290
290
-- !query 35
291
-
SELECT corr(b, udf(a)) FROM aggtest
291
+
SELECT CAST(corr(b, udf(a)) AS decimal(10,3)) FROM aggtest
292
292
-- !query 35 schema
293
-
struct<corr(CAST(b AS DOUBLE), CAST(udf(a) AS DOUBLE)):double>
293
+
struct<CAST(corr(CAST(b AS DOUBLE), CAST(udf(a) AS DOUBLE)) AS DECIMAL(10,3)):decimal(10,3)>
294
294
-- !query 35 output
295
-
0.1396345165178734
295
+
0.14
296
296
297
297
298
298
-- !query 36
@@ -312,21 +312,21 @@ struct<cnt_4:string>
312
312
313
313
314
314
-- !query 38
315
-
select ten, udf(count(*)), sum(udf(four)) from onek
315
+
select ten, udf(count(*)), CAST(sum(udf(four)) AS int) from onek
316
316
group by ten order by ten
317
317
-- !query 38 schema
318
-
struct<ten:int,udf(count(1)):string,sum(CAST(udf(four) AS DOUBLE)):double>
318
+
struct<ten:int,udf(count(1)):string,CAST(sum(CAST(udf(four) AS DOUBLE)) AS INT):int>
0 commit comments