Skip to content

Commit 4de477c

Browse files
pramodsatya and Manoj Negi
committed
[native] Add TestAggregations to native-tests
Co-authored-by: Manoj Negi <manojneg@in.ibm.com>
1 parent cca2011 commit 4de477c

7 files changed

Lines changed: 407 additions & 6 deletions

File tree

presto-main-base/src/main/java/com/facebook/presto/sql/planner/optimizations/OptimizeMixedDistinctAggregations.java

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,10 @@
1414
package com.facebook.presto.sql.planner.optimizations;
1515

1616
import com.facebook.presto.Session;
17-
import com.facebook.presto.common.QualifiedObjectName;
1817
import com.facebook.presto.metadata.Metadata;
1918
import com.facebook.presto.spi.VariableAllocator;
2019
import com.facebook.presto.spi.WarningCollector;
20+
import com.facebook.presto.spi.function.FunctionHandle;
2121
import com.facebook.presto.spi.function.StandardFunctionResolution;
2222
import com.facebook.presto.spi.plan.AggregationNode;
2323
import com.facebook.presto.spi.plan.AggregationNode.Aggregation;
@@ -51,7 +51,6 @@
5151
import static com.facebook.presto.common.function.OperatorType.EQUAL;
5252
import static com.facebook.presto.common.type.BigintType.BIGINT;
5353
import static com.facebook.presto.common.type.BooleanType.BOOLEAN;
54-
import static com.facebook.presto.metadata.BuiltInTypeAndFunctionNamespaceManager.JAVA_BUILTIN_NAMESPACE;
5554
import static com.facebook.presto.spi.plan.AggregationNode.Step.SINGLE;
5655
import static com.facebook.presto.spi.plan.AggregationNode.singleGroupingSet;
5756
import static com.facebook.presto.spi.plan.ProjectNode.Locality.LOCAL;
@@ -214,10 +213,10 @@ public PlanNode visitAggregation(AggregationNode node, RewriteContext<Optional<A
214213
Optional.empty(),
215214
false,
216215
Optional.empty());
217-
QualifiedObjectName functionName = metadata.getFunctionAndTypeManager().getFunctionMetadata(entry.getValue().getFunctionHandle()).getName();
218-
if (functionName.equals(QualifiedObjectName.valueOf(JAVA_BUILTIN_NAMESPACE, "count")) ||
219-
functionName.equals(QualifiedObjectName.valueOf(JAVA_BUILTIN_NAMESPACE, "count_if")) ||
220-
functionName.equals(QualifiedObjectName.valueOf(JAVA_BUILTIN_NAMESPACE, "approx_distinct"))) {
216+
FunctionHandle functionHandle = entry.getValue().getFunctionHandle();
217+
if (functionResolution.isCountFunction(functionHandle) ||
218+
functionResolution.isCountIfFunction(functionHandle) ||
219+
functionResolution.isApproximateCountDistinctFunction(functionHandle)) {
221220
VariableReferenceExpression newVariable = variableAllocator.newVariable(entry.getValue().getCall().getSourceLocation(), "expr", entry.getKey().getType());
222221
aggregations.put(newVariable, aggregation);
223222
coalesceVariablesBuilder.put(newVariable, entry.getKey());

presto-main-base/src/main/java/com/facebook/presto/sql/relational/FunctionResolution.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -312,6 +312,12 @@ public boolean isCountFunction(FunctionHandle functionHandle)
312312
return functionAndTypeResolver.getFunctionMetadata(functionHandle).getName().equals(functionAndTypeResolver.qualifyObjectName(QualifiedName.of("count")));
313313
}
314314

315+
/**
 * Reports whether {@code functionHandle} refers to {@code count_if}.
 *
 * The name found in the function metadata resolved for the handle is compared with the
 * qualified object name that the resolver produces for {@code count_if}.
 *
 * @param functionHandle handle of the function to check
 * @return {@code true} when the two names are equal
 */
@Override
316+
public boolean isCountIfFunction(FunctionHandle functionHandle)
317+
{
318+
return functionAndTypeResolver.getFunctionMetadata(functionHandle).getName().equals(functionAndTypeResolver.qualifyObjectName(QualifiedName.of("count_if")));
319+
}
320+
315321
@Override
316322
public FunctionHandle countFunction()
317323
{

presto-native-execution/src/test/java/com/facebook/presto/nativeworker/PrestoNativeQueryRunnerUtils.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -235,6 +235,12 @@ public HiveQueryRunnerBuilder setExtraProperties(Map<String, String> extraProper
235235
return this;
236236
}
237237

238+
/**
 * Adds every entry of the given map to this builder's {@code extraCoordinatorProperties}.
 * Entries are merged via {@code putAll}, so earlier entries are kept unless a key repeats.
 *
 * @param extraCoordinatorProperties properties to add
 * @return this builder, so calls can be chained
 */
public HiveQueryRunnerBuilder setExtraCoordinatorProperties(Map<String, String> extraCoordinatorProperties)
239+
{
240+
this.extraCoordinatorProperties.putAll(extraCoordinatorProperties);
241+
return this;
242+
}
243+
238244
public HiveQueryRunnerBuilder setHiveProperties(Map<String, String> hiveProperties)
239245
{
240246
this.hiveProperties.putAll(hiveProperties);
Lines changed: 275 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,275 @@
1+
/*
2+
* Licensed under the Apache License, Version 2.0 (the "License");
3+
* you may not use this file except in compliance with the License.
4+
* You may obtain a copy of the License at
5+
*
6+
* http://www.apache.org/licenses/LICENSE-2.0
7+
*
8+
* Unless required by applicable law or agreed to in writing, software
9+
* distributed under the License is distributed on an "AS IS" BASIS,
10+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11+
* See the License for the specific language governing permissions and
12+
* limitations under the License.
13+
*/
14+
package com.facebook.presto.nativetests;
15+
16+
import com.facebook.presto.tests.AbstractTestAggregations;
17+
import org.testng.annotations.Test;
18+
19+
import static java.lang.String.format;
20+
21+
public abstract class AbstractTestAggregationsNative
22+
extends AbstractTestAggregations
23+
{
24+
private static final String QDIGEST_TYPE = "qdigest";
25+
26+
private String storageFormat;
27+
private String approxDistinctUnsupportedSignatureError;
28+
private String charTypeUnsupportedError;
29+
private String mergeAggFunctionUnsupportedError;
30+
private String tdigestAggFunctionUnsupportedError;
31+
private String timeTypeUnsupportedError;
32+
private String valueAtQuantileFunctionUnsupportedError;
33+
34+
/// Records the storage format and selects the expected error-message regexes that the
/// overridden tests in this class pass to `assertQueryFails`.
///
/// Nothing in this class calls `init`, and the fields it assigns have no initializers, so it
/// presumably has to be invoked by the concrete subclass before any test runs (verify against
/// callers). `testApproximateCountDistinct` dereferences `storageFormat` directly.
///
/// @param storageFormat storage format name; compared against `"DWRF"` in `testApproximateCountDistinct`
/// @param sidecarEnabled selects the expected error regexes: the first set when `true`, the second set otherwise
public void init(String storageFormat, boolean sidecarEnabled)
35+
{
36+
this.storageFormat = storageFormat;
37+
if (sidecarEnabled) {
38+
// Sidecar enabled: the expected messages name functions under `native.default`.
charTypeUnsupportedError = ".*Unknown type: char.*";
39+
timeTypeUnsupportedError = ".*Unknown type: time.*";
40+
approxDistinctUnsupportedSignatureError = ".*Unexpected parameters \\(timestamp with time zone.*\\) for function.*";
41+
mergeAggFunctionUnsupportedError = ".*Unexpected parameters \\(qdigest.* for function native.default.merge.*";
42+
tdigestAggFunctionUnsupportedError = ".*Function native.default.tdigest_agg not registered.*";
43+
valueAtQuantileFunctionUnsupportedError = ".*Unexpected parameters \\(qdigest.* for function native.default.value_at_quantile.*";
44+
}
45+
else {
46+
// Sidecar disabled: the expected messages name functions under `presto.default`.
charTypeUnsupportedError = "Failed to parse type.*char";
47+
timeTypeUnsupportedError = "Failed to parse type.*time";
48+
approxDistinctUnsupportedSignatureError = ".*Aggregate function signature is not supported.*";
49+
mergeAggFunctionUnsupportedError = ".*Aggregate function signature is not supported: presto.default.merge.*";
50+
tdigestAggFunctionUnsupportedError = ".*Aggregate function not registered: presto.default.tdigest_agg.*";
51+
valueAtQuantileFunctionUnsupportedError = ".*Scalar function presto.default.value_at_quantile not registered with arguments: \\(QDIGEST.*";
52+
}
53+
}
54+
55+
/// Overrides the inherited `approx_distinct` test with the values and failures expected from Presto C++.
///
/// `approx_distinct` aggregate function returns a different value for certain datatypes in Presto C++, see this
56+
/// issue for more details: https://github.com/facebookincubator/velox/issues/9761.
57+
/// `approx_distinct` does not support arguments of type `TIMESTAMP WITH TIME ZONE` in Presto C++, see this issue
58+
/// for more details: https://github.com/prestodb/presto/issues/24815.
59+
/// Presto C++ does not support datatypes `CHAR` and `TIME`, see:
60+
/// https://github.com/prestodb/presto/blob/master/presto-docs/src/main/sphinx/presto_cpp/limitations.rst.
61+
@Override
62+
@Test
63+
public void testApproximateCountDistinct()
64+
{
65+
// test NULL
66+
assertQuery("SELECT approx_distinct(NULL)", "SELECT 0");
67+
assertQuery("SELECT approx_distinct(NULL, 0.023)", "SELECT 0");
68+
69+
// test date
70+
// With DWRF storage the column is wrapped in an explicit cast to DATE; other formats use it as-is.
// `storageFormat` is only assigned in `init`, so `init` must have run before this test.
String orderdate = storageFormat.equals("DWRF") ? "cast(orderdate as DATE)" : "orderdate";
71+
assertQuery(format("SELECT approx_distinct(%s) FROM orders", orderdate), "SELECT 2372");
72+
assertQuery(format("SELECT approx_distinct(%s, 0.023) FROM orders", orderdate), "SELECT 2372");
73+
74+
// test timestamp
75+
assertQuery("SELECT approx_distinct(CAST(orderdate AS TIMESTAMP)) FROM orders", "SELECT 2347");
76+
assertQuery("SELECT approx_distinct(CAST(orderdate AS TIMESTAMP), 0.023) FROM orders", "SELECT 2347");
77+
78+
// test timestamp with time zone
79+
// Expected to fail; the regex is the one selected in `init`.
assertQueryFails("SELECT approx_distinct(CAST(orderdate AS TIMESTAMP WITH TIME ZONE)) FROM orders",
80+
approxDistinctUnsupportedSignatureError, true);
81+
assertQueryFails("SELECT approx_distinct(CAST(orderdate AS TIMESTAMP WITH TIME ZONE), 0.023) FROM orders",
82+
approxDistinctUnsupportedSignatureError, true);
83+
84+
// test time
85+
assertQueryFails("SELECT approx_distinct(CAST(from_unixtime(custkey) AS TIME)) FROM orders", timeTypeUnsupportedError, true);
86+
assertQueryFails("SELECT approx_distinct(CAST(from_unixtime(custkey) AS TIME), 0.023) FROM orders", timeTypeUnsupportedError, true);
87+
88+
// test time with time zone
89+
// Uses the same expected error as plain TIME.
assertQueryFails("SELECT approx_distinct(CAST(from_unixtime(custkey) AS TIME WITH TIME ZONE)) FROM orders", timeTypeUnsupportedError, true);
90+
assertQueryFails("SELECT approx_distinct(CAST(from_unixtime(custkey) AS TIME WITH TIME ZONE), 0.023) FROM orders", timeTypeUnsupportedError, true);
91+
92+
// test short decimal
93+
assertQuery("SELECT approx_distinct(CAST(custkey AS DECIMAL(18, 0))) FROM orders", "SELECT 990");
94+
assertQuery("SELECT approx_distinct(CAST(custkey AS DECIMAL(18, 0)), 0.023) FROM orders", "SELECT 990");
95+
96+
// test long decimal
97+
assertQuery("SELECT approx_distinct(CAST(custkey AS DECIMAL(25, 20))) FROM orders", "SELECT 1013");
98+
assertQuery("SELECT approx_distinct(CAST(custkey AS DECIMAL(25, 20)), 0.023) FROM orders", "SELECT 1013");
99+
100+
// test real
101+
assertQuery("SELECT approx_distinct(CAST(custkey AS REAL)) FROM orders", "SELECT 982");
102+
assertQuery("SELECT approx_distinct(CAST(custkey AS REAL), 0.023) FROM orders", "SELECT 982");
103+
104+
// test bigint
105+
assertQuery("SELECT approx_distinct(custkey) FROM orders", "SELECT 990");
106+
assertQuery("SELECT approx_distinct(custkey, 0.023) FROM orders", "SELECT 990");
107+
108+
// test integer
109+
assertQuery("SELECT approx_distinct(CAST(custkey AS INTEGER)) FROM orders", "SELECT 1028");
110+
assertQuery("SELECT approx_distinct(CAST(custkey AS INTEGER), 0.023) FROM orders", "SELECT 1028");
111+
112+
// test smallint
113+
assertQuery("SELECT approx_distinct(CAST(custkey AS SMALLINT)) FROM orders", "SELECT 1023");
114+
assertQuery("SELECT approx_distinct(CAST(custkey AS SMALLINT), 0.023) FROM orders", "SELECT 1023");
115+
116+
// test tinyint
117+
assertQuery("SELECT approx_distinct(CAST((custkey % 128) AS TINYINT)) FROM orders", "SELECT 128");
118+
assertQuery("SELECT approx_distinct(CAST((custkey % 128) AS TINYINT), 0.023) FROM orders", "SELECT 128");
119+
120+
// test double
121+
assertQuery("SELECT approx_distinct(CAST(custkey AS DOUBLE)) FROM orders", "SELECT 1014");
122+
assertQuery("SELECT approx_distinct(CAST(custkey AS DOUBLE), 0.023) FROM orders", "SELECT 1014");
123+
124+
// test varchar
125+
assertQuery("SELECT approx_distinct(CAST(custkey AS VARCHAR)) FROM orders", "SELECT 1036");
126+
assertQuery("SELECT approx_distinct(CAST(custkey AS VARCHAR), 0.023) FROM orders", "SELECT 1036");
127+
128+
// test char
129+
assertQueryFails("SELECT approx_distinct(CAST(CAST(custkey AS VARCHAR) AS CHAR(20))) FROM orders", charTypeUnsupportedError, true);
130+
assertQueryFails("SELECT approx_distinct(CAST(CAST(custkey AS VARCHAR) AS CHAR(20)), 0.023) FROM orders", charTypeUnsupportedError, true);
131+
132+
// test varbinary
133+
assertQuery("SELECT approx_distinct(to_utf8(CAST(custkey AS VARCHAR))) FROM orders", "SELECT 1036");
134+
assertQuery("SELECT approx_distinct(to_utf8(CAST(custkey AS VARCHAR)), 0.023) FROM orders", "SELECT 1036");
135+
}
136+
137+
/// Overrides the inherited `sum_data_size_for_stats` test with the results expected from Presto C++.
///
/// `sum_data_size_for_stats` returns a different value for `Varchar` and `Varbinary` datatypes in Presto C++, see:
138+
/// https://github.com/prestodb/presto/issues/20909. `CHAR` datatype is not supported in Presto C++, see issue:
139+
/// https://github.com/prestodb/presto/issues/21332.
140+
@Override
141+
@Test
142+
public void testSumDataSizeForStats()
143+
{
144+
// varchar
145+
assertQuery("SELECT \"sum_data_size_for_stats\"(comment) FROM orders", "SELECT 787364");
146+
147+
// char
148+
// Presto removes trailing whitespaces when casting to CHAR.
149+
// Hard code the expected data size since there is no easy way to compute it in H2.
// NOTE(review): the two comments above describe a hard-coded size, but this override only
// asserts that the CHAR query fails with `charTypeUnsupportedError`.
150+
assertQueryFails("SELECT \"sum_data_size_for_stats\"(CAST(comment AS CHAR(1000))) FROM orders",
151+
charTypeUnsupportedError, true);
152+
153+
// varbinary
154+
assertQuery("SELECT \"sum_data_size_for_stats\"(CAST(comment AS VARBINARY)) FROM orders", "SELECT 787364");
155+
156+
// array
157+
assertQuery("SELECT \"sum_data_size_for_stats\"(ARRAY[comment]) FROM orders", "SELECT 847364");
158+
assertQuery("SELECT \"sum_data_size_for_stats\"(ARRAY[comment, comment]) FROM orders", "SELECT 1634728");
159+
160+
// map
161+
assertQuery("SELECT \"sum_data_size_for_stats\"(map(ARRAY[1], ARRAY[comment])) FROM orders", "SELECT 907364");
162+
assertQuery("SELECT \"sum_data_size_for_stats\"(map(ARRAY[1, 2], ARRAY[comment, comment])) FROM orders", "SELECT 1754728");
163+
164+
// row
165+
assertQuery("SELECT \"sum_data_size_for_stats\"(ROW(comment)) FROM orders", "SELECT 847364");
166+
assertQuery("SELECT \"sum_data_size_for_stats\"(ROW(comment, comment)) FROM orders", "SELECT 1634728");
167+
}
168+
169+
/// Overrides the inherited `max_data_size_for_stats` test with the results expected from Presto C++.
///
/// `max_data_size_for_stats` returns a different value for `Varchar` and `Varbinary` datatypes in Presto C++, see:
170+
/// https://github.com/prestodb/presto/issues/20909. `CHAR` datatype is not supported in Presto C++, see issue:
171+
/// https://github.com/prestodb/presto/issues/21332.
172+
@Override
173+
@Test
174+
public void testMaxDataSizeForStats()
175+
{
176+
// varchar
177+
assertQuery("SELECT \"max_data_size_for_stats\"(comment) FROM orders", "select 82");
178+
179+
// char
180+
// Expected to fail; the regex is the one selected in `init`.
assertQueryFails("SELECT \"max_data_size_for_stats\"(CAST(comment AS CHAR(1000))) FROM orders",
181+
charTypeUnsupportedError, true);
182+
183+
// varbinary
184+
assertQuery("SELECT \"max_data_size_for_stats\"(CAST(comment AS VARBINARY)) FROM orders", "select 82");
185+
186+
// max_data_size_for_stats is not needed for array, map and row
187+
}
188+
189+
/// Overrides the inherited digest test: every `value_at_quantile(<type>_agg(...))` query is expected to fail.
///
/// Function `tdigest_agg` is not supported in Presto C++, see: https://github.com/prestodb/presto/issues/24811.
190+
/// `qdigest` datatype is not supported in Presto C++, see: https://github.com/prestodb/presto/issues/24814.
191+
///
/// @param type digest type name supplied by the `getType` data provider (not visible in this file)
@Override
192+
@Test(dataProvider = "getType")
193+
public void testStatisticalDigest(String type)
194+
{
195+
// "qdigest" expects the value_at_quantile error; any other type expects the tdigest_agg error.
String errorMessage = type.equals(QDIGEST_TYPE) ? valueAtQuantileFunctionUnsupportedError : tdigestAggFunctionUnsupportedError;
196+
197+
assertQueryFails(format("SELECT value_at_quantile(%s_agg(CAST(orderkey AS DOUBLE)), 0.5E0) > 0 FROM lineitem", type),
198+
errorMessage, true);
199+
assertQueryFails(format("SELECT value_at_quantile(%s_agg(CAST(quantity AS DOUBLE)), 0.5E0) > 0 FROM lineitem", type),
200+
errorMessage, true);
201+
// NOTE(review): this assertion is identical to the preceding one.
assertQueryFails(format("SELECT value_at_quantile(%s_agg(CAST(quantity AS DOUBLE)), 0.5E0) > 0 FROM lineitem", type),
202+
errorMessage, true);
203+
// Two-argument form of the aggregate.
assertQueryFails(format("SELECT value_at_quantile(%s_agg(CAST(orderkey AS DOUBLE), 2), 0.5E0) > 0 FROM lineitem", type),
204+
errorMessage, true);
205+
assertQueryFails(format("SELECT value_at_quantile(%s_agg(CAST(quantity AS DOUBLE), 3), 0.5E0) > 0 FROM lineitem", type),
206+
errorMessage, true);
207+
assertQueryFails(format("SELECT value_at_quantile(%s_agg(CAST(quantity AS DOUBLE), 4), 0.5E0) > 0 FROM lineitem", type),
208+
errorMessage, true);
209+
// Three-argument form of the aggregate.
assertQueryFails(format("SELECT value_at_quantile(%s_agg(CAST(orderkey AS DOUBLE), 2, 0.0001E0), 0.5E0) > 0 FROM lineitem", type),
210+
errorMessage, true);
211+
assertQueryFails(format("SELECT value_at_quantile(%s_agg(CAST(quantity AS DOUBLE), 3, 0.0001E0), 0.5E0) > 0 FROM lineitem", type),
212+
errorMessage, true);
213+
assertQueryFails(format("SELECT value_at_quantile(%s_agg(CAST(quantity AS DOUBLE), 4, 0.0001E0), 0.5E0) > 0 FROM lineitem", type),
214+
errorMessage, true);
215+
}
216+
217+
/// Overrides the inherited grouped digest test: every `value_at_quantile(<type>_agg(...))` query
/// grouped by `partkey` is expected to fail.
///
/// Function `tdigest_agg` is not supported in Presto C++, see: https://github.com/prestodb/presto/issues/24811.
218+
/// `qdigest` datatype is not supported in Presto C++, see: https://github.com/prestodb/presto/issues/24814.
219+
///
/// @param type digest type name supplied by the `getType` data provider (not visible in this file)
@Override
220+
@Test(dataProvider = "getType")
221+
public void testStatisticalDigestGroupBy(String type)
222+
{
223+
// "qdigest" expects the value_at_quantile error; any other type expects the tdigest_agg error.
String errorMessage = type.equals(QDIGEST_TYPE) ? valueAtQuantileFunctionUnsupportedError : tdigestAggFunctionUnsupportedError;
224+
225+
assertQueryFails(format("SELECT partkey, value_at_quantile(%s_agg(CAST(orderkey AS DOUBLE)), 0.5E0) > 0 FROM lineitem GROUP BY partkey", type),
226+
errorMessage, true);
227+
assertQueryFails(format("SELECT partkey, value_at_quantile(%s_agg(CAST(quantity AS DOUBLE)), 0.5E0) > 0 FROM lineitem GROUP BY partkey", type),
228+
errorMessage, true);
229+
// NOTE(review): this assertion is identical to the preceding one.
assertQueryFails(format("SELECT partkey, value_at_quantile(%s_agg(CAST(quantity AS DOUBLE)), 0.5E0) > 0 FROM lineitem GROUP BY partkey", type),
230+
errorMessage, true);
231+
// Two-argument form of the aggregate.
assertQueryFails(format("SELECT partkey, value_at_quantile(%s_agg(CAST(orderkey AS DOUBLE), 2), 0.5E0) > 0 FROM lineitem GROUP BY partkey", type),
232+
errorMessage, true);
233+
assertQueryFails(format("SELECT partkey, value_at_quantile(%s_agg(CAST(quantity AS DOUBLE), 3), 0.5E0) > 0 FROM lineitem GROUP BY partkey", type),
234+
errorMessage, true);
235+
assertQueryFails(format("SELECT partkey, value_at_quantile(%s_agg(CAST(quantity AS DOUBLE), 4), 0.5E0) > 0 FROM lineitem GROUP BY partkey", type),
236+
errorMessage, true);
237+
// Three-argument form of the aggregate.
assertQueryFails(format("SELECT partkey, value_at_quantile(%s_agg(CAST(orderkey AS DOUBLE), 2, 0.0001E0), 0.5E0) > 0 FROM lineitem GROUP BY partkey", type),
238+
errorMessage, true);
239+
assertQueryFails(format("SELECT partkey, value_at_quantile(%s_agg(CAST(quantity AS DOUBLE), 3, 0.0001E0), 0.5E0) > 0 FROM lineitem GROUP BY partkey", type),
240+
errorMessage, true);
241+
assertQueryFails(format("SELECT partkey, value_at_quantile(%s_agg(CAST(quantity AS DOUBLE), 4, 0.0001E0), 0.5E0) > 0 FROM lineitem GROUP BY partkey", type),
242+
errorMessage, true);
243+
}
244+
245+
/// Overrides the inherited digest-merge test: the `value_at_quantile(merge(...))` query over
/// per-`partkey` digests is expected to fail.
///
/// Function `tdigest_agg` is not supported in Presto C++, see: https://github.com/prestodb/presto/issues/24811.
246+
/// `qdigest` datatype is not supported in Presto C++, see: https://github.com/prestodb/presto/issues/24814.
247+
///
/// @param type digest type name supplied by the `getType` data provider (not visible in this file)
@Override
248+
@Test(dataProvider = "getType")
249+
public void testStatisticalDigestMerge(String type)
250+
{
251+
// "qdigest" expects the merge error; any other type expects the tdigest_agg error.
String errorMessage = type.equals(QDIGEST_TYPE) ? mergeAggFunctionUnsupportedError : tdigestAggFunctionUnsupportedError;
252+
// NOTE(review): the non-merge overrides in this class pass a third `true` argument to
// assertQueryFails; this call uses the two-argument form. Confirm that is intended.
assertQueryFails(format("SELECT value_at_quantile(merge(%s), 0.5E0) > 0 FROM (SELECT partkey, %s_agg(CAST(orderkey AS DOUBLE)) as %s FROM lineitem GROUP BY partkey)",
253+
type,
254+
type,
255+
type),
256+
errorMessage);
257+
}
258+
259+
/// Overrides the inherited grouped digest-merge test: the `value_at_quantile(merge(...))` query
/// grouped by `partkey` is expected to fail.
///
/// Aggregate function `merge` is not supported for `tdigest` type in Presto C++, see issue for more details:
260+
/// https://github.com/prestodb/presto/issues/24813. `qdigest` datatype is not supported in Presto C++, see:
261+
/// https://github.com/prestodb/presto/issues/24814.
262+
///
/// @param type digest type name supplied by the `getType` data provider (not visible in this file)
@Override
263+
@Test(dataProvider = "getType")
264+
public void testStatisticalDigestMergeGroupBy(String type)
265+
{
266+
// "qdigest" expects the merge error; any other type expects the tdigest_agg error.
String errorMessage = type.equals(QDIGEST_TYPE) ? mergeAggFunctionUnsupportedError : tdigestAggFunctionUnsupportedError;
267+
// NOTE(review): the non-merge overrides in this class pass a third `true` argument to
// assertQueryFails; this call uses the two-argument form. Confirm that is intended.
// NOTE(review): the concatenation below yields "...suppkey)GROUP BY partkey" with no space
// after the closing parenthesis.
assertQueryFails(format("SELECT partkey, value_at_quantile(merge(%s), 0.5E0) > 0 " +
268+
"FROM (SELECT partkey, suppkey, %s_agg(CAST(orderkey AS DOUBLE)) as %s FROM lineitem GROUP BY partkey, suppkey)" +
269+
"GROUP BY partkey",
270+
type,
271+
type,
272+
type),
273+
errorMessage);
274+
}
275+
}

0 commit comments

Comments
 (0)