3232import org .apache .druid .query .aggregation .AggregatorFactory ;
3333import org .apache .druid .segment .VirtualColumns ;
3434import org .apache .druid .segment .column .ColumnHolder ;
35+ import org .apache .druid .segment .column .ColumnType ;
36+ import org .apache .druid .segment .column .ValueType ;
3537import org .apache .druid .utils .CollectionUtils ;
3638
3739import javax .annotation .Nullable ;
4951 * <p>
5052 * The operator declares a single ordered {@link #columns} list — the full set of columns in segment order, plus a
5153 * {@link #clusteringColumns} list of NAMES designating the leading prefix of {@link #columns} that rows are
52- * clustered by. The time position is an explicit positional entry in {@link #columns} (named {@code __time}, or the
53- * query-granularity column {@link Granularities#GRANULARITY_VIRTUAL_COLUMN_NAME}); clustering by the time column is not
54- * yet supported, so the time marker must be a non-clustering column. A clustered base table is never rollup and has
55- * no metric columns.
54+ * clustered by. The time position is an explicit positional entry in {@link #columns} named {@code __time}; clustering
55+ * by the time column is not yet supported, so {@code __time} must be a non-clustering column. A clustered base table
56+ * is never rollup and has no metric columns.
5657 * <p>
5758 * {@link #getDimensionsSpec()} returns the unified spec built from {@link #columns} in declared order with
5859 * {@code forceSegmentSortByTime=false}; {@link #getOrdering()} is computed as every column of {@link #columns}
5960 * ascending, in list order.
6061 * <p>
61- * Query granularity, when wanted, is just another entry in {@link #getVirtualColumns()} named
62- * {@link Granularities#GRANULARITY_VIRTUAL_COLUMN_NAME}; absent that virtual column the query granularity is
63- * {@code NONE}. Segment granularity and intervals live on the top-level
62+ * Query granularity, when wanted, is a virtual column in {@link #getVirtualColumns()} named
63+ * {@link Granularities#GRANULARITY_VIRTUAL_COLUMN_NAME}. It is a granularity <em>carrier</em>: it supplies the
64+ * granularity that floors the stored {@code __time} column, and is NOT itself a stored column, so it never appears in
65+ * {@link #columns} (declare {@code __time} there as the time column). Absent that virtual column the query granularity
66+ * is {@code NONE}. Segment granularity and intervals live on the top-level
6467 * {@link org.apache.druid.indexer.granularity.SegmentGranularitySpec}, not here.
6568 */
6669@ JsonTypeName (ClusteredValueGroupsBaseTableProjectionSpec .TYPE_NAME )
@@ -180,9 +183,19 @@ private static void validate(List<DimensionSchema> columns, List<String> cluster
180183 throw clusteringPrefixException (columns , clusteringColumns );
181184 }
182185 for (int i = 0 ; i < clusteringColumns .size (); i ++) {
183- if (!columns .get (i ).getName ().equals (clusteringColumns .get (i ))) {
186+ final DimensionSchema clusteringColumn = columns .get (i );
187+ if (!clusteringColumn .getName ().equals (clusteringColumns .get (i ))) {
184188 throw clusteringPrefixException (columns , clusteringColumns );
185189 }
190+ // Clustering values are dictionary-encoded into per-type dictionaries on the write side, which supports only
191+ // these scalar types; reject anything else up front rather than failing later at ingest.
192+ if (!isSupportedClusteringType (clusteringColumn .getColumnType ())) {
193+ throw InvalidInput .exception (
194+ "clustering column [%s] has unsupported type [%s]; clustering columns must be STRING, LONG, DOUBLE, or FLOAT" ,
195+ clusteringColumn .getName (),
196+ clusteringColumn .getColumnType ()
197+ );
198+ }
186199 }
187200
188201 final Set <String > seen = Sets .newHashSetWithExpectedSize (columns .size ());
@@ -193,40 +206,45 @@ private static void validate(List<DimensionSchema> columns, List<String> cluster
193206 }
194207
195208 int timeIndex = -1 ;
196- boolean bothPresent = false ;
197209 for (int i = 0 ; i < columns .size (); i ++) {
198210 final String name = columns .get (i ).getName ();
199- if (ColumnHolder .TIME_COLUMN_NAME .equals (name ) || Granularities .GRANULARITY_VIRTUAL_COLUMN_NAME .equals (name )) {
200- if (timeIndex >= 0 ) {
201- bothPresent = true ;
202- }
211+ // The query-granularity virtual column is a granularity carrier in virtualColumns (it floors the stored __time
212+ // column); it is not itself a stored column, so it must not be declared in 'columns'.
213+ if (Granularities .GRANULARITY_VIRTUAL_COLUMN_NAME .equals (name )) {
214+ throw InvalidInput .exception (
215+ "[%s] is the query-granularity virtual column, not a stored column; declare it in 'virtualColumns' and use"
216+ + " [%s] as the time column in 'columns'" ,
217+ Granularities .GRANULARITY_VIRTUAL_COLUMN_NAME ,
218+ ColumnHolder .TIME_COLUMN_NAME
219+ );
220+ }
221+ if (ColumnHolder .TIME_COLUMN_NAME .equals (name )) {
203222 timeIndex = i ;
204223 }
205224 }
206225 if (timeIndex < 0 ) {
207226 throw InvalidInput .exception (
208- "clustered base table must include %s (or the query-granularity column [%s]) in 'columns' to define the"
209- + " time position" ,
210- ColumnHolder .TIME_COLUMN_NAME ,
211- Granularities .GRANULARITY_VIRTUAL_COLUMN_NAME
212- );
213- }
214- if (bothPresent ) {
215- throw InvalidInput .exception (
216- "clustered base table must include exactly one of %s / %s in 'columns' to define the time position" ,
217- ColumnHolder .TIME_COLUMN_NAME ,
218- Granularities .GRANULARITY_VIRTUAL_COLUMN_NAME
227+ "clustered base table must include [%s] in 'columns' to define the time position" ,
228+ ColumnHolder .TIME_COLUMN_NAME
219229 );
220230 }
221231 if (timeIndex < clusteringColumns .size ()) {
222232 throw InvalidInput .exception (
223- "clustering by %s / %s is not yet supported; the time column must be a non-clustering column" ,
224- ColumnHolder .TIME_COLUMN_NAME ,
225- Granularities .GRANULARITY_VIRTUAL_COLUMN_NAME
233+ "clustering by [%s] is not yet supported; the time column must be a non-clustering column" ,
234+ ColumnHolder .TIME_COLUMN_NAME
226235 );
227236 }
228237 }
229238
239+ private static boolean isSupportedClusteringType (ColumnType type )
240+ {
241+ return type != null
242+ && (type .is (ValueType .STRING )
243+ || type .is (ValueType .LONG )
244+ || type .is (ValueType .DOUBLE )
245+ || type .is (ValueType .FLOAT ));
246+ }
247+
230248 private static DruidException clusteringPrefixException (
231249 List <DimensionSchema > columns ,
232250 List <String > clusteringColumns
0 commit comments