21
21
import org .elasticsearch .cluster .ClusterState ;
22
22
import org .elasticsearch .cluster .ClusterStateListener ;
23
23
import org .elasticsearch .cluster .metadata .IndexAbstraction ;
24
+ import org .elasticsearch .cluster .metadata .IndexMetadata ;
24
25
import org .elasticsearch .cluster .node .DiscoveryNode ;
25
26
import org .elasticsearch .cluster .service .ClusterService ;
26
27
import org .elasticsearch .common .settings .Setting ;
44
45
import org .elasticsearch .transport .RemoteTransportException ;
45
46
46
47
import java .util .Collections ;
48
+ import java .util .HashMap ;
47
49
import java .util .HashSet ;
48
50
import java .util .List ;
49
51
import java .util .Map ;
@@ -248,11 +250,14 @@ static boolean hasAtLeastOneGeoipProcessor(ClusterState clusterState) {
248
250
return false ;
249
251
}
250
252
251
- return clusterState .getMetadata ().indices ().values (). stream (). anyMatch ( indexMetadata -> {
253
+ for ( IndexMetadata indexMetadata : clusterState .getMetadata ().indices ().values ()) {
252
254
String defaultPipeline = IndexSettings .DEFAULT_PIPELINE .get (indexMetadata .getSettings ());
253
255
String finalPipeline = IndexSettings .FINAL_PIPELINE .get (indexMetadata .getSettings ());
254
- return checkReferencedPipelines .contains (defaultPipeline ) || checkReferencedPipelines .contains (finalPipeline );
255
- });
256
+ if (checkReferencedPipelines .contains (defaultPipeline ) || checkReferencedPipelines .contains (finalPipeline )) {
257
+ return true ;
258
+ }
259
+ }
260
+ return false ;
256
261
}
257
262
258
263
/**
@@ -265,12 +270,26 @@ static boolean hasAtLeastOneGeoipProcessor(ClusterState clusterState) {
265
270
@ SuppressWarnings ("unchecked" )
266
271
private static Set <String > pipelinesWithGeoIpProcessor (ClusterState clusterState , boolean downloadDatabaseOnPipelineCreation ) {
267
272
List <PipelineConfiguration > configurations = IngestService .getPipelines (clusterState );
273
+ Map <String , PipelineConfiguration > pipelineConfigById = HashMap .newHashMap (configurations .size ());
274
+ for (PipelineConfiguration configuration : configurations ) {
275
+ pipelineConfigById .put (configuration .getId (), configuration );
276
+ }
277
+ // this map is used to keep track of pipelines that have already been checked
278
+ Map <String , Boolean > pipelineHasGeoProcessorById = HashMap .newHashMap (configurations .size ());
268
279
Set <String > ids = new HashSet <>();
269
280
// note: this loop is unrolled rather than streaming-style because it's hot enough to show up in a flamegraph
270
281
for (PipelineConfiguration configuration : configurations ) {
271
282
List <Map <String , Object >> processors = (List <Map <String , Object >>) configuration .getConfig ().get (Pipeline .PROCESSORS_KEY );
272
- if (hasAtLeastOneGeoipProcessor (processors , downloadDatabaseOnPipelineCreation )) {
273
- ids .add (configuration .getId ());
283
+ String pipelineName = configuration .getId ();
284
+ if (pipelineHasGeoProcessorById .containsKey (pipelineName ) == false ) {
285
+ if (hasAtLeastOneGeoipProcessor (
286
+ processors ,
287
+ downloadDatabaseOnPipelineCreation ,
288
+ pipelineConfigById ,
289
+ pipelineHasGeoProcessorById
290
+ )) {
291
+ ids .add (pipelineName );
292
+ }
274
293
}
275
294
}
276
295
return Collections .unmodifiableSet (ids );
@@ -280,13 +299,27 @@ private static Set<String> pipelinesWithGeoIpProcessor(ClusterState clusterState
280
299
* Check if a list of processors contains at least one geoip processor.
281
300
* @param processors List of processors.
282
301
* @param downloadDatabaseOnPipelineCreation Should the download_database_on_pipeline_creation of the geoip processor be true or false.
302
+ * @param pipelineConfigById A Map of pipeline id to PipelineConfiguration
303
+ * @param pipelineHasGeoProcessorById A Map of pipeline id to Boolean, indicating whether the pipeline references a geoip processor
304
+ * (true), does not reference a geoip processor (false), or we are currently trying to figure that
305
+ * out (null).
283
306
* @return true if a geoip processor is found in the processor list.
284
307
*/
285
- private static boolean hasAtLeastOneGeoipProcessor (List <Map <String , Object >> processors , boolean downloadDatabaseOnPipelineCreation ) {
308
+ private static boolean hasAtLeastOneGeoipProcessor (
309
+ List <Map <String , Object >> processors ,
310
+ boolean downloadDatabaseOnPipelineCreation ,
311
+ Map <String , PipelineConfiguration > pipelineConfigById ,
312
+ Map <String , Boolean > pipelineHasGeoProcessorById
313
+ ) {
286
314
if (processors != null ) {
287
315
// note: this loop is unrolled rather than streaming-style because it's hot enough to show up in a flamegraph
288
316
for (Map <String , Object > processor : processors ) {
289
- if (hasAtLeastOneGeoipProcessor (processor , downloadDatabaseOnPipelineCreation )) {
317
+ if (hasAtLeastOneGeoipProcessor (
318
+ processor ,
319
+ downloadDatabaseOnPipelineCreation ,
320
+ pipelineConfigById ,
321
+ pipelineHasGeoProcessorById
322
+ )) {
290
323
return true ;
291
324
}
292
325
}
@@ -298,10 +331,19 @@ private static boolean hasAtLeastOneGeoipProcessor(List<Map<String, Object>> pro
298
331
* Check if a processor config is a geoip processor or contains at least one geoip processor.
299
332
* @param processor Processor config.
300
333
* @param downloadDatabaseOnPipelineCreation Should the download_database_on_pipeline_creation of the geoip processor be true or false.
334
+ * @param pipelineConfigById A Map of pipeline id to PipelineConfiguration
335
+ * @param pipelineHasGeoProcessorById A Map of pipeline id to Boolean, indicating whether the pipeline references a geoip processor
336
+ * (true), does not reference a geoip processor (false), or we are currently trying to figure that
337
+ * out (null).
301
338
* @return true if a geoip processor is found in the processor list.
302
339
*/
303
340
@ SuppressWarnings ("unchecked" )
304
- private static boolean hasAtLeastOneGeoipProcessor (Map <String , Object > processor , boolean downloadDatabaseOnPipelineCreation ) {
341
+ private static boolean hasAtLeastOneGeoipProcessor (
342
+ Map <String , Object > processor ,
343
+ boolean downloadDatabaseOnPipelineCreation ,
344
+ Map <String , PipelineConfiguration > pipelineConfigById ,
345
+ Map <String , Boolean > pipelineHasGeoProcessorById
346
+ ) {
305
347
if (processor == null ) {
306
348
return false ;
307
349
}
@@ -320,27 +362,51 @@ private static boolean hasAtLeastOneGeoipProcessor(Map<String, Object> processor
320
362
}
321
363
}
322
364
323
- return isProcessorWithOnFailureGeoIpProcessor (processor , downloadDatabaseOnPipelineCreation )
324
- || isForeachProcessorWithGeoipProcessor (processor , downloadDatabaseOnPipelineCreation );
365
+ return isProcessorWithOnFailureGeoIpProcessor (
366
+ processor ,
367
+ downloadDatabaseOnPipelineCreation ,
368
+ pipelineConfigById ,
369
+ pipelineHasGeoProcessorById
370
+ )
371
+ || isForeachProcessorWithGeoipProcessor (
372
+ processor ,
373
+ downloadDatabaseOnPipelineCreation ,
374
+ pipelineConfigById ,
375
+ pipelineHasGeoProcessorById
376
+ )
377
+ || isPipelineProcessorWithGeoIpProcessor (
378
+ processor ,
379
+ downloadDatabaseOnPipelineCreation ,
380
+ pipelineConfigById ,
381
+ pipelineHasGeoProcessorById
382
+ );
325
383
}
326
384
327
385
/**
328
386
* Check if a processor config has an on_failure clause containing at least one geoip processor.
329
387
* @param processor Processor config.
330
388
* @param downloadDatabaseOnPipelineCreation Should the download_database_on_pipeline_creation of the geoip processor be true or false.
389
+ * @param pipelineConfigById A Map of pipeline id to PipelineConfiguration
390
+ * @param pipelineHasGeoProcessorById A Map of pipeline id to Boolean, indicating whether the pipeline references a geoip processor
391
+ * (true), does not reference a geoip processor (false), or we are currently trying to figure that
392
+ * out (null).
331
393
* @return true if a geoip processor is found in the processor list.
332
394
*/
333
395
@ SuppressWarnings ("unchecked" )
334
396
private static boolean isProcessorWithOnFailureGeoIpProcessor (
335
397
Map <String , Object > processor ,
336
- boolean downloadDatabaseOnPipelineCreation
398
+ boolean downloadDatabaseOnPipelineCreation ,
399
+ Map <String , PipelineConfiguration > pipelineConfigById ,
400
+ Map <String , Boolean > pipelineHasGeoProcessorById
337
401
) {
338
402
// note: this loop is unrolled rather than streaming-style because it's hot enough to show up in a flamegraph
339
403
for (Object value : processor .values ()) {
340
404
if (value instanceof Map
341
405
&& hasAtLeastOneGeoipProcessor (
342
406
((Map <String , List <Map <String , Object >>>) value ).get ("on_failure" ),
343
- downloadDatabaseOnPipelineCreation
407
+ downloadDatabaseOnPipelineCreation ,
408
+ pipelineConfigById ,
409
+ pipelineHasGeoProcessorById
344
410
)) {
345
411
return true ;
346
412
}
@@ -352,13 +418,84 @@ && hasAtLeastOneGeoipProcessor(
352
418
* Check if a processor is a foreach processor containing at least one geoip processor.
353
419
* @param processor Processor config.
354
420
* @param downloadDatabaseOnPipelineCreation Should the download_database_on_pipeline_creation of the geoip processor be true or false.
421
+ * @param pipelineConfigById A Map of pipeline id to PipelineConfiguration
422
+ * @param pipelineHasGeoProcessorById A Map of pipeline id to Boolean, indicating whether the pipeline references a geoip processor
423
+ * (true), does not reference a geoip processor (false), or we are currently trying to figure that
424
+ * out (null).
355
425
* @return true if a geoip processor is found in the processor list.
356
426
*/
357
427
@ SuppressWarnings ("unchecked" )
358
- private static boolean isForeachProcessorWithGeoipProcessor (Map <String , Object > processor , boolean downloadDatabaseOnPipelineCreation ) {
428
+ private static boolean isForeachProcessorWithGeoipProcessor (
429
+ Map <String , Object > processor ,
430
+ boolean downloadDatabaseOnPipelineCreation ,
431
+ Map <String , PipelineConfiguration > pipelineConfigById ,
432
+ Map <String , Boolean > pipelineHasGeoProcessorById
433
+ ) {
359
434
final Map <String , Object > processorConfig = (Map <String , Object >) processor .get ("foreach" );
360
435
return processorConfig != null
361
- && hasAtLeastOneGeoipProcessor ((Map <String , Object >) processorConfig .get ("processor" ), downloadDatabaseOnPipelineCreation );
436
+ && hasAtLeastOneGeoipProcessor (
437
+ (Map <String , Object >) processorConfig .get ("processor" ),
438
+ downloadDatabaseOnPipelineCreation ,
439
+ pipelineConfigById ,
440
+ pipelineHasGeoProcessorById
441
+ );
442
+ }
443
+
444
+ /**
445
+ * Check if a processor is a pipeline processor containing at least a geoip processor. This method also updates
446
+ * pipelineHasGeoProcessorById with a result for any pipelines it looks at.
447
+ * @param processor Processor config.
448
+ * @param downloadDatabaseOnPipelineCreation Should the download_database_on_pipeline_creation of the geoip processor be true or false.
449
+ * @param pipelineConfigById A Map of pipeline id to PipelineConfiguration
450
+ * @param pipelineHasGeoProcessorById A Map of pipeline id to Boolean, indicating whether the pipeline references a geoip processor
451
+ * (true), does not reference a geoip processor (false), or we are currently trying to figure that
452
+ * out (null).
453
+ * @return true if a geoip processor is found in the processors of this processor if this processor is a pipeline processor.
454
+ */
455
+ @ SuppressWarnings ("unchecked" )
456
+ private static boolean isPipelineProcessorWithGeoIpProcessor (
457
+ Map <String , Object > processor ,
458
+ boolean downloadDatabaseOnPipelineCreation ,
459
+ Map <String , PipelineConfiguration > pipelineConfigById ,
460
+ Map <String , Boolean > pipelineHasGeoProcessorById
461
+ ) {
462
+ final Map <String , Object > processorConfig = (Map <String , Object >) processor .get ("pipeline" );
463
+ if (processorConfig != null ) {
464
+ String pipelineName = (String ) processorConfig .get ("name" );
465
+ if (pipelineName != null ) {
466
+ if (pipelineHasGeoProcessorById .containsKey (pipelineName )) {
467
+ if (pipelineHasGeoProcessorById .get (pipelineName ) == null ) {
468
+ /*
469
+ * If the value is null here, it indicates that this method has been called recursively with the same pipeline name.
470
+ * This will cause a runtime error when the pipeline is executed, but we're avoiding changing existing behavior at
471
+ * server startup time. Instead, we just bail out as quickly as possible. It is possible that this could lead to a
472
+ * geo database not being downloaded for the pipeline, but it doesn't really matter since the pipeline was going to
473
+ * fail anyway.
474
+ */
475
+ pipelineHasGeoProcessorById .put (pipelineName , false );
476
+ }
477
+ } else {
478
+ List <Map <String , Object >> childProcessors = null ;
479
+ PipelineConfiguration config = pipelineConfigById .get (pipelineName );
480
+ if (config != null ) {
481
+ childProcessors = (List <Map <String , Object >>) config .getConfig ().get (Pipeline .PROCESSORS_KEY );
482
+ }
483
+ // We initialize this to null so that we know it's in progress and can use it to avoid stack overflow errors:
484
+ pipelineHasGeoProcessorById .put (pipelineName , null );
485
+ pipelineHasGeoProcessorById .put (
486
+ pipelineName ,
487
+ hasAtLeastOneGeoipProcessor (
488
+ childProcessors ,
489
+ downloadDatabaseOnPipelineCreation ,
490
+ pipelineConfigById ,
491
+ pipelineHasGeoProcessorById
492
+ )
493
+ );
494
+ }
495
+ return pipelineHasGeoProcessorById .get (pipelineName );
496
+ }
497
+ }
498
+ return false ;
362
499
}
363
500
364
501
@ UpdateForV9 // use MINUS_ONE once that means no timeout
0 commit comments