|
19 | 19 | import java.io.IOException; |
20 | 20 | import java.util.ArrayList; |
21 | 21 | import java.util.List; |
| 22 | +import java.util.Map; |
22 | 23 |
|
| 24 | +import com.fasterxml.jackson.databind.JsonNode; |
23 | 25 | import com.fasterxml.jackson.databind.ObjectMapper; |
24 | 26 | import org.slf4j.Logger; |
25 | 27 | import org.slf4j.LoggerFactory; |
26 | 28 |
|
27 | 29 | import org.apache.tika.config.ConfigContainer; |
28 | 30 | import org.apache.tika.config.JsonConfig; |
29 | 31 | import org.apache.tika.config.loader.ComponentInfo; |
| 32 | +import org.apache.tika.config.loader.ComponentInstantiator; |
30 | 33 | import org.apache.tika.config.loader.ComponentRegistry; |
31 | 34 | import org.apache.tika.config.loader.TikaObjectMapperFactory; |
32 | 35 | import org.apache.tika.exception.TikaConfigException; |
| 36 | +import org.apache.tika.metadata.filter.CompositeMetadataFilter; |
33 | 37 | import org.apache.tika.metadata.filter.MetadataFilter; |
34 | 38 | import org.apache.tika.parser.ParseContext; |
35 | 39 |
|
@@ -68,6 +72,20 @@ public class ParseContextUtils { |
68 | 72 | // Add other known interfaces as needed |
69 | 73 | ); |
70 | 74 |
|
| 75 | + /** |
| 76 | + * Mapping of array config keys to their context keys and composite wrapper factories. |
| 77 | + * Key: config name (e.g., "metadata-filters") |
| 78 | + * Value: (contextKey, componentInterface) |
| 79 | + */ |
| 80 | + private static final Map<String, ArrayConfigInfo> ARRAY_CONFIGS = Map.of( |
| 81 | + "metadata-filters", new ArrayConfigInfo(MetadataFilter.class, MetadataFilter.class) |
| 82 | + ); |
| 83 | + |
| 84 | + /** |
| 85 | + * Holds information about how to process array configs. |
| 86 | + */ |
| 87 | + private record ArrayConfigInfo(Class<?> contextKey, Class<?> componentInterface) {} |
| 88 | + |
71 | 89 | /** |
72 | 90 | * Resolves all friendly-named components from ConfigContainer and adds them to ParseContext. |
73 | 91 | * <p> |
@@ -102,12 +120,27 @@ public static void resolveAll(ParseContext context, ClassLoader classLoader) { |
102 | 120 |
|
103 | 121 | List<String> resolvedKeys = new ArrayList<>(); |
104 | 122 |
|
| 123 | + // First, process known array configs (e.g., "metadata-filters") |
| 124 | + // These don't depend on the other-configs registry |
| 125 | + for (String friendlyName : new ArrayList<>(container.getKeys())) { |
| 126 | + if (ARRAY_CONFIGS.containsKey(friendlyName)) { |
| 127 | + JsonConfig jsonConfig = container.get(friendlyName, null); |
| 128 | + if (jsonConfig != null && resolveArrayConfig(friendlyName, jsonConfig, context, classLoader)) { |
| 129 | + resolvedKeys.add(friendlyName); |
| 130 | + } |
| 131 | + } |
| 132 | + } |
| 133 | + |
| 134 | + // Then, try to load the "other-configs" registry for single component configs |
105 | 135 | try { |
106 | | - // Load the "other-configs" registry which includes parse-context components |
107 | 136 | ComponentRegistry registry = new ComponentRegistry("other-configs", classLoader); |
108 | 137 |
|
109 | | - // Iterate through all configs in the container |
110 | 138 | for (String friendlyName : container.getKeys()) { |
| 139 | + // Skip already resolved array configs |
| 140 | + if (resolvedKeys.contains(friendlyName)) { |
| 141 | + continue; |
| 142 | + } |
| 143 | + |
111 | 144 | JsonConfig jsonConfig = container.get(friendlyName, null); |
112 | 145 | if (jsonConfig == null) { |
113 | 146 | continue; |
@@ -143,7 +176,8 @@ public static void resolveAll(ParseContext context, ClassLoader classLoader) { |
143 | 176 | } |
144 | 177 | } |
145 | 178 | } catch (TikaConfigException e) { |
146 | | - LOG.warn("Failed to load other-configs registry for parse-context resolution", e); |
| 179 | + // other-configs registry not available - that's okay, array configs were still processed |
| 180 | + LOG.debug("other-configs registry not available: {}", e.getMessage()); |
147 | 181 | } |
148 | 182 |
|
149 | 183 | // Remove resolved configs from the container |
@@ -191,4 +225,102 @@ private static Class<?> determineContextKey(ComponentInfo info, String friendlyN |
191 | 225 | // Use the single matched interface, or fall back to the component class |
192 | 226 | return matches.isEmpty() ? info.componentClass() : matches.get(0); |
193 | 227 | } |
| 228 | + |
| 229 | + /** |
| 230 | + * Resolves an array config entry (e.g., "metadata-filters") to a composite component. |
| 231 | + * <p> |
| 232 | + * The array can contain either strings (friendly names) or objects: |
| 233 | + * <pre> |
| 234 | + * ["filter-name-1", "filter-name-2"] // String shorthand |
| 235 | + * [{"filter-name-1": {}}, {"filter-name-2": {}}] // Object format |
| 236 | + * </pre> |
| 237 | + * |
| 238 | + * @param configName the config name (e.g., "metadata-filters") |
| 239 | + * @param jsonConfig the JSON configuration (should be an array) |
| 240 | + * @param context the ParseContext to add the resolved component to |
| 241 | + * @param classLoader the ClassLoader to use for loading component classes |
| 242 | + * @return true if resolution was successful |
| 243 | + */ |
| 244 | + @SuppressWarnings("unchecked") |
| 245 | + private static boolean resolveArrayConfig(String configName, JsonConfig jsonConfig, |
| 246 | + ParseContext context, ClassLoader classLoader) { |
| 247 | + ArrayConfigInfo configInfo = ARRAY_CONFIGS.get(configName); |
| 248 | + if (configInfo == null) { |
| 249 | + return false; |
| 250 | + } |
| 251 | + |
| 252 | + try { |
| 253 | + JsonNode arrayNode = MAPPER.readTree(jsonConfig.json()); |
| 254 | + if (!arrayNode.isArray()) { |
| 255 | + LOG.warn("Expected array for '{}', got: {}", configName, arrayNode.getNodeType()); |
| 256 | + return false; |
| 257 | + } |
| 258 | + |
| 259 | + List<Object> components = new ArrayList<>(); |
| 260 | + |
| 261 | + for (JsonNode item : arrayNode) { |
| 262 | + String typeName; |
| 263 | + JsonNode configNode; |
| 264 | + |
| 265 | + if (item.isTextual()) { |
| 266 | + // String shorthand: "component-name" |
| 267 | + typeName = item.asText(); |
| 268 | + configNode = MAPPER.createObjectNode(); |
| 269 | + } else if (item.isObject() && item.size() == 1) { |
| 270 | + // Object format: {"component-name": {...}} |
| 271 | + typeName = item.fieldNames().next(); |
| 272 | + configNode = item.get(typeName); |
| 273 | + } else { |
| 274 | + LOG.warn("Unexpected item format in '{}': {}", configName, item); |
| 275 | + continue; |
| 276 | + } |
| 277 | + |
| 278 | + try { |
| 279 | + Object component = ComponentInstantiator.instantiate( |
| 280 | + typeName, configNode, MAPPER, classLoader); |
| 281 | + components.add(component); |
| 282 | + LOG.debug("Instantiated '{}' for '{}'", typeName, configName); |
| 283 | + } catch (TikaConfigException e) { |
| 284 | + LOG.warn("Failed to instantiate '{}' for '{}': {}", typeName, configName, e.getMessage()); |
| 285 | + } |
| 286 | + } |
| 287 | + |
| 288 | + // Create the composite and add to ParseContext |
| 289 | + if (!components.isEmpty()) { |
| 290 | + Object composite = createComposite(configName, components, configInfo); |
| 291 | + if (composite != null) { |
| 292 | + context.set((Class) configInfo.contextKey(), composite); |
| 293 | + LOG.debug("Resolved '{}' -> {} with {} components", |
| 294 | + configName, composite.getClass().getSimpleName(), components.size()); |
| 295 | + return true; |
| 296 | + } |
| 297 | + } |
| 298 | + } catch (IOException e) { |
| 299 | + LOG.warn("Failed to parse array config '{}': {}", configName, e.getMessage()); |
| 300 | + } |
| 301 | + |
| 302 | + return false; |
| 303 | + } |
| 304 | + |
| 305 | + /** |
| 306 | + * Creates a composite component from a list of individual components. |
| 307 | + * |
| 308 | + * @param configName the config name (for error messages) |
| 309 | + * @param components the list of components |
| 310 | + * @param configInfo the array config info |
| 311 | + * @return the composite component, or null if creation failed |
| 312 | + */ |
| 313 | + @SuppressWarnings("unchecked") |
| 314 | + private static Object createComposite(String configName, List<Object> components, |
| 315 | + ArrayConfigInfo configInfo) { |
| 316 | + // Handle known composite types |
| 317 | + if (configInfo.componentInterface() == MetadataFilter.class) { |
| 318 | + List<MetadataFilter> filters = (List<MetadataFilter>) (List<?>) components; |
| 319 | + return new CompositeMetadataFilter(filters); |
| 320 | + } |
| 321 | + |
| 322 | + // Add more composite types as needed |
| 323 | + LOG.warn("No composite factory for '{}'", configName); |
| 324 | + return null; |
| 325 | + } |
194 | 326 | } |
0 commit comments