Skip to content

Commit 70da6e0

Browse files
authored
TIKA-4581: Fix packaging issues and allow plugin-roots override (#2486)
* TIKA-4581: Add --plugin-roots CLI parameter for tika-grpc

  Adds a --plugin-roots command-line parameter that overrides the plugin-roots setting from the config file.

  Problem: Users had to include 'plugin-roots' in the tika-config.json file, which made Docker deployments less flexible. Different environments might need different plugin locations.

  Solution:
  - Added a --plugin-roots CLI parameter to TikaGrpcServer
  - The parameter accepts a comma-separated list of plugin directories
  - The CLI parameter overrides the config file if specified
  - Falls back to the config file if not specified

  Changes:
  - TikaGrpcServer: Added --plugin-roots parameter
  - TikaGrpcServerImpl: Updated constructor to accept pluginRootsOverride
  - TikaPluginManager: Added loadFromPaths() method for string-based paths

  Usage:
    java -jar tika-grpc.jar -c config.json --plugin-roots /tmp/tika-plugins
  Or with Docker:
    docker run apache/tika-grpc:latest -c /config/config.json --plugin-roots /tmp/tika-plugins

  Benefits:
  - No need to modify config files for different environments
  - Simplifies Docker/Kubernetes deployments
  - Backward compatible: the config file still works if the CLI parameter is not specified

* TIKA-4581: Copy all plugin ZIPs to tika-grpc/target/plugins

  Adds the following 11 plugin modules to the copy-plugins execution, so that all 13 plugin modules are copied:
  - tika-pipes-az-blob
  - tika-pipes-csv
  - tika-pipes-gcs
  - tika-pipes-ignite
  - tika-pipes-jdbc
  - tika-pipes-json
  - tika-pipes-kafka
  - tika-pipes-microsoft-graph
  - tika-pipes-opensearch
  - tika-pipes-s3
  - tika-pipes-solr

  This ensures all plugins are available for Docker builds.

* TIKA-4581: Fix MANIFEST.MF extraction for az-blob, gcs, jdbc plugins

  The assembly.xml was trying to include MANIFEST.MF from the classes/ directory, but it only exists in the JAR file. Changed to use a dependencySet with unpack to properly extract MANIFEST.MF and extensions.idx from the project artifact JAR into the plugin ZIP. This fixes 'Cannot find the manifest path' errors for these 3 plugins.
* TIKA-4581: Fix GCS plugin class name in plugin.properties

  The plugin.properties file referenced the wrong class name:
    Wrong:   org.apache.tika.pipes.emitter.gcs.GCSEmitterPlugin
    Correct: org.apache.tika.pipes.plugin.gcs.GCSPipesPlugin

  This caused a ClassNotFoundException when loading the GCS plugin.
1 parent 809ffb2 commit 70da6e0

File tree

8 files changed

+298
-120
lines changed

8 files changed

+298
-120
lines changed

tika-grpc/pom.xml

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -299,6 +299,83 @@
299299
<type>zip</type>
300300
<overWrite>true</overWrite>
301301
</artifactItem>
302+
<artifactItem>
303+
<groupId>org.apache.tika</groupId>
304+
<artifactId>tika-pipes-az-blob</artifactId>
305+
<version>${project.version}</version>
306+
<type>zip</type>
307+
<overWrite>true</overWrite>
308+
</artifactItem>
309+
<artifactItem>
310+
<groupId>org.apache.tika</groupId>
311+
<artifactId>tika-pipes-csv</artifactId>
312+
<version>${project.version}</version>
313+
<type>zip</type>
314+
<overWrite>true</overWrite>
315+
</artifactItem>
316+
<artifactItem>
317+
<groupId>org.apache.tika</groupId>
318+
<artifactId>tika-pipes-gcs</artifactId>
319+
<version>${project.version}</version>
320+
<type>zip</type>
321+
<overWrite>true</overWrite>
322+
</artifactItem>
323+
<artifactItem>
324+
<groupId>org.apache.tika</groupId>
325+
<artifactId>tika-pipes-ignite</artifactId>
326+
<version>${project.version}</version>
327+
<type>zip</type>
328+
<overWrite>true</overWrite>
329+
</artifactItem>
330+
<artifactItem>
331+
<groupId>org.apache.tika</groupId>
332+
<artifactId>tika-pipes-jdbc</artifactId>
333+
<version>${project.version}</version>
334+
<type>zip</type>
335+
<overWrite>true</overWrite>
336+
</artifactItem>
337+
<artifactItem>
338+
<groupId>org.apache.tika</groupId>
339+
<artifactId>tika-pipes-json</artifactId>
340+
<version>${project.version}</version>
341+
<type>zip</type>
342+
<overWrite>true</overWrite>
343+
</artifactItem>
344+
<artifactItem>
345+
<groupId>org.apache.tika</groupId>
346+
<artifactId>tika-pipes-kafka</artifactId>
347+
<version>${project.version}</version>
348+
<type>zip</type>
349+
<overWrite>true</overWrite>
350+
</artifactItem>
351+
<artifactItem>
352+
<groupId>org.apache.tika</groupId>
353+
<artifactId>tika-pipes-microsoft-graph</artifactId>
354+
<version>${project.version}</version>
355+
<type>zip</type>
356+
<overWrite>true</overWrite>
357+
</artifactItem>
358+
<artifactItem>
359+
<groupId>org.apache.tika</groupId>
360+
<artifactId>tika-pipes-opensearch</artifactId>
361+
<version>${project.version}</version>
362+
<type>zip</type>
363+
<overWrite>true</overWrite>
364+
</artifactItem>
365+
<artifactItem>
366+
<groupId>org.apache.tika</groupId>
367+
<artifactId>tika-pipes-s3</artifactId>
368+
<version>${project.version}</version>
369+
<type>zip</type>
370+
<overWrite>true</overWrite>
371+
</artifactItem>
372+
<artifactItem>
373+
<groupId>org.apache.tika</groupId>
374+
<artifactId>tika-pipes-solr</artifactId>
375+
<version>${project.version}</version>
376+
<type>zip</type>
377+
<overWrite>true</overWrite>
378+
</artifactItem>
302379
</artifactItems>
303380
</configuration>
304381
</execution>

tika-grpc/src/main/java/org/apache/tika/pipes/grpc/TikaGrpcServer.java

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,9 @@ public class TikaGrpcServer {
4949
@Parameter(names = {"-l", "--plugins"}, description = "The tika pipes plugins config file", help = true)
5050
private File tikaPlugins;
5151

52+
// Optional comma-separated list of plugin root directories supplied on the command line.
// Left null when the flag is absent. Deliberately NOT marked help = true: JCommander treats
// a help parameter as a usage request and stops checking required parameters when it is given.
@Parameter(names = {"--plugin-roots"}, description = "Comma-separated list of plugin root directories (overrides config file)")
53+
private String pluginRoots;
54+
5255
@Parameter(names = {"-s", "--secure"}, description = "Enable credentials required to access this grpc server")
5356
private boolean secure;
5457

@@ -93,7 +96,7 @@ public void start() throws Exception {
9396
healthStatusManager.setStatus(TikaGrpcServer.class.getSimpleName(), ServingStatus.SERVING);
9497
server = Grpc
9598
.newServerBuilderForPort(port, creds)
96-
.addService(new TikaGrpcServerImpl(tikaConfigFile.getAbsolutePath()))
99+
.addService(new TikaGrpcServerImpl(tikaConfigFile.getAbsolutePath(), pluginRoots))
97100
.addService(healthStatusManager.getHealthService())
98101
.addService(ProtoReflectionServiceV1.newInstance())
99102
.build()

tika-grpc/src/main/java/org/apache/tika/pipes/grpc/TikaGrpcServerImpl.java

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,10 @@ class TikaGrpcServerImpl extends TikaGrpc.TikaImplBase {
8383
PluginManager pluginManager;
8484

8585
TikaGrpcServerImpl(String tikaConfigPath) throws TikaConfigException, IOException {
86+
this(tikaConfigPath, null);
87+
}
88+
89+
TikaGrpcServerImpl(String tikaConfigPath, String pluginRootsOverride) throws TikaConfigException, IOException {
8690
File tikaConfigFile = new File(tikaConfigPath);
8791
if (!tikaConfigFile.exists()) {
8892
throw new TikaConfigException("Tika config file does not exist: " + tikaConfigPath);
@@ -102,7 +106,13 @@ class TikaGrpcServerImpl extends TikaGrpc.TikaImplBase {
102106
pipesClient = new PipesClient(pipesConfig, configPath);
103107

104108
try {
105-
pluginManager = TikaPluginManager.load(tikaJsonConfig);
109+
if (pluginRootsOverride != null && !pluginRootsOverride.trim().isEmpty()) {
110+
// Use command-line plugin roots
111+
pluginManager = TikaPluginManager.loadFromPaths(pluginRootsOverride);
112+
} else {
113+
// Use plugin roots from config file
114+
pluginManager = TikaPluginManager.load(tikaJsonConfig);
115+
}
106116
pluginManager.loadPlugins();
107117
pluginManager.startPlugins();
108118
} catch (TikaConfigException e) {
Lines changed: 58 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -1,45 +1,64 @@
11
<?xml version="1.0" encoding="UTF-8"?>
22
<!--
3-
Licensed to the Apache Software Foundation (ASF) under one
4-
or more contributor license agreements. See the NOTICE file
5-
distributed with this work for additional information
6-
regarding copyright ownership. The ASF licenses this file
7-
to you under the Apache License, Version 2.0 (the
8-
"License"); you may not use this file except in compliance
9-
with the License. You may obtain a copy of the License at
3+
Licensed to the Apache Software Foundation (ASF) under one or more
4+
contributor license agreements. See the NOTICE file distributed with
5+
this work for additional information regarding copyright ownership.
6+
The ASF licenses this file to You under the Apache License, Version 2.0
7+
(the "License"); you may not use this file except in compliance with
8+
the License. You may obtain a copy of the License at
109
11-
http://www.apache.org/licenses/LICENSE-2.0
10+
http://www.apache.org/licenses/LICENSE-2.0
1211
13-
Unless required by applicable law or agreed to in writing,
14-
software distributed under the License is distributed on an
15-
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16-
KIND, either express or implied. See the License for the
17-
specific language governing permissions and limitations
18-
under the License.
12+
Unless required by applicable law or agreed to in writing, software
13+
distributed under the License is distributed on an "AS IS" BASIS,
14+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
See the License for the specific language governing permissions and
16+
limitations under the License.
1917
-->
20-
<assembly xmlns="http://maven.apache.org/ASSEMBLY/2.2.0"
21-
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
22-
xsi:schemaLocation="http://maven.apache.org/ASSEMBLY/2.2.0 http://maven.apache.org/xsd/assembly-2.2.0.xsd">
23-
<id>plugin</id>
24-
<formats>
25-
<format>zip</format>
26-
</formats>
27-
<includeBaseDirectory>true</includeBaseDirectory>
28-
<baseDirectory>${project.artifactId}-${project.version}</baseDirectory>
29-
<fileSets>
30-
<fileSet>
31-
<directory>${project.build.directory}</directory>
32-
<outputDirectory>/</outputDirectory>
33-
<includes>
34-
<include>${project.artifactId}-${project.version}.jar</include>
35-
</includes>
36-
</fileSet>
37-
<fileSet>
38-
<directory>${project.build.directory}/lib</directory>
39-
<outputDirectory>/lib</outputDirectory>
40-
<includes>
41-
<include>*.jar</include>
42-
</includes>
43-
</fileSet>
44-
</fileSets>
18+
<assembly xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
19+
xmlns="http://maven.apache.org/ASSEMBLY/2.0.0"
20+
xsi:schemaLocation="http://maven.apache.org/ASSEMBLY/2.0.0
21+
http://maven.apache.org/xsd/assembly-2.0.0.xsd">
22+
<id>dependencies-zip</id>
23+
<formats>
24+
<format>zip</format>
25+
</formats>
26+
<includeBaseDirectory>false</includeBaseDirectory>
27+
<dependencySets>
28+
<dependencySet>
29+
<outputDirectory>/</outputDirectory>
30+
<useProjectArtifact>true</useProjectArtifact>
31+
<unpack>true</unpack>
32+
<scope>runtime</scope>
33+
<includes>
34+
<include>${project.groupId}:${project.artifactId}</include>
35+
</includes>
36+
<unpackOptions>
37+
<includes>
38+
<include>META-INF/MANIFEST.MF</include>
39+
<include>META-INF/extensions.idx</include>
40+
</includes>
41+
</unpackOptions>
42+
</dependencySet>
43+
</dependencySets>
44+
<fileSets>
45+
<fileSet>
46+
<directory>${project.build.directory}/lib</directory>
47+
<outputDirectory>/lib</outputDirectory>
48+
</fileSet>
49+
<fileSet>
50+
<directory>${project.build.directory}</directory>
51+
<outputDirectory>/lib</outputDirectory>
52+
<includes>
53+
<include>${project.artifactId}-${project.version}.jar</include>
54+
</includes>
55+
</fileSet>
56+
<fileSet>
57+
<directory>${project.basedir}/src/main/resources</directory>
58+
<outputDirectory>/</outputDirectory>
59+
<includes>
60+
<include>plugin.properties</include>
61+
</includes>
62+
</fileSet>
63+
</fileSets>
4564
</assembly>
Lines changed: 58 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -1,45 +1,64 @@
11
<?xml version="1.0" encoding="UTF-8"?>
22
<!--
3-
Licensed to the Apache Software Foundation (ASF) under one
4-
or more contributor license agreements. See the NOTICE file
5-
distributed with this work for additional information
6-
regarding copyright ownership. The ASF licenses this file
7-
to you under the Apache License, Version 2.0 (the
8-
"License"); you may not use this file except in compliance
9-
with the License. You may obtain a copy of the License at
3+
Licensed to the Apache Software Foundation (ASF) under one or more
4+
contributor license agreements. See the NOTICE file distributed with
5+
this work for additional information regarding copyright ownership.
6+
The ASF licenses this file to You under the Apache License, Version 2.0
7+
(the "License"); you may not use this file except in compliance with
8+
the License. You may obtain a copy of the License at
109
11-
http://www.apache.org/licenses/LICENSE-2.0
10+
http://www.apache.org/licenses/LICENSE-2.0
1211
13-
Unless required by applicable law or agreed to in writing,
14-
software distributed under the License is distributed on an
15-
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16-
KIND, either express or implied. See the License for the
17-
specific language governing permissions and limitations
18-
under the License.
12+
Unless required by applicable law or agreed to in writing, software
13+
distributed under the License is distributed on an "AS IS" BASIS,
14+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
See the License for the specific language governing permissions and
16+
limitations under the License.
1917
-->
20-
<assembly xmlns="http://maven.apache.org/ASSEMBLY/2.2.0"
21-
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
22-
xsi:schemaLocation="http://maven.apache.org/ASSEMBLY/2.2.0 http://maven.apache.org/xsd/assembly-2.2.0.xsd">
23-
<id>plugin</id>
24-
<formats>
25-
<format>zip</format>
26-
</formats>
27-
<includeBaseDirectory>true</includeBaseDirectory>
28-
<baseDirectory>${project.artifactId}-${project.version}</baseDirectory>
29-
<fileSets>
30-
<fileSet>
31-
<directory>${project.build.directory}</directory>
32-
<outputDirectory>/</outputDirectory>
33-
<includes>
34-
<include>${project.artifactId}-${project.version}.jar</include>
35-
</includes>
36-
</fileSet>
37-
<fileSet>
38-
<directory>${project.build.directory}/lib</directory>
39-
<outputDirectory>/lib</outputDirectory>
40-
<includes>
41-
<include>*.jar</include>
42-
</includes>
43-
</fileSet>
44-
</fileSets>
18+
<assembly xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
19+
xmlns="http://maven.apache.org/ASSEMBLY/2.0.0"
20+
xsi:schemaLocation="http://maven.apache.org/ASSEMBLY/2.0.0
21+
http://maven.apache.org/xsd/assembly-2.0.0.xsd">
22+
<id>dependencies-zip</id>
23+
<formats>
24+
<format>zip</format>
25+
</formats>
26+
<includeBaseDirectory>false</includeBaseDirectory>
27+
<dependencySets>
28+
<dependencySet>
29+
<outputDirectory>/</outputDirectory>
30+
<useProjectArtifact>true</useProjectArtifact>
31+
<unpack>true</unpack>
32+
<scope>runtime</scope>
33+
<includes>
34+
<include>${project.groupId}:${project.artifactId}</include>
35+
</includes>
36+
<unpackOptions>
37+
<includes>
38+
<include>META-INF/MANIFEST.MF</include>
39+
<include>META-INF/extensions.idx</include>
40+
</includes>
41+
</unpackOptions>
42+
</dependencySet>
43+
</dependencySets>
44+
<fileSets>
45+
<fileSet>
46+
<directory>${project.build.directory}/lib</directory>
47+
<outputDirectory>/lib</outputDirectory>
48+
</fileSet>
49+
<fileSet>
50+
<directory>${project.build.directory}</directory>
51+
<outputDirectory>/lib</outputDirectory>
52+
<includes>
53+
<include>${project.artifactId}-${project.version}.jar</include>
54+
</includes>
55+
</fileSet>
56+
<fileSet>
57+
<directory>${project.basedir}/src/main/resources</directory>
58+
<outputDirectory>/</outputDirectory>
59+
<includes>
60+
<include>plugin.properties</include>
61+
</includes>
62+
</fileSet>
63+
</fileSets>
4564
</assembly>

tika-pipes/tika-pipes-plugins/tika-pipes-gcs/src/main/resources/plugin.properties

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
# limitations under the License.
1616

1717
plugin.id=tika-pipes-gcs-plugin
18-
plugin.class=org.apache.tika.pipes.emitter.gcs.GCSEmitterPlugin
18+
plugin.class=org.apache.tika.pipes.plugin.gcs.GCSPipesPlugin
1919
plugin.version=4.0.0-SNAPSHOT
2020
plugin.provider=Apache Tika
2121
plugin.description=Pipes for the Google Cloud Storage

0 commit comments

Comments
 (0)