Skip to content

Commit 61d23dd

Browse files
authored
TIKA-4554 - rm ForkParser (#2454)
1 parent b822e8e commit 61d23dd

39 files changed

+10 additions, -4164 deletions (lines changed)

tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java

Lines changed: 8 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -77,7 +77,6 @@
7777
import org.apache.tika.extractor.DefaultEmbeddedStreamTranslator;
7878
import org.apache.tika.extractor.EmbeddedDocumentExtractor;
7979
import org.apache.tika.extractor.EmbeddedStreamTranslator;
80-
import org.apache.tika.fork.ForkParser;
8180
import org.apache.tika.gui.TikaGUI;
8281
import org.apache.tika.io.TikaInputStream;
8382
import org.apache.tika.language.detect.LanguageHandler;
@@ -204,7 +203,6 @@ public void process(TikaInputStream tis, OutputStream output, Metadata metadata)
204203
private String password = System.getenv("TIKA_PASSWORD");
205204
private Digester digester = null;
206205
private boolean pipeMode = true;
207-
private boolean fork = false;
208206
private boolean prettyPrint;
209207
private final OutputType XML = new OutputType() {
210208
@Override
@@ -430,8 +428,6 @@ public void process(String arg) throws Exception {
430428
convertConfigXmlToJson(arg.substring("--convert-config-xml-to-json=".length()));
431429
} else if (arg.equals("--container-aware") || arg.equals("--container-aware-detector")) {
432430
// ignore, as container-aware detectors are now always used
433-
} else if (arg.equals("-f") || arg.equals("--fork")) {
434-
fork = true;
435431
} else if (arg.startsWith("--config=")) {
436432
configFilePath = arg.substring("--config=".length());
437433
} else if (arg.startsWith("--digest=")) {
@@ -597,7 +593,6 @@ private void usage() {
597593
out.println(" -V or --version Print the Apache Tika version number");
598594
out.println();
599595
out.println(" -g or --gui Start the Apache Tika GUI");
600-
out.println(" -f or --fork Use Fork Mode for out-of-process extraction");
601596
out.println();
602597
out.println(" --config=<tika-config.xml>");
603598
out.println(" TikaConfig file. Must be specified before -g, -s, -f or the dump-x-config !");
@@ -1106,24 +1101,15 @@ public String getPassword(Metadata metadata) {
11061101
private class OutputType {
11071102
public void process(TikaInputStream tis, OutputStream output, Metadata metadata) throws Exception {
11081103
Parser p = parser;
1109-
if (fork) {
1110-
p = new ForkParser(TikaCLI.class.getClassLoader(), p);
1111-
}
11121104
ContentHandler handler = getContentHandler(output, metadata);
1113-
try {
1114-
p.parse(tis, handler, metadata, context);
1115-
// fix for TIKA-596: if a parser doesn't generate
1116-
// XHTML output, the lack of an output document prevents
1117-
// metadata from being output: this fixes that
1118-
if (handler instanceof NoDocumentMetHandler) {
1119-
NoDocumentMetHandler metHandler = (NoDocumentMetHandler) handler;
1120-
if (!metHandler.metOutput()) {
1121-
metHandler.endDocument();
1122-
}
1123-
}
1124-
} finally {
1125-
if (fork) {
1126-
((ForkParser) p).close();
1105+
p.parse(tis, handler, metadata, context);
1106+
// fix for TIKA-596: if a parser doesn't generate
1107+
// XHTML output, the lack of an output document prevents
1108+
// metadata from being output: this fixes that
1109+
if (handler instanceof NoDocumentMetHandler) {
1110+
NoDocumentMetHandler metHandler = (NoDocumentMetHandler) handler;
1111+
if (!metHandler.metOutput()) {
1112+
metHandler.endDocument();
11271113
}
11281114
}
11291115
}

tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java

Lines changed: 0 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -200,17 +200,6 @@ public void testContentAllOutput() throws Exception {
200200
assertTrue(out.contains("body"));
201201
}
202202

203-
/**
204-
* Tests -f option of the cli
205-
*
206-
* @throws Exception
207-
*/
208-
@Test
209-
public void testForkParser() throws Exception {
210-
String content = getParamOutContent("-f", resourcePrefix + "alice.cli.test");
211-
assertTrue(content.contains("finished off the cake"));
212-
}
213-
214203
/**
215204
* Tests -m option of the cli
216205
*

tika-bundles/tika-bundle-standard/src/test/java/org/apache/tika/bundle/BundleIT.java

Lines changed: 0 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,6 @@
1616
*/
1717
package org.apache.tika.bundle;
1818

19-
import static java.nio.charset.StandardCharsets.UTF_8;
2019
import static org.junit.Assert.assertEquals;
2120
import static org.junit.Assert.assertFalse;
2221
import static org.junit.Assert.assertTrue;
@@ -29,8 +28,6 @@
2928
import java.io.File;
3029
import java.io.FileInputStream;
3130
import java.io.IOException;
32-
import java.io.StringWriter;
33-
import java.io.Writer;
3431
import java.net.URISyntaxException;
3532
import java.nio.file.Paths;
3633
import java.util.Arrays;
@@ -60,7 +57,6 @@
6057
import org.apache.tika.detect.Detector;
6158
import org.apache.tika.exception.EncryptedDocumentException;
6259
import org.apache.tika.exception.TikaException;
63-
import org.apache.tika.fork.ForkParser;
6460
import org.apache.tika.io.TikaInputStream;
6561
import org.apache.tika.metadata.Metadata;
6662
import org.apache.tika.metadata.TikaCoreProperties;
@@ -70,7 +66,6 @@
7066
import org.apache.tika.parser.DefaultParser;
7167
import org.apache.tika.parser.ParseContext;
7268
import org.apache.tika.parser.Parser;
73-
import org.apache.tika.parser.internal.Activator;
7469
import org.apache.tika.parser.ocr.TesseractOCRParser;
7570
import org.apache.tika.sax.BodyContentHandler;
7671

@@ -153,28 +148,6 @@ public void testBundleDetection() throws Exception {
153148
assertEquals(MediaType.application("pdf"), contentTypeDetector.detect(null, metadataPDF, new ParseContext()));
154149
}
155150

156-
@Test
157-
public void testForkParser() throws Exception {
158-
try (ForkParser parser = new ForkParser(Activator.class.getClassLoader(), defaultParser)) {
159-
String data =
160-
"<!DOCTYPE html>\n<html><body><p>test <span>content</span></p></body></html>";
161-
try (TikaInputStream tis = TikaInputStream.get(data.getBytes(UTF_8))) {
162-
Writer writer = new StringWriter();
163-
ContentHandler contentHandler = new BodyContentHandler(writer);
164-
Metadata metadata = new Metadata();
165-
MediaType type = contentTypeDetector.detect(tis, metadata, new ParseContext());
166-
assertEquals(type.toString(), "text/html");
167-
metadata.add(Metadata.CONTENT_TYPE, type.toString());
168-
ParseContext parseCtx = new ParseContext();
169-
parser.parse(tis, contentHandler, metadata, parseCtx);
170-
writer.flush();
171-
String content = writer.toString();
172-
assertTrue(content.length() > 0);
173-
assertEquals("test content", content.trim());
174-
}
175-
}
176-
}
177-
178151
@Test
179152
public void testBundleSimpleText() throws Exception {
180153
Tika tika = new Tika();

tika-core/src/main/java/org/apache/tika/fork/AutoDetectParserFactory.java

Lines changed: 0 additions & 41 deletions
This file was deleted.

tika-core/src/main/java/org/apache/tika/fork/ClassLoaderProxy.java

Lines changed: 0 additions & 153 deletions
This file was deleted.

0 commit comments

Comments
 (0)