Skip to content

Commit 0a87500

Browse files
committed
TIKA-4581 - fix faulty logic in PipesServer and add intermediate results for concatenated parsing
1 parent c4c3441 commit 0a87500

File tree

2 files changed

+9
-11
lines changed

2 files changed

+9
-11
lines changed

tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/server/ParseHandler.java

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -169,7 +169,7 @@ public List<Metadata> parseRecursive(FetchEmitTuple fetchEmitTuple,
169 169

170 170
public List<Metadata> parseConcatenated(FetchEmitTuple fetchEmitTuple,
171 171
HandlerConfig handlerConfig, TikaInputStream stream,
172-
Metadata metadata, ParseContext parseContext) {
172+
Metadata metadata, ParseContext parseContext) throws InterruptedException {
173 173

174 174
ContentHandlerFactory contentHandlerFactory =
175 175
new BasicContentHandlerFactory(handlerConfig.getType(),
@@ -193,7 +193,9 @@ public boolean select(Metadata metadata) {
193 193
String containerException = null;
194 194
long start = System.currentTimeMillis();
195 195
preParse(fetchEmitTuple, stream, metadata, parseContext);
196-
//TODO -- add intermediate
196+
//queue better be empty. we deserve an exception if not
197+
intermediateResult.add(metadata);
198+
countDownLatch.await();
197 199
try {
198 200
autoDetectParser.parse(stream, handler, metadata, parseContext);
199 201
} catch (SAXException e) {

tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/server/PipesServer.java

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -474,15 +474,11 @@ protected void initializeResources() throws TikaException, IOException, SAXExcep
474 474
// User doesn't want container documents digested
475 475
this.digester = null;
476 476
}
477-
if (this.digester != null) {
478-
// If the user hasn't configured an embedded document extractor, set up the
479-
// RUnpackExtractorFactory
480-
if (autoDetectParser.getAutoDetectParserConfig()
481-
.getEmbeddedDocumentExtractorFactory() == null) {
482-
autoDetectParser
483-
.getAutoDetectParserConfig().setEmbeddedDocumentExtractorFactory(
484-
new RUnpackExtractorFactory());
485-
}
477+
478+
// If the user hasn't configured an embedded document extractor, set up the
479+
// RUnpackExtractorFactory
480+
if (autoDetectParser.getAutoDetectParserConfig().getEmbeddedDocumentExtractorFactory() == null) {
481+
autoDetectParser.getAutoDetectParserConfig().setEmbeddedDocumentExtractorFactory(new RUnpackExtractorFactory());
486 482
}
487 483
this.detector = this.autoDetectParser.getDetector();
488 484
this.rMetaParser = new RecursiveParserWrapper(autoDetectParser);

0 commit comments

Comments
 (0)