diff --git a/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java b/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java index 3be3da0f95..5ab1d8e4cd 100644 --- a/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java +++ b/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java @@ -217,7 +217,6 @@ public void process(InputStream stream, OutputStream output, Metadata metadata) */ private String password = System.getenv("TIKA_PASSWORD"); private DigestingParser.Digester digester = null; - private boolean asyncMode = false; private boolean pipeMode = true; private boolean fork = false; private boolean prettyPrint; @@ -344,12 +343,12 @@ private void configurePDFExtractSettings() { if (configFilePath == null && context.get(PDFParserConfig.class) == null) { PDFParserConfig pdfParserConfig = new PDFParserConfig(); pdfParserConfig.setExtractInlineImages(true); + pdfParserConfig.setExtractIncrementalUpdateInfo(true); pdfParserConfig.setParseIncrementalUpdates(true); String warn = "As a convenience, TikaCLI has turned on extraction of\n" + - "inline images and incremental updates for the PDFParser (TIKA-2374 and " + - "TIKA-4017).\n" + - "Aside from the -z option, this is not the default behavior\n" + - "in Tika generally or in tika-server."; + "inline images and incremental updates for the PDFParser (TIKA-2374, " + + "TIKA-4017 and TIKA-4354).\n" + + "This is not the default behavior in Tika generally or in tika-server."; LOG.info(warn); context.set(PDFParserConfig.class, pdfParserConfig); } @@ -409,8 +408,6 @@ public void process(String arg) throws Exception { // ignore, as container-aware detectors are now always used } else if (arg.equals("-f") || arg.equals("--fork")) { fork = true; - } else if (arg.equals("-a") || arg.equals("--async")) { - asyncMode = true; } else if (arg.startsWith("--config=")) { configFilePath = arg.substring("--config=".length()); } else if (arg.startsWith("--digest=")) { @@ -454,7 +451,6 @@ public void process(String arg) throws Exception { } extractDir = new File(dirPath); } else if (arg.equals("-z") || arg.equals("--extract")) { - configurePDFExtractSettings(); type = NO_OUTPUT; context.set(EmbeddedDocumentExtractor.class, new FileEmbeddedDocumentExtractor()); } else if (arg.equals("-r") || arg.equals("--pretty-print")) { @@ -485,6 +481,7 @@ public void process(String arg) throws Exception { } else { url = new URL(arg); } + configurePDFExtractSettings(); if (recursiveJSON) { handleRecursiveJson(url, System.out); } else { diff --git a/tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java b/tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java index e6c5c22963..b91f5935a2 100644 --- a/tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java +++ b/tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java @@ -281,6 +281,14 @@ public void testJsonMetadataPrettyPrintOutput() throws Exception { assertTrue(fb > -1 && title > -1 && fb > title); } + @Test + public void testDefaultPDFIncrementalUpdateSettings() throws Exception { + String json = getParamOutContent("-J", + resourcePrefix + "testPDF_incrementalUpdates.pdf"); + assertTrue(json.contains("pdf:incrementalUpdateCount\":\"2\"")); + assertTrue(json.contains("embeddedResourceType\":\"VERSION\"")); + } + /** * Tests -l option of the cli * diff --git a/tika-app/src/test/resources/test-data/testPDF_incrementalUpdates.pdf b/tika-app/src/test/resources/test-data/testPDF_incrementalUpdates.pdf new file mode 100644 index 0000000000..8494cc8396 Binary files /dev/null and b/tika-app/src/test/resources/test-data/testPDF_incrementalUpdates.pdf differ