From 4ed4b22befb22e0975df61bc9d290f189ca50139 Mon Sep 17 00:00:00 2001 From: "Paul Merchant, Jr" Date: Tue, 19 Jul 2016 09:54:46 -0400 Subject: [PATCH 1/2] Modified CrossRefStream's Index parsing to interpret argument as an array containing an arbitrary set of starting object number and object count pairs instead of a single pair. Calculate number of objects using final start/count pair. (Actual number will be less than or equal to this value.) Modified reading loop to read only objects in the index ranges. Modified PdfModule to test object number against stream's index instead of a range from 0 to the last object number. Attempt to conform to PDF specification http://wwwimages.adobe.com/content/dam/Adobe/en/devnet/pdf/pdfs/pdf_reference_archives/PDFReference15_v6.pdf, page 83 and PDF versions 1.6 and 1.7. --- .../hul/ois/jhove/module/PdfModule.java | 5 +- .../ois/jhove/module/pdf/CrossRefStream.java | 62 +++++++++++++++---- 2 files changed, 53 insertions(+), 14 deletions(-) diff --git a/jhove-modules/src/main/java/edu/harvard/hul/ois/jhove/module/PdfModule.java b/jhove-modules/src/main/java/edu/harvard/hul/ois/jhove/module/PdfModule.java index a82c6ff9c..7365085f1 100644 --- a/jhove-modules/src/main/java/edu/harvard/hul/ois/jhove/module/PdfModule.java +++ b/jhove-modules/src/main/java/edu/harvard/hul/ois/jhove/module/PdfModule.java @@ -1207,9 +1207,10 @@ protected boolean readXRefStreams (RepInfo info) throws IOException _xref = new long [no]; _xref2 = new int[no] []; } - if (sObjNum < 0 || sObjNum >= no) { + if (!xstream.isValidObject(sObjNum)) { + // if (sObjNum < 0 || sObjNum >= no) { throw new PdfMalformedException - ("Invalid object number in cross-reference stream", + ("Invalid object number in cross-reference stream " + Integer.toString(sObjNum) + " out of " + Integer.toString(no), _parser.getOffset ()); } _xref[sObjNum] = _startxref; // insert the index of the xref stream itself diff --git a/jhove-modules/src/main/java/edu/harvard/hul/ois/jhove/module/pdf/CrossRefStream.java b/jhove-modules/src/main/java/edu/harvard/hul/ois/jhove/module/pdf/CrossRefStream.java index f20490b97..e13995607 100644 --- a/jhove-modules/src/main/java/edu/harvard/hul/ois/jhove/module/pdf/CrossRefStream.java +++ b/jhove-modules/src/main/java/edu/harvard/hul/ois/jhove/module/pdf/CrossRefStream.java @@ -26,14 +26,23 @@ */ public class CrossRefStream { + private class index_range { + public int start; + public int len; + }; + private PdfStream _xstrm; // The underlying Stream object. private PdfDictionary _dict; private int _size; - private int[] _index; + private int _index_size; + private index_range[] _index; private int[] _fieldSizes; private int _freeCount; private Filter[] _filters; private int _entriesRead; + private int _read_range; + private int _read_index; + private int _bytesPerEntry; private long _prevXref; // byte offset to previous xref stream, if any @@ -84,17 +93,32 @@ public boolean isValid () { // format if it's present. PdfObject indexobj = _dict.get ("Index"); if (indexobj instanceof PdfArray) { + // Content is an array of values + // - starting object, number of objects Vector vec = ((PdfArray) indexobj).getContent(); - // This is supposed to have a size of 2. - _index = new int[2]; - PdfSimpleObject idx = (PdfSimpleObject) vec.get (0); - _index[0] = idx.getIntValue (); - idx = (PdfSimpleObject) vec.get (1); - _index[1] = idx.getIntValue (); + int vecSize = vec.size(); + + // Must be an even length array + if (vecSize % 2 != 0) { + return false; + } + _index_size = vecSize / 2; + _index = new index_range[_index_size]; + int i = 0; + ListIterator iter = (ListIterator) vec.listIterator(); + while(iter.hasNext()) { + PdfSimpleObject idx = iter.next(); + _index[i].start = idx.getIntValue(); + idx = iter.next(); + _index[i++].len = idx.getIntValue(); + } } else { // Set up default index. - _index = new int[] { 0, _size }; + _index_size = 1; + _index = new index_range[1]; + _index[0].start = 0; + _index[0].len = _size; } // Get the field sizes. @@ -142,6 +166,8 @@ public void initRead (RandomAccessFile raf) strm.setFilters (_xstrm.getFilters ()); strm.initRead (raf); _entriesRead = 0; + _read_range = 0; + _read_index = 0; /* Calculate the total bytes per entry. This may have * some utility. */ @@ -180,8 +206,12 @@ public boolean readNextObject () throws IOException /* Loop till we find an actual object; we just count * type 0's, which are free entries. */ wid = _fieldSizes[0]; - if (_entriesRead++ >= _index[1]) { - return false; // Read full complement + _entriesRead += 1; + if (_read_index++ >= _index[_read_range].len) { + _read_index = 1; + if (_read_range++ >= _index_size) { + return false; // Read full complement + } } if (wid != 0) { /* "Fields requiring more than one byte are stored @@ -220,7 +250,7 @@ public boolean readNextObject () throws IOException } if (_objType != 0) { - _objNum = _index[0] + _entriesRead - 1; + _objNum = _index[_read_range].start + _read_index - 1; return true; } ++_freeCount; @@ -262,8 +292,16 @@ public int getFreeCount () /** Returns the total object count. */ public int getNumObjects () { - return _index[0] + _index[1]; + return _index[_index_size - 1].start + _index[_index_size - 1].len; } + + public boolean isValidObject(int objNum) { + for (int i = 0; i < _index_size; i++) { + if (objNum >= _index[i].start && objNum < _index[i].start + _index[i].len) return true; + } + + return false; + } /** Returns the offset of the last object object read. * This is meaningful only if the last object read From ac2a1dba73252158383193f08542d412441ab653 Mon Sep 17 00:00:00 2001 From: "Paul Merchant, Jr" Date: Fri, 22 Jul 2016 09:19:04 -0400 Subject: [PATCH 2/2] Relocated index_range private class --- .../hul/ois/jhove/module/pdf/CrossRefStream.java | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/jhove-modules/src/main/java/edu/harvard/hul/ois/jhove/module/pdf/CrossRefStream.java b/jhove-modules/src/main/java/edu/harvard/hul/ois/jhove/module/pdf/CrossRefStream.java index e13995607..d7ffd23cb 100644 --- a/jhove-modules/src/main/java/edu/harvard/hul/ois/jhove/module/pdf/CrossRefStream.java +++ b/jhove-modules/src/main/java/edu/harvard/hul/ois/jhove/module/pdf/CrossRefStream.java @@ -25,12 +25,6 @@ * */ public class CrossRefStream { - - private class index_range { - public int start; - public int len; - }; - private PdfStream _xstrm; // The underlying Stream object. private PdfDictionary _dict; private int _size; @@ -51,7 +45,15 @@ private class index_range { private int _objNum; private int _objField1; private int _objField2; - + + /** Range elements of the _index array: + Starting object and number of objects. + */ + private class index_range { + public int start; + public int len; + }; + /** * Constructor. *