diff --git a/jhove-modules/src/main/java/edu/harvard/hul/ois/jhove/module/PdfModule.java b/jhove-modules/src/main/java/edu/harvard/hul/ois/jhove/module/PdfModule.java
index a82c6ff9c..7365085f1 100644
--- a/jhove-modules/src/main/java/edu/harvard/hul/ois/jhove/module/PdfModule.java
+++ b/jhove-modules/src/main/java/edu/harvard/hul/ois/jhove/module/PdfModule.java
@@ -1207,9 +1207,12 @@ protected boolean readXRefStreams (RepInfo info) throws IOException
                     _xref = new long [no];
                     _xref2 = new int[no] [];
                 }
-                if (sObjNum < 0 || sObjNum >= no) {
+                // The number is used as an index into _xref (allocated with
+                // "no" entries above) and must also be listed by the stream.
+                if (sObjNum < 0 || sObjNum >= no
+                        || !xstream.isValidObject(sObjNum)) {
                     throw new PdfMalformedException
-                        ("Invalid object number in cross-reference stream",
+                        ("Invalid object number in cross-reference stream " + sObjNum + " out of " + no,
                          _parser.getOffset ());
                 }
                 _xref[sObjNum] = _startxref;  // insert the index of the xref stream itself
diff --git a/jhove-modules/src/main/java/edu/harvard/hul/ois/jhove/module/pdf/CrossRefStream.java b/jhove-modules/src/main/java/edu/harvard/hul/ois/jhove/module/pdf/CrossRefStream.java
index f20490b97..d7ffd23cb 100644
--- a/jhove-modules/src/main/java/edu/harvard/hul/ois/jhove/module/pdf/CrossRefStream.java
+++ b/jhove-modules/src/main/java/edu/harvard/hul/ois/jhove/module/pdf/CrossRefStream.java
@@ -25,15 +25,18 @@
  * 
  */
 public class CrossRefStream {
-    
     private PdfStream _xstrm;    // The underlying Stream object.
     private PdfDictionary _dict;
     private int _size;
-    private int[] _index;
+    private int _index_size;
+    private index_range[] _index;
     private int[] _fieldSizes;
     private int _freeCount;
     private Filter[] _filters;
     private int _entriesRead;
+    private int _read_range;
+    private int _read_index;
+
     private int _bytesPerEntry;
     private long _prevXref;   // byte offset to previous xref stream, if any
 
@@ -42,7 +45,15 @@ public class CrossRefStream {
     private int _objNum;
     private int _objField1;
     private int _objField2;
-    
+
+    /** One element of the _index array: the first object number of a
+     *  subsection and the number of objects in it. Static because it
+     *  needs nothing from the enclosing instance. */
+    private static class index_range {
+        public int start;
+        public int len;
+    }
+
     /**
      * Constructor.
      * 
@@ -84,17 +95,32 @@ public boolean isValid () {
             // format if it's present.
             PdfObject indexobj = _dict.get ("Index");
             if (indexobj instanceof PdfArray) {
+                // Content is an array of values
+                // - starting object, number of objects
                 Vector vec = ((PdfArray) indexobj).getContent();
-                // This is supposed to have a size of 2.
-                _index = new int[2];
-                PdfSimpleObject idx = (PdfSimpleObject) vec.get (0);
-                _index[0] = idx.getIntValue ();
-                idx = (PdfSimpleObject) vec.get (1);
-                _index[1] = idx.getIntValue ();
+                int vecSize = vec.size();
+
+                // Must be a non-empty, even length array
+                if (vecSize == 0 || vecSize % 2 != 0) {
+                    return false;
+                }
+                _index_size = vecSize / 2;
+                _index = new index_range[_index_size];
+                for (int i = 0; i < _index_size; i++) {
+                    // A new object array holds only nulls: allocate each element.
+                    index_range range = new index_range ();
+                    range.start = ((PdfSimpleObject) vec.get (2 * i)).getIntValue ();
+                    range.len = ((PdfSimpleObject) vec.get (2 * i + 1)).getIntValue ();
+                    _index[i] = range;
+                }
             }
             else {
                 // Set up default index.
-                _index = new int[] { 0, _size };
+                _index_size = 1;
+                _index = new index_range[1];
+                _index[0] = new index_range ();
+                _index[0].start = 0;
+                _index[0].len = _size;
             }
 
             // Get the field sizes.
@@ -142,6 +168,8 @@ public void initRead (RandomAccessFile raf)
         strm.setFilters (_xstrm.getFilters ());
         strm.initRead (raf);
         _entriesRead = 0;
+        _read_range = 0;
+        _read_index = 0;
 
         /* Calculate the total bytes per entry. This may have
          * some utility. */
@@ -180,8 +208,17 @@ public boolean readNextObject () throws IOException
             /* Loop till we find an actual object; we just count
              * type 0's, which are free entries. */
             wid = _fieldSizes[0];
-            if (_entriesRead++ >= _index[1]) {
-                return false;    // Read full complement
-            }
+            _entriesRead += 1;
+            // Step to the next subsection once the current one is used
+            // up; the loop also passes over zero-length subsections.
+            while (_read_range < _index_size
+                    && _read_index >= _index[_read_range].len) {
+                ++_read_range;
+                _read_index = 0;
+            }
+            if (_read_range >= _index_size) {
+                return false;    // Read full complement
+            }
+            ++_read_index;
             if (wid != 0) {
                 /* "Fields requiring more than one byte are stored
@@ -220,7 +257,7 @@ public boolean readNextObject () throws IOException
             }
 
             if (_objType != 0) {
-                _objNum = _index[0] + _entriesRead - 1;
+                _objNum = _index[_read_range].start + _read_index - 1;
                 return true;
             }
             ++_freeCount;
@@ -262,8 +299,30 @@ public int getFreeCount ()
     /** Returns the total object count. */
     public int getNumObjects ()
     {
-        return _index[0] + _index[1];
+        // Take the highest end over all subsections instead of the end
+        // of the last one, so the result does not depend on their order.
+        int count = 0;
+        for (int i = 0; i < _index_size; i++) {
+            count = Math.max (count, _index[i].start + _index[i].len);
+        }
+        return count;
     }
+
+    /**
+     * Reports whether an object number is covered by this stream.
+     *
+     * @param objNum object number to look up
+     * @return true if objNum lies inside one of the index subsections
+     */
+    public boolean isValidObject(int objNum) {
+        for (int i = 0; i < _index_size; i++) {
+            int start = _index[i].start;
+            if (objNum >= start && objNum < start + _index[i].len) {
+                return true;
+            }
+        }
+        return false;
+    }
 
     /** Returns the offset of the last object object read.
      *  This is meaningful only if the last object read