-
Notifications
You must be signed in to change notification settings - Fork 78
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Addressing issue #93: CrossRefStream incorrectly assumes /Index value is a 2 element array #97
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -25,15 +25,18 @@ | |
* | ||
*/ | ||
public class CrossRefStream { | ||
|
||
private PdfStream _xstrm; // The underlying Stream object. | ||
private PdfDictionary _dict; | ||
private int _size; | ||
private int[] _index; | ||
private int _index_size; | ||
private index_range[] _index; | ||
private int[] _fieldSizes; | ||
private int _freeCount; | ||
private Filter[] _filters; | ||
private int _entriesRead; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. With the addition of |
||
private int _read_range; | ||
private int _read_index; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Try and match any surrounding whitespace conventions. In this case 4 spaces instead of a tab. JHOVE's source code is currently a mess with indentation, but we should avoid adding to the confusion. Also, the JHOVE (and Java) style convention for common variable names is to have them camel-cased with lowercase initial letters, e.g. |
||
|
||
private int _bytesPerEntry; | ||
private long _prevXref; // byte offset to previous xref stream, if any | ||
|
||
|
@@ -42,7 +45,15 @@ public class CrossRefStream { | |
private int _objNum; | ||
private int _objField1; | ||
private int _objField2; | ||
|
||
|
||
/** Range elements of the _index array: | ||
Starting object and number of objects. | ||
*/ | ||
private class index_range { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Similarly, all class names should be camel-case with an initial capital letter, i.e. |
||
public int start; | ||
public int len; | ||
}; | ||
|
||
/** | ||
* Constructor. | ||
* | ||
|
@@ -84,17 +95,32 @@ public boolean isValid () { | |
// format if it's present. | ||
PdfObject indexobj = _dict.get ("Index"); | ||
if (indexobj instanceof PdfArray) { | ||
// Content is an array of values | ||
// - starting object, number of objects | ||
Vector vec = ((PdfArray) indexobj).getContent(); | ||
// This is supposed to have a size of 2. | ||
_index = new int[2]; | ||
PdfSimpleObject idx = (PdfSimpleObject) vec.get (0); | ||
_index[0] = idx.getIntValue (); | ||
idx = (PdfSimpleObject) vec.get (1); | ||
_index[1] = idx.getIntValue (); | ||
int vecSize = vec.size(); | ||
|
||
// Must be an even length array | ||
if (vecSize % 2 != 0) { | ||
return false; | ||
} | ||
_index_size = vecSize / 2; | ||
_index = new index_range[_index_size]; | ||
int i = 0; | ||
ListIterator<PdfSimpleObject> iter = (ListIterator<PdfSimpleObject>) vec.listIterator(); | ||
while(iter.hasNext()) { | ||
PdfSimpleObject idx = iter.next(); | ||
_index[i].start = idx.getIntValue(); | ||
idx = iter.next(); | ||
_index[i++].len = idx.getIntValue(); | ||
} | ||
} | ||
else { | ||
// Set up default index. | ||
_index = new int[] { 0, _size }; | ||
_index_size = 1; | ||
_index = new index_range[1]; | ||
_index[0].start = 0; | ||
_index[0].len = _size; | ||
} | ||
|
||
// Get the field sizes. | ||
|
@@ -142,6 +168,8 @@ public void initRead (RandomAccessFile raf) | |
strm.setFilters (_xstrm.getFilters ()); | ||
strm.initRead (raf); | ||
_entriesRead = 0; | ||
_read_range = 0; | ||
_read_index = 0; | ||
|
||
/* Calculate the total bytes per entry. This may have | ||
* some utility. */ | ||
|
@@ -180,8 +208,12 @@ public boolean readNextObject () throws IOException | |
/* Loop till we find an actual object; we just count | ||
* type 0's, which are free entries. */ | ||
wid = _fieldSizes[0]; | ||
if (_entriesRead++ >= _index[1]) { | ||
return false; // Read full complement | ||
_entriesRead += 1; | ||
if (_read_index++ >= _index[_read_range].len) { | ||
_read_index = 1; | ||
if (_read_range++ >= _index_size) { | ||
return false; // Read full complement | ||
} | ||
} | ||
if (wid != 0) { | ||
/* "Fields requiring more than one byte are stored | ||
|
@@ -220,7 +252,7 @@ public boolean readNextObject () throws IOException | |
} | ||
|
||
if (_objType != 0) { | ||
_objNum = _index[0] + _entriesRead - 1; | ||
_objNum = _index[_read_range].start + _read_index - 1; | ||
return true; | ||
} | ||
++_freeCount; | ||
|
@@ -262,8 +294,16 @@ public int getFreeCount () | |
/** Returns the total object count. */ | ||
public int getNumObjects () | ||
{ | ||
return _index[0] + _index[1]; | ||
return _index[_index_size - 1].start + _index[_index_size - 1].len; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Like the original method, this seems like a convoluted way to calculate the value already stored in |
||
} | ||
|
||
public boolean isValidObject(int objNum) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please add JavaDocs for any new methods. |
||
for (int i = 0; i < _index_size; i++) { | ||
if (objNum >= _index[i].start && objNum < _index[i].start + _index[i].len) return true; | ||
} | ||
|
||
return false; | ||
} | ||
|
||
/** Returns the offset of the last object object read. | ||
* This is meaningful only if the last object read | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
_index_size
appears to be unnecessary since the same value can be retrieved from_index.length
.