Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Addressing issue #93: CrossRefStream incorrectly assumes /Index value is a 2 element array #97

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -1207,9 +1207,10 @@ protected boolean readXRefStreams (RepInfo info) throws IOException
_xref = new long [no];
_xref2 = new int[no] [];
}
if (sObjNum < 0 || sObjNum >= no) {
if (!xstream.isValidObject(sObjNum)) {
// if (sObjNum < 0 || sObjNum >= no) {
throw new PdfMalformedException
("Invalid object number in cross-reference stream",
("Invalid object number in cross-reference stream " + Integer.toString(sObjNum) + " out of " + Integer.toString(no),
_parser.getOffset ());
}
_xref[sObjNum] = _startxref; // insert the index of the xref stream itself
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,15 +25,18 @@
*
*/
public class CrossRefStream {

private PdfStream _xstrm; // The underlying Stream object.
private PdfDictionary _dict;
private int _size;
private int[] _index;
private int _index_size;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

_index_size appears to be unnecessary since the same value can be retrieved from _index.length.

private index_range[] _index;
private int[] _fieldSizes;
private int _freeCount;
private Filter[] _filters;
private int _entriesRead;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

With the addition of _read_index below, _entriesRead is no longer in use and can be safely removed.

private int _read_range;
private int _read_index;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Try and match any surrounding whitespace conventions. In this case 4 spaces instead of a tab. JHOVE's source code is currently a mess with indentation, but we should avoid adding to the confusion.

Also, the JHOVE (and Java) style convention for common variable names is to have them camel-cased with lowercase initial letters, e.g. _readRange, such as in the surrounding code.


private int _bytesPerEntry;
private long _prevXref; // byte offset to previous xref stream, if any

Expand All @@ -42,7 +45,15 @@ public class CrossRefStream {
private int _objNum;
private int _objField1;
private int _objField2;


/** Range elements of the _index array:
Starting object and number of objects.
*/
private class index_range {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Similarly, all class names should be camel-case with an initial capital letter, i.e. IndexRange.

public int start;
public int len;
};

/**
* Constructor.
*
Expand Down Expand Up @@ -84,17 +95,32 @@ public boolean isValid () {
// format if it's present.
PdfObject indexobj = _dict.get ("Index");
if (indexobj instanceof PdfArray) {
// Content is an array of values
// - starting object, number of objects
Vector vec = ((PdfArray) indexobj).getContent();
// This is supposed to have a size of 2.
_index = new int[2];
PdfSimpleObject idx = (PdfSimpleObject) vec.get (0);
_index[0] = idx.getIntValue ();
idx = (PdfSimpleObject) vec.get (1);
_index[1] = idx.getIntValue ();
int vecSize = vec.size();

// Must be an even length array
if (vecSize % 2 != 0) {
return false;
}
_index_size = vecSize / 2;
_index = new index_range[_index_size];
int i = 0;
ListIterator<PdfSimpleObject> iter = (ListIterator<PdfSimpleObject>) vec.listIterator();
while(iter.hasNext()) {
PdfSimpleObject idx = iter.next();
_index[i].start = idx.getIntValue();
idx = iter.next();
_index[i++].len = idx.getIntValue();
}
}
else {
// Set up default index.
_index = new int[] { 0, _size };
_index_size = 1;
_index = new index_range[1];
_index[0].start = 0;
_index[0].len = _size;
}

// Get the field sizes.
Expand Down Expand Up @@ -142,6 +168,8 @@ public void initRead (RandomAccessFile raf)
strm.setFilters (_xstrm.getFilters ());
strm.initRead (raf);
_entriesRead = 0;
_read_range = 0;
_read_index = 0;

/* Calculate the total bytes per entry. This may have
* some utility. */
Expand Down Expand Up @@ -180,8 +208,12 @@ public boolean readNextObject () throws IOException
/* Loop till we find an actual object; we just count
* type 0's, which are free entries. */
wid = _fieldSizes[0];
if (_entriesRead++ >= _index[1]) {
return false; // Read full complement
_entriesRead += 1;
if (_read_index++ >= _index[_read_range].len) {
_read_index = 1;
if (_read_range++ >= _index_size) {
return false; // Read full complement
}
}
if (wid != 0) {
/* "Fields requiring more than one byte are stored
Expand Down Expand Up @@ -220,7 +252,7 @@ public boolean readNextObject () throws IOException
}

if (_objType != 0) {
_objNum = _index[0] + _entriesRead - 1;
_objNum = _index[_read_range].start + _read_index - 1;
return true;
}
++_freeCount;
Expand Down Expand Up @@ -262,8 +294,16 @@ public int getFreeCount ()
/** Returns the total object count. */
public int getNumObjects ()
{
return _index[0] + _index[1];
return _index[_index_size - 1].start + _index[_index_size - 1].len;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Like the original method, this seems like a convoluted way to calculate the value already stored in _size. Is there a reason we can't simply return that?

}

public boolean isValidObject(int objNum) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please add JavaDocs for any new methods.

for (int i = 0; i < _index_size; i++) {
if (objNum >= _index[i].start && objNum < _index[i].start + _index[i].len) return true;
}

return false;
}

/** Returns the offset of the last object object read.
* This is meaningful only if the last object read
Expand Down