Skip to content

Commit

Permalink
Fix ArrayIndexOutOfBoundsException on second pdf import (#4426)
Browse files Browse the repository at this point in the history
* Fix ArrayIndexOutOfBoundsException on second pdf import

The variable formally known as i is a global variable which had -1 after the first run and therefore threw an exception

* add changelog and fix

* add test
  • Loading branch information
Siedlerchr authored Oct 30, 2018
1 parent 11a62dd commit 583c61f
Show file tree
Hide file tree
Showing 3 changed files with 58 additions and 38 deletions.
6 changes: 3 additions & 3 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ We refer to [GitHub issues](https://github.com/JabRef/jabref/issues) by using `#
- Files without a defined external file type are now directly opened with the default application of the operating system
- We streamlined the process to rename and move files by removing the confirmation dialogs.
- We removed the redundant new lines of markings and wrapped the summary in the File annotation tab. [#3823](https://github.com/JabRef/jabref/issues/3823)
- We add auto url formatting when user paste link to URL field in entry editor. [#254](https://github.com/koppor/jabref/issues/254)
- We add auto url formatting when user paste link to URL field in entry editor. [koppor#254](https://github.com/koppor/jabref/issues/254)
- We added a minimal height for the entry editor so that it can no longer be hidden by accident. [#4279](https://github.com/JabRef/jabref/issues/4279)
- We added a new keyboard shortcut so that the entry editor could be closed by <kbd>Ctrl<kbd> + <kbd>E<kbd>. [#4222] (https://github.com/JabRef/jabref/issues/4222)
- We added an option in the preference dialog box, that allows user to pick the dark or light theme option. [#4130] (https://github.com/JabRef/jabref/issues/4130)
Expand Down Expand Up @@ -74,8 +74,8 @@ We refer to [GitHub issues](https://github.com/JabRef/jabref/issues) by using `#
- We fixed an issue where files added via the "Attach file" contextmenu of an entry were not made relative. [#4201](https://github.com/JabRef/jabref/issues/4201) and [#4241](https://github.com/JabRef/jabref/issues/4241)
- We fixed an issue where author list parser can't generate bibtex for Chinese author. [#4169](https://github.com/JabRef/jabref/issues/4169)
- We fixed an issue where the list of XMP Exclusion fields in the preferences was not be saved [#4072](https://github.com/JabRef/jabref/issues/4072)
- We fixed an issue where the ArXiv Fetcher did not support HTTP URLs [#4367](https://github.com/JabRef/jabref/pull/4367)

- We fixed an issue where the ArXiv Fetcher did not support HTTP URLs [koppor#328](https://github.com/koppor/jabref/issues/328)
- We fixed an issue where only one PDF file could be imported [#4422](https://github.com/JabRef/jabref/issues/4422)



Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,13 +45,14 @@ public class PdfContentImporter extends Importer {
// input lines into several lines
private String[] lines;
// current index in lines
private int i;
private int lineIndex;
private String curString;
private String year;


public PdfContentImporter(ImportFormatPreferences importFormatPreferences) {
this.importFormatPreferences = importFormatPreferences;

}
/**
* Removes all non-letter characters at the end
Expand Down Expand Up @@ -225,17 +226,19 @@ public ParserResult importDatabase(Path filePath, Charset defaultEncoding) {
// the different lines are joined into one and thereby separated by " "
lines = firstPageContents.split(System.lineSeparator());

lineIndex = 0; //to prevent array index out of bounds exception on second run we need to reset i to zero

proceedToNextNonEmptyLine();
if (i >= lines.length) {
if (lineIndex >= lines.length) {
// PDF could not be parsed or is empty
// return empty list
return new ParserResult();
}

// we start at the current line
curString = lines[i];
curString = lines[lineIndex];
// i might get incremented later and curString modified, too
i = i + 1;
lineIndex = lineIndex + 1;

String author;
String editor = null;
Expand Down Expand Up @@ -279,10 +282,10 @@ public ParserResult importDatabase(Path filePath, Charset defaultEncoding) {

// after title: authors
author = null;
while ((i < lines.length) && !"".equals(lines[i])) {
while ((lineIndex < lines.length) && !"".equals(lines[lineIndex])) {
// author names are unlikely to be lines among different lines
// treat them line by line
curString = streamlineNames(lines[i]);
curString = streamlineNames(lines[lineIndex]);
if (author == null) {
author = curString;
} else {
Expand All @@ -292,38 +295,38 @@ public ParserResult importDatabase(Path filePath, Charset defaultEncoding) {
author = author.concat(" and ").concat(curString);
}
}
i++;
lineIndex++;
}
curString = "";
i++;
lineIndex++;

// then, abstract and keywords follow
while (i < lines.length) {
curString = lines[i];
while (lineIndex < lines.length) {
curString = lines[lineIndex];
if ((curString.length() >= "Abstract".length()) && "Abstract".equalsIgnoreCase(curString.substring(0, "Abstract".length()))) {
if (curString.length() == "Abstract".length()) {
// only word "abstract" found -- skip line
curString = "";
} else {
curString = curString.substring("Abstract".length() + 1).trim().concat(System.lineSeparator());
}
i++;
lineIndex++;
// fillCurStringWithNonEmptyLines() cannot be used as that uses " " as line separator
// whereas we need linebreak as separator
while ((i < lines.length) && !"".equals(lines[i])) {
curString = curString.concat(lines[i]).concat(System.lineSeparator());
i++;
while ((lineIndex < lines.length) && !"".equals(lines[lineIndex])) {
curString = curString.concat(lines[lineIndex]).concat(System.lineSeparator());
lineIndex++;
}
abstractT = curString.trim();
i++;
lineIndex++;
} else if ((curString.length() >= "Keywords".length()) && "Keywords".equalsIgnoreCase(curString.substring(0, "Keywords".length()))) {
if (curString.length() == "Keywords".length()) {
// only word "Keywords" found -- skip line
curString = "";
} else {
curString = curString.substring("Keywords".length() + 1).trim();
}
i++;
lineIndex++;
fillCurStringWithNonEmptyLines();
keywords = removeNonLettersAtEnd(curString);
} else {
Expand All @@ -340,18 +343,18 @@ public ParserResult importDatabase(Path filePath, Charset defaultEncoding) {
}
}

i++;
lineIndex++;
proceedToNextNonEmptyLine();
}
}

i = lines.length - 1;
lineIndex = lines.length - 1;

// last block: DOI, detailed information
// sometimes, this information is in the third last block etc...
// therefore, read until the beginning of the file

while (i >= 0) {
while (lineIndex >= 0) {
readLastBlock();
// i now points to the block before or is -1
// curString contains the last block, separated by " "
Expand Down Expand Up @@ -522,8 +525,8 @@ private void extractYear() {
* proceed to next non-empty line
*/
private void proceedToNextNonEmptyLine() {
while ((i < lines.length) && "".equals(lines[i].trim())) {
i++;
while ((lineIndex < lines.length) && "".equals(lines[lineIndex].trim())) {
lineIndex++;
}
}

Expand All @@ -540,16 +543,16 @@ private void proceedToNextNonEmptyLine() {
private void fillCurStringWithNonEmptyLines() {
// ensure that curString does not end with " "
curString = curString.trim();
while ((i < lines.length) && !"".equals(lines[i])) {
String curLine = lines[i].trim();
while ((lineIndex < lines.length) && !"".equals(lines[lineIndex])) {
String curLine = lines[lineIndex].trim();
if (!"".equals(curLine)) {
if (!curString.isEmpty()) {
// insert separating space if necessary
curString = curString.concat(" ");
}
curString = curString.concat(lines[i]);
curString = curString.concat(lines[lineIndex]);
}
i++;
lineIndex++;
}

proceedToNextNonEmptyLine();
Expand All @@ -563,22 +566,22 @@ private void fillCurStringWithNonEmptyLines() {
* invariant before/after: i points to line before the last handled block
*/
private void readLastBlock() {
while ((i >= 0) && "".equals(lines[i].trim())) {
i--;
while ((lineIndex >= 0) && "".equals(lines[lineIndex].trim())) {
lineIndex--;
}
// i is now at the end of a block

int end = i;
int end = lineIndex;

// find beginning
while ((i >= 0) && !"".equals(lines[i])) {
i--;
while ((lineIndex >= 0) && !"".equals(lines[lineIndex])) {
lineIndex--;
}
// i is now the line before the beginning of the block
// this fulfills the invariant

curString = "";
for (int j = i + 1; j <= end; j++) {
for (int j = lineIndex + 1; j <= end; j++) {
curString = curString.concat(lines[j].trim());
if (j != end) {
curString = curString.concat(" ");
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
package org.jabref.logic.importer.fileformat;

import java.net.URISyntaxException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Path;
import java.nio.file.Paths;
Expand All @@ -10,6 +9,8 @@
import org.jabref.logic.importer.ImportFormatPreferences;
import org.jabref.logic.util.StandardFileType;
import org.jabref.model.entry.BibEntry;
import org.jabref.model.entry.BibtexEntryTypes;
import org.jabref.model.entry.FieldName;

import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
Expand All @@ -34,14 +35,30 @@ public void testsGetExtensions() {
@Test
public void testGetDescription() {
assertEquals(
"PdfContentImporter parses data of the first page of the PDF and creates a BibTeX entry. Currently, Springer and IEEE formats are supported.",
importer.getDescription());
"PdfContentImporter parses data of the first page of the PDF and creates a BibTeX entry. Currently, Springer and IEEE formats are supported.",
importer.getDescription());
}

@Test
public void doesNotHandleEncryptedPdfs() throws URISyntaxException {
public void doesNotHandleEncryptedPdfs() throws Exception {
Path file = Paths.get(PdfContentImporter.class.getResource("/pdfs/encrypted.pdf").toURI());
List<BibEntry> result = importer.importDatabase(file, StandardCharsets.UTF_8).getDatabase().getEntries();
assertEquals(Collections.emptyList(), result);
}

@Test
public void importTwiceWorksAsExpected() throws Exception {
Path file = Paths.get(PdfContentImporter.class.getResource("/pdfs/minimal.pdf").toURI());
List<BibEntry> result = importer.importDatabase(file, StandardCharsets.UTF_8).getDatabase().getEntries();

BibEntry expected = new BibEntry(BibtexEntryTypes.INPROCEEDINGS);
expected.setField(FieldName.AUTHOR, "1 ");
expected.setField(FieldName.TITLE, "Hello World");

List<BibEntry> resultSecondImport = importer.importDatabase(file, StandardCharsets.UTF_8).getDatabase().getEntries();
assertEquals(Collections.singletonList(expected), result);
assertEquals(Collections.singletonList(expected), resultSecondImport);

}

}

0 comments on commit 583c61f

Please sign in to comment.