Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Medline plain importer test #354

Merged
merged 1 commit into from
Jan 7, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@ to [sourceforge feature requests](https://sourceforge.net/p/jabref/features/) by
- Fixed #473: Import/export to external database works again
- Fixed #526: OpenOffice/LibreOffice connection works again on Linux/OSX
- Fixed #533: Preview parsed incorrectly when regular expression was enabled
- Fixed: MedlinePlain Importer made more resistant to malformed entries
- Fixed #564: Cite command changes are immediately reflected in the push-to-application actions, and not only after restart

### Removed
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ public List<BibEntry> importEntries(InputStream stream, OutputPrinter status) th

for (String entry1 : entries) {

if (entry1.trim().isEmpty()) {
if (entry1.trim().isEmpty() || !entry1.contains("-")) {
continue;
}

Expand All @@ -113,16 +113,14 @@ public List<BibEntry> importEntries(InputStream stream, OutputPrinter status) th
String[] fields = entry1.split("\n");

for (int j = 0; j < fields.length; j++) {
if ("".equals(fields[j])) {
continue;
}

StringBuilder current = new StringBuilder(fields[j]);
boolean done = false;

while (!done && (j < (fields.length - 1))) {
if (fields[j + 1].length() <= 4) {
System.out.println("aaa");
j++;
continue;
}
if (fields[j + 1].charAt(4) != '-') {
if ((current.length() > 0)
Expand All @@ -141,7 +139,7 @@ public List<BibEntry> importEntries(InputStream stream, OutputPrinter status) th
String val = entry.substring(entry.indexOf('-') + 1).trim();
if ("PT".equals(lab)) {
val = val.toLowerCase();
if ("BOOK".equals(val)) {
if ("book".equals(val)) {
type = "book";
} else if ("journal article".equals(val)
|| "classical article".equals(val)
Expand All @@ -152,7 +150,7 @@ public List<BibEntry> importEntries(InputStream stream, OutputPrinter status) th
type = "article";
} else if ("clinical conference".equals(val)
|| "consensus development conference".equals(val)
|| "consensus development conference, NIH".equals(val)) {
|| "consensus development conference, nih".equals(val)) {
type = "conference";
} else if ("technical report".equals(val)) {
type = "techreport";
Expand Down Expand Up @@ -269,7 +267,8 @@ else if ("AID".equals(lab)) {
ArrayList<Object> toRemove = new ArrayList<>();
for (Map.Entry<String, String> key : hm.entrySet()) {
String content = key.getValue();
if ((content == null) || content.trim().isEmpty()) {
// content can never be null so only check if content is empty
if (content.trim().isEmpty()) {
toRemove.add(key.getKey());
}
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,192 @@
package net.sf.jabref.importer.fileformat;

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;

import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;

import net.sf.jabref.Globals;
import net.sf.jabref.JabRefPreferences;
import net.sf.jabref.bibtex.BibtexEntryAssert;
import net.sf.jabref.bibtex.EntryTypes;
import net.sf.jabref.importer.OutputPrinterToNull;
import net.sf.jabref.model.entry.BibEntry;

public class MedlinePlainImporterTest {

private final InputStream emptyFileStream = streamForString("");
private MedlinePlainImporter importer;


/**
 * Exposes the given text as an in-memory stream of its UTF-8 encoded bytes.
 *
 * @param string the text to wrap
 * @return a stream over the UTF-8 bytes of {@code string}
 */
private InputStream streamForString(String string) {
    byte[] utf8Bytes = string.getBytes(StandardCharsets.UTF_8);
    return new ByteArrayInputStream(utf8Bytes);
}

/**
 * Runs before every test: assigns the result of {@code JabRefPreferences.getInstance()} to
 * {@code Globals.prefs} and creates a new {@link MedlinePlainImporter} for the test to use.
 */
@Before
public void setUp() throws Exception {
    Globals.prefs = JabRefPreferences.getInstance();
    this.importer = new MedlinePlainImporter();
}

/**
 * Checks that a set of non-Medline fixture files (Copac and ISI files, judging by their names)
 * is rejected by {@code isRecognizedFormat}.
 * <p>
 * Note: despite the method name, every assertion here expects {@code false}.
 * <p>
 * Each resource is checked for existence first, because {@code getResourceAsStream} returns
 * {@code null} for a missing file; without the check a missing fixture would either pass
 * vacuously or fail with an error that does not name the file.
 */
@Test
public void testIsRecognizedFormat() throws Exception {
    List<String> list = Arrays.asList("CopacImporterTest1.txt", "CopacImporterTest2.txt", "IsiImporterTest1.isi",
            "IsiImporterTestInspec.isi", "IsiImporterTestWOS.isi", "IsiImporterTestMedline.isi");
    for (String str : list) {
        try (InputStream is = MedlinePlainImporter.class.getResourceAsStream(str)) {
            Assert.assertNotNull("Test resource not found: " + str, is);
            // The message names the file so a failure points at the offending fixture.
            Assert.assertFalse(str + " must not be recognized as MedlinePlain", importer.isRecognizedFormat(is));
        }
    }
}

/**
 * Checks that every MedlinePlain fixture file listed below is accepted by
 * {@code isRecognizedFormat}.
 * <p>
 * Note: despite the method name, every assertion here expects {@code true}.
 * <p>
 * Each resource is checked for existence first, because {@code getResourceAsStream} returns
 * {@code null} for a missing file; without the check a missing fixture would surface as an
 * error that does not name the file.
 */
@Test
public void testIsNotRecognizedFormat() throws Exception {
    List<String> list = Arrays.asList("MedlinePlainImporterTestMultipleEntries.txt",
            "MedlinePlainImporterTestCompleteEntry.txt", "MedlinePlainImporterTestMultiAbstract.txt",
            "MedlinePlainImporterTestMultiTitle.txt", "MedlinePlainImporterTestDOI.txt",
            "MedlinePlainImporterTestInproceeding.txt");
    for (String str : list) {
        try (InputStream is = MedlinePlainImporter.class.getResourceAsStream(str)) {
            Assert.assertNotNull("Test resource not found: " + str, is);
            // The message names the file so a failure points at the offending fixture.
            Assert.assertTrue(str + " must be recognized as MedlinePlain", importer.isRecognizedFormat(is));
        }
    }
}

/**
 * Checks that an empty input stream is not recognized as MedlinePlain.
 */
@Test
public void testIsNotEmptyFileRecognizedFormat() throws Exception {
    boolean recognized = importer.isRecognizedFormat(emptyFileStream);
    Assert.assertFalse(recognized);
}

@Test
public void testImportMultipleEntriesInSingleFile() throws Exception {
try (InputStream is = MedlinePlainImporter.class
.getResourceAsStream("MedlinePlainImporterTestMultipleEntries.txt")) {

List<BibEntry> entries = importer.importEntries(is, new OutputPrinterToNull());
Assert.assertEquals(7, entries.size());
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If there are 7 entries then you should also test that all entries are parsed correctly and not just the first two. Probably it is more convenient to reduce the number of entries in the test file and thus simplify the test case.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This test should only check whether multiple entries in one file can be read and imported together. The entry-specific checks are performed in the testImportSingleEntriesInSingleFiles method, using the approach suggested by @koppor of comparing the imported entry against a BibTeX file.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You should verify all 7 entries or delete the last 5 in the test file.


BibEntry testEntry = entries.get(0);
Assert.assertEquals(EntryTypes.getType("article"), testEntry.getType());
Assert.assertNull(testEntry.getField("month"));
Assert.assertEquals("Long, Vicky and Marland, Hilary", testEntry.getField("author"));
Assert.assertEquals(
"From danger and motherhood to health and beauty: health advice for the factory girl in early twentieth-century Britain.",
testEntry.getField("title"));

testEntry = entries.get(1);
Assert.assertEquals(EntryTypes.getType("Conference"), testEntry.getType());
Assert.assertEquals("06", testEntry.getField("month"));
Assert.assertNull(testEntry.getField("author"));
Assert.assertNull(testEntry.getField("title"));

testEntry = entries.get(2);
Assert.assertEquals(EntryTypes.getType("Book"), testEntry.getType());
Assert.assertEquals(
"This is a Testtitle: This title should be appended: This title should also be appended. Another append to the Title? LastTitle",
testEntry.getField("title"));

testEntry = entries.get(3);
Assert.assertEquals(EntryTypes.getType("TechReport"), testEntry.getType());
Assert.assertNotNull(testEntry.getField("doi"));

testEntry = entries.get(4);
Assert.assertEquals(EntryTypes.getType("InProceedings"), testEntry.getType());
Assert.assertEquals("Inproceedings book title", testEntry.getField("booktitle"));

testEntry = entries.get(5);
Assert.assertEquals(EntryTypes.getType("Proceedings"), testEntry.getType());

testEntry = entries.get(6);
Assert.assertEquals(EntryTypes.getTypeOrDefault(""), testEntry.getType());

}
}

/**
 * Checks that importing an empty stream yields a list equal to the empty list.
 */
@Test
public void testEmptyFileImport() throws Exception {
    List<BibEntry> expected = Collections.emptyList();
    List<BibEntry> imported = importer.importEntries(emptyFileStream, new OutputPrinterToNull());
    Assert.assertEquals(expected, imported);
}

@Test
public void testImportSingleEntriesInSingleFiles() throws IOException {
List<String> testFiles = Arrays.asList("MedlinePlainImporterTestCompleteEntry",
"MedlinePlainImporterTestMultiAbstract", "MedlinePlainImporterTestMultiTitle",
"MedlinePlainImporterTestDOI", "MedlinePlainImporterTestInproceeding");
for (String testFile : testFiles) {
String medlineFile = testFile + ".txt";
String bibtexFile = testFile + ".bib";
assertImportOfMedlineFileEqualsBibtexFile(medlineFile, bibtexFile);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please directly test that the imported entries are correct (as it is done above). The test as it stands could also fail because the bib-file import has a bug.

}
}

/**
 * Imports the given Medline resource and asserts that it produces exactly one entry which
 * {@code BibtexEntryAssert} considers equal to the content of the given BibTeX resource.
 *
 * @param medlineFile name of the Medline resource, resolved relative to {@code MedlinePlainImporter}
 * @param bibtexFile name of the BibTeX resource holding the expected entry, resolved the same way
 * @throws IOException declared for the import call and for closing the resource streams
 */
private void assertImportOfMedlineFileEqualsBibtexFile(String medlineFile, String bibtexFile) throws IOException {
    try (InputStream medlineStream = MedlinePlainImporter.class.getResourceAsStream(medlineFile);
            InputStream bibtexStream = MedlinePlainImporter.class.getResourceAsStream(bibtexFile)) {
        List<BibEntry> imported = importer.importEntries(medlineStream, new OutputPrinterToNull());
        Assert.assertNotNull(imported);
        Assert.assertEquals(1, imported.size());

        BibEntry onlyEntry = imported.get(0);
        BibtexEntryAssert.assertEquals(bibtexStream, onlyEntry);
    }
}

/**
 * Feeds one record carrying sixteen different comment-type labels to the importer and expects
 * a single entry whose {@code comment} field holds all sixteen values joined by newlines,
 * in input order.
 */
@Test
public void testMultiLineComments() throws IOException {
    // One Medline line per literal; the last line has no trailing newline.
    String medlineInput = "PMID-22664220\n"
            + "CON - Comment1\n"
            + "CIN - Comment2\n"
            + "EIN - Comment3\n"
            + "EFR - Comment4\n"
            + "CRI - Comment5\n"
            + "CRF - Comment6\n"
            + "PRIN- Comment7\n"
            + "PROF- Comment8\n"
            + "RPI - Comment9\n"
            + "RPF - Comment10\n"
            + "RIN - Comment11\n"
            + "ROF - Comment12\n"
            + "UIN - Comment13\n"
            + "UOF - Comment14\n"
            + "SPIN- Comment15\n"
            + "ORI - Comment16";

    // "Comment1\nComment2\n...\nComment16"
    StringBuilder expectedComment = new StringBuilder("Comment1");
    for (int i = 2; i <= 16; i++) {
        expectedComment.append('\n').append("Comment").append(i);
    }

    try (InputStream stream = streamForString(medlineInput)) {
        List<BibEntry> imported = importer.importEntries(stream, new OutputPrinterToNull());

        BibEntry expected = new BibEntry();
        expected.setField("comment", expectedComment.toString());
        BibtexEntryAssert.assertEquals(Arrays.asList(expected), imported);
    }
}

/**
 * Feeds a record with one {@code MH} and one {@code OT} line to the importer and expects a
 * single entry whose {@code keywords} field is {@code "Female, Male"}.
 */
@Test
public void testKeyWords() throws IOException {
    String medlineInput = "PMID-22664795\n"
            + "MH - Female\n"
            + "OT - Male";

    try (InputStream stream = streamForString(medlineInput)) {
        List<BibEntry> imported = importer.importEntries(stream, new OutputPrinterToNull());

        BibEntry expected = new BibEntry();
        expected.setField("keywords", "Female, Male");
        BibtexEntryAssert.assertEquals(Arrays.asList(expected), imported);
    }
}

@Test
public void testAllArticleTypes() throws IOException {
try (InputStream stream = streamForString("PMID-22664795" + "\n" + "PT - journal article" + "\n"
+ "PT - classical article" + "\n" + "PT - corrected and republished article" + "\n"
+ "PT - introductory journal article" + "\n" + "PT - newspaper article");) {
List<BibEntry> actualEntries = importer.importEntries(stream, new OutputPrinterToNull());

BibEntry expectedEntry = new BibEntry();
expectedEntry.setType(EntryTypes.getType("article"));
BibtexEntryAssert.assertEquals(Arrays.asList(expectedEntry), actualEntries);
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You can write it more clearly by separating the input pairs from the logic. Make it obvious what is getting tested. For instance:

for (int n = 1; n <= 5; n++) {
    String medlineFile = "MedlinePlainImporterTest" + n + ".txt";
    String bibtexFile = "MedlinePlainImporterTestBib" + n + ".bib";
    assertImportOfMedlineFileEqualsBibtexFile(medlineFile, bibtexFile);
}

You can implement the assert method from the remaining logic.

What would be even better is to have a single test per file, with a name describing what is special about that file. You can still reuse the assertImport... method, but instead of naming the files 1..5 and capturing them all under a single test, you can create five tests with expressive names.

}

@Test
public void testGetFormatName() {
Assert.assertEquals("MedlinePlain", importer.getFormatName());
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Only the last (positive) assert statement is necessary.

}

@Test
public void testGetCLIId() {
Assert.assertEquals("medlineplain", importer.getCLIId());
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same as above.

}

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
@Article{,
Title = {From danger and motherhood to health and beauty: health advice for the factory girl in early twentieth-century Britain.},
Author = {Long, Vicky and Marland, Hilary},
Journal = {20 century British history},
Year = {2009},
Pages = {454-81},
Volume = {20},
Abstract = {A survey of government reports and the archives and journals of other agencies interested in industrial health in early twentieth-century Britain has led us to conclude that, in addition to apprehension about the potentially harmful impact of industrial work on the reproductive health of women, there was a great deal of interest in the health of young, unmarried girls in the workplace, particularly the factory. Adopting a broader time frame, we suggest that the First World War, with its emphasis on the reproductive health of women, was an anomalous experience in a broader trend which stressed the growing acceptability of women's work within industry. Concern with girls' health and welfare embraced hygiene, diet, exercise, recreation, fashion and beauty within and outside of the workplace, as well as the impact of the boredom and monotony associated with industrial work. The health problems of young women workers tended to be associated with behaviour and environment rather than biology, as were anxieties about the impact of work on morals, habits and character. Efforts to ensure that young female factory workers would be equipped to take their place as citizens and parents, we argue, often dovetailed rather than diverged with the 'boy labour' question.},
Address = {England},
ISSN = {0955-2359 (Linking)},
Keywords = {Attitude to Health, Employment/psychology, Female, Gender Identity, Great Britain, History, 20th Century, Humans, Occupational Health/*history, Reproductive Medicine/*history, Women, Working/*history/psychology}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
PMID- 20481061
OWN - NLM
STAT- MEDLINE
DA - 20100520
DCOM- 20100526
LR - 20150731
IS - 0955-2359 (Print)
IS - 0955-2359 (Linking)
VI - 20
IP - 4
DP - 2009
TI - From danger and motherhood to health and beauty: health advice for the factory
girl in early twentieth-century Britain.
PG - 454-81
AB - A survey of government reports and the archives and journals of other agencies


interested in industrial health in early twentieth-century Britain has led us to
conclude that, in addition to apprehension about the potentially harmful impact
of industrial work on the reproductive health of women, there was a great deal of
interest in the health of young, unmarried girls in the workplace, particularly
the factory. Adopting a broader time frame, we suggest that the First World War,
with its emphasis on the reproductive health of women, was an anomalous
experience in a broader trend which stressed the growing acceptability of women's
work within industry. Concern with girls' health and welfare embraced hygiene,
diet, exercise, recreation, fashion and beauty within and outside of the
workplace, as well as the impact of the boredom and monotony associated with
industrial work. The health problems of young women workers tended to be
associated with behaviour and environment rather than biology, as were anxieties
about the impact of work on morals, habits and character. Efforts to ensure that
young female factory workers would be equipped to take their place as citizens
and parents, we argue, often dovetailed rather than diverged with the 'boy
labour' question.
FAU - Long, Vicky
AU - Long V
AD - Centre for the History of Science, Technology and Medicine, University of
Manchester. Vicky.Long@manchester.ac.uk
FAU - Marland, Hilary
AU - Marland H
LA - eng
GR - /076053/Z/04/Z/Wellcome Trust/United Kingdom
GR - 076053/Wellcome Trust/United Kingdom
PT - Historical Article
PT - Journal Article
PT - Research Support, Non-U.S. Gov't
PL - England
TA - 20 Century Br Hist
JT - 20 century British history
JID - 9015384
SB - QIS
MH - Attitude to Health
MH - Employment/psychology
MH - Female
MH - Gender Identity
MH - Great Britain
MH - History, 20th Century
MH - Humans
MH - Occupational Health/*history
MH - Reproductive Medicine/*history
MH - Women, Working/*history/psychology
PMC - PMC4513071
MID - EMS54115
OID - NLM: EMS54115
OID - NLM: PMC4513071
EDAT- 2009/01/01 00:00
MHDA- 2010/05/27 06:00
CRDT- 2010/05/21 06:00
PST - ppublish
SO - 20 Century Br Hist. 2009;20(4):454-81.
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
@TechReport{,
DOI = {10.1016/j.cpr.2005.02.002}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
PMID-22664220
PT - Technical Report
AID - doi:10.1016/j.cpr.2005.02.002
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
@InProceedings{,
Booktitle = {Inproceedings book title}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
PMID-22664238
PT - Editorial
JT - Inproceedings book title
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
@Conference{,
Year = {2013},
Editor = {Editor, Some and test data, Some},
Month = {06},
Abstract = {Old Abstract
new abstract},
Comment = {First Comment},
Journal = {Test Journal}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
PMC -22664230
TI-
BTI -
FAU -
FED - Some Editor
PT - Clinical Conference
PT - Consensus Development Conference
PT - Consensus Development Conference, NIH
FED - Some test data
JT - Test Journal
PG -
PL -
IS -
VI -
AB - Old Abstract
AB - new abstract
DP - 2013 06 10
MH -
AID -
CON - First Comment
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
@Book{,
Title = {This is a Testtitle: This title should be appended: This title should also be appended. Another append to the Title? LastTitle}
}
Loading