-
-
Notifications
You must be signed in to change notification settings - Fork 2.6k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
adding DOAB to web search #8598
Changes from 3 commits
e3c9f95
7772eb2
f0c7a1d
d33c30a
16b7cc2
fa880cf
3e71137
4042bcf
2e8c7ae
e746f82
e809885
d677bf1
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,129 @@ | ||
package org.jabref.logic.importer.fetcher; | ||
|
||
import java.net.MalformedURLException; | ||
import java.net.URISyntaxException; | ||
import java.net.URL; | ||
import java.util.ArrayList; | ||
import java.util.Collections; | ||
import java.util.List; | ||
import java.util.StringJoiner; | ||
|
||
import org.jabref.logic.importer.FetcherException; | ||
import org.jabref.logic.importer.Parser; | ||
import org.jabref.logic.importer.SearchBasedParserFetcher; | ||
import org.jabref.logic.importer.fetcher.transformers.DefaultQueryTransformer; | ||
import org.jabref.logic.importer.util.JsonReader; | ||
import org.jabref.model.entry.Author; | ||
import org.jabref.model.entry.AuthorList; | ||
import org.jabref.model.entry.BibEntry; | ||
import org.jabref.model.entry.field.StandardField; | ||
|
||
import kong.unirest.json.JSONArray; | ||
import kong.unirest.json.JSONObject; | ||
import org.apache.http.client.utils.URIBuilder; | ||
import org.apache.lucene.queryparser.flexible.core.nodes.QueryNode; | ||
|
||
/* | ||
fetches books from https://www.doabooks.org/ through their API at | ||
https://www.doabooks.org/en/resources/metadata-harvesting-and-content-dissemination | ||
*/ | ||
|
||
public class DOABFetcher implements SearchBasedParserFetcher { | ||
private static final String SEARCH_URL = "https://directory.doabooks.org/rest/search?"; | ||
// private static final String PEER_REVIEW_URL = " https://directory.doabooks.org/rest/peerReviews?"; | ||
|
||
@Override | ||
public String getName() { | ||
return "DOAB"; | ||
} | ||
|
||
@Override | ||
public URL getURLForQuery(QueryNode luceneQuery) throws URISyntaxException, MalformedURLException, FetcherException { | ||
URIBuilder builder = new URIBuilder(SEARCH_URL); | ||
String query = new DefaultQueryTransformer().transformLuceneQuery(luceneQuery).orElse(""); | ||
// adding quotations for the query for more specified results | ||
// without the quotation the results returned are not relevant to the query | ||
query = ("\"".concat(query)).concat("\""); | ||
builder.addParameter("query", query); | ||
builder.addParameter("expand", "metadata"); | ||
|
||
return builder.build().toURL(); | ||
} | ||
|
||
@Override | ||
public Parser getParser() { | ||
return InputStream -> { | ||
// can't use this method JsonReader.toJsonObject(inputStream) because the results are sent in an array | ||
// like format resulting in an error when trying to convert them into a json object | ||
// created a similar method suitable for this case "toJsonArray" | ||
JSONArray response = JsonReader.toJsonArray(InputStream); | ||
if (response.isEmpty()) { | ||
return Collections.emptyList(); | ||
} | ||
// single result case | ||
if (response.length() == 1) { | ||
|
||
// the information used for bibtex entries are in an array inside the resulting jsonarray | ||
// see this query for reference https://directory.doabooks.org/rest/search?query="i open fire"&expand=metadata | ||
JSONArray metadataArray = response.getJSONObject(0).getJSONArray("metadata"); | ||
BibEntry entry = JsonToBibEntry(metadataArray); | ||
return Collections.singletonList(entry); | ||
} | ||
// multiple results | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The comment can be removed, should be obvious |
||
List<BibEntry> entries = new ArrayList<>(response.length()); | ||
for (int i = 0; i < response.length(); i++) { | ||
JSONArray metadataArray = response.getJSONObject(i).getJSONArray("metadata"); | ||
BibEntry entry = JsonToBibEntry(metadataArray); | ||
entries.add(entry); | ||
} | ||
return entries; | ||
}; | ||
} | ||
|
||
private BibEntry JsonToBibEntry(JSONArray metadataArray) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. lowercase method name |
||
BibEntry entry = new BibEntry(); | ||
List<Author> authorsList = new ArrayList<>(); | ||
List<Author> editorsList = new ArrayList<>(); | ||
StringJoiner keywordJoiner = new StringJoiner(","); | ||
for (int i = 0; i < metadataArray.length(); i++) { | ||
JSONObject dataObject = metadataArray.getJSONObject(i); | ||
switch (dataObject.getString("key")) { | ||
case "dc.contributor.author" -> authorsList.add(toAuthor(dataObject.getString("value"))); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Looking at it a bit closer, it seems that I've spoken too soon. E.g.,
is a bit weird. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Also follow-up in #8576 (comment) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. the data itself returned from the API is in xml format, and the actual information that I used for the Bibtex entries are inside an array called metadata which you can add or remove from the results, see this https://directory.doabooks.org/rest/search?query=%22the+deliverance+of+open+access+books%22, so I agree with you it is a little bit weird There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @Mohamadi98 Most entries seem to be already in correct format, however some seem to indicate Editors in the authors field
The other normal case:
It's a bit of a mess with a list of list of authors... and the naming doesn't really help (yep, it's a mess^^) But if you're stuck, just ask @k3KAW8Pnf7mkmdSMPHz27 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The AuthorList.parse can also handle names not in (LastName, FirstName) format you will see in one of the tests in the latest commit that there are names that returns from the API in (FirstName LastName) and are parsed correctly, so this nice |
||
case "dc.type" -> entry.setField(StandardField.TYPE, | ||
dataObject.getString("value")); | ||
case "dc.date.issued" -> entry.setField(StandardField.YEAR, String.valueOf( | ||
dataObject.getInt("value"))); | ||
case "oapen.identifier.doi" -> entry.setField(StandardField.DOI, | ||
dataObject.getString("value")); | ||
case "dc.title" -> entry.setField(StandardField.TITLE, | ||
dataObject.getString("value")); | ||
case "oapen.pages" -> entry.setField(StandardField.PAGES, String.valueOf( | ||
dataObject.getInt("value"))); | ||
case "dc.description.abstract" -> entry.setField(StandardField.ABSTRACT, | ||
dataObject.getString("value")); | ||
case "dc.language" -> entry.setField(StandardField.LANGUAGE, | ||
dataObject.getString("value")); | ||
case "publisher.name" -> entry.setField(StandardField.PUBLISHER, | ||
dataObject.getString("value")); | ||
case "dc.identifier.uri" -> entry.setField(StandardField.URI, | ||
dataObject.getString("value")); | ||
case "dc.subject.other" -> keywordJoiner.add(dataObject.getString("value")); | ||
case "dc.contributor.editor" -> editorsList.add(toAuthor(dataObject.getString("value"))); | ||
} | ||
} | ||
entry.setField(StandardField.AUTHOR, toAuthorList(authorsList)); | ||
entry.setField(StandardField.EDITOR, toAuthorList(editorsList)); | ||
entry.setField(StandardField.KEYWORDS, String.valueOf(keywordJoiner)); | ||
return entry; | ||
} | ||
|
||
private Author toAuthor(String author) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What happens if you have multiple authors? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
I created a list from the Author class and all authors are added to it then converted to an AuthorList I will add a test case for multiple authors, it works the same as multiple editors there a test case provide for that, but I agree with you I think there is a better solution for that, can you describe what AuthorList.parse () actually does and how to use it, I am not sure I completely understand it. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The nutshell interpretation of
For this case, I'd probably go with
even if we should only be getting one author from doab, and it isn't in bibtex format. Probably preprocess by checking for (Ed.) as in #8598 (comment) Is that explanation/example useful? You seem to already have nailed down how to deal with output from AuthorList. Do you have any questions regarding it? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks, that was useful, and definitely a cleaner solution, I will implement this, and also start thinking about solutions to handle edgy cases like the one you mentioned here #8598 (comment) especially the author name that starts with Fatima which is formatted differently from the other authors. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I also noticed something for some results the field for the number of pages can come in a different format like this result here https://directory.doabooks.org/rest/search?query=%22UAV%E2%80%90Based%20Remote%20Sensing%20Volume%202%22&expand=metadata |
||
String[] names = author.split(" "); | ||
names[0] = String.valueOf(new StringBuilder(names[0]).deleteCharAt(names[0].length() - 1)); | ||
return new Author(names[1], "", "", names[0], ""); | ||
} | ||
|
||
private String toAuthorList(List<Author> authorsList) { | ||
return AuthorList.of(authorsList).getAsFirstLastNamesWithAnd(); | ||
} | ||
} |
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
@@ -0,0 +1,130 @@ | ||||||
package org.jabref.logic.importer.fetcher; | ||||||
|
||||||
import java.util.List; | ||||||
|
||||||
import org.jabref.logic.importer.FetcherException; | ||||||
import org.jabref.model.entry.BibEntry; | ||||||
import org.jabref.model.entry.field.StandardField; | ||||||
import org.jabref.testutils.category.FetcherTest; | ||||||
|
||||||
import org.junit.jupiter.api.BeforeEach; | ||||||
import org.junit.jupiter.api.Test; | ||||||
|
||||||
import static org.junit.jupiter.api.Assertions.assertEquals; | ||||||
import static org.junit.jupiter.api.Assertions.assertFalse; | ||||||
import static org.junit.jupiter.api.Assertions.assertTrue; | ||||||
|
||||||
@FetcherTest | ||||||
public class DOABFetcherTest { | ||||||
private DOABFetcher fetcher; | ||||||
private BibEntry David_Opal; | ||||||
private BibEntry Ronald_Snijder; | ||||||
private BibEntry Andrew_Perrin; | ||||||
|
||||||
@BeforeEach | ||||||
public void setUp() throws Exception { | ||||||
fetcher = new DOABFetcher(); | ||||||
|
||||||
David_Opal = new BibEntry(); | ||||||
David_Opal.setField(StandardField.AUTHOR, "David Pol"); | ||||||
David_Opal.setField(StandardField.TITLE, "I Open Fire"); | ||||||
David_Opal.setField(StandardField.TYPE, "book"); | ||||||
David_Opal.setField(StandardField.DOI, "10.21983/P3.0086.1.00"); | ||||||
David_Opal.setField(StandardField.PAGES, "56"); | ||||||
David_Opal.setField(StandardField.YEAR, "2014"); | ||||||
David_Opal.setField(StandardField.URI, "https://directory.doabooks.org/handle/20.500.12854/34739"); | ||||||
David_Opal.setField(StandardField.ABSTRACT, "David Pol presents an ontology of war in the form of " + | ||||||
"the lyric poem. “Do you hear what I’m shooting at you?” In I Open Fire, all relation is " + | ||||||
"warfare. Minefields compromise movement. Intention aims. Touch burns. Sex explodes bodies. " + | ||||||
"Time ticks in bomb countdowns. Sound is sirens. Plenitude is debris. All of it under " + | ||||||
"surveillance. “My world is critically injured. It was ambushed.” The poems in this book perform" + | ||||||
" the reductions and repetitions endemic to war itself, each one returning the reader to the same," + | ||||||
" unthinkable place in which the range of human experience has been so flattened that, despite all" + | ||||||
" the explosive action, “Almost nothing is happening.” Against this backdrop, we continue to fall" + | ||||||
" in love. But Pol’s poems remind us that this is no reason for optimism. Does love offer a" + | ||||||
" delusional escape from war, or are relationships the very definition of combat? These poems take" + | ||||||
" up the themes of love, sex, marriage, touch, hope — in short, the many dimensions of" + | ||||||
" interpersonal connection — in a world in unprecedentedly critical condition. “And when the night" + | ||||||
" goes off the shock wave throws us apart toward each other.”"); | ||||||
David_Opal.setField(StandardField.LANGUAGE, "English"); | ||||||
David_Opal.setField(StandardField.KEYWORDS, "poetry,love,warfare"); | ||||||
David_Opal.setField(StandardField.PUBLISHER, "punctum books"); | ||||||
|
||||||
Ronald_Snijder = new BibEntry(); | ||||||
Ronald_Snijder.setField(StandardField.AUTHOR, "Ronald Snijder"); | ||||||
Ronald_Snijder.setField(StandardField.TITLE, "The deliverance of open access books"); | ||||||
Ronald_Snijder.setField(StandardField.TYPE, "book"); | ||||||
Ronald_Snijder.setField(StandardField.DOI, "10.26530/OAPEN_1004809"); | ||||||
Ronald_Snijder.setField(StandardField.PAGES, "234"); | ||||||
Ronald_Snijder.setField(StandardField.YEAR, "2019"); | ||||||
Ronald_Snijder.setField(StandardField.URI, "https://directory.doabooks.org/handle/20.500.12854/26303"); | ||||||
Ronald_Snijder.setField(StandardField.ABSTRACT, "In many scholarly disciplines, books - not articles" + | ||||||
" - are the norm. As print runs become smaller, the question arises whether publishing monographs" + | ||||||
" in open access helps to make their contents globally accessible. To answer this question, the" + | ||||||
" results of multiple studies on the usage of open access books are presented. The research" + | ||||||
" focuses on three areas: economic viability; optimization of open access monographs" + | ||||||
" infrastructure and measuring the effects of open access in terms of scholarly impact and" + | ||||||
" societal influence. Each chapter reviews a different aspect: book sales, digital dissemination," + | ||||||
" open licenses, user communities, measuring usage, developing countries and the effects on" + | ||||||
" citations and social media."); | ||||||
Ronald_Snijder.setField(StandardField.LANGUAGE, "English"); | ||||||
Ronald_Snijder.setField(StandardField.KEYWORDS, "Open Access,Monographs,OAPEN Library," + | ||||||
"Directory of Open Access Books"); | ||||||
Ronald_Snijder.setField(StandardField.PUBLISHER, "Amsterdam University Press"); | ||||||
|
||||||
Andrew_Perrin = new BibEntry(); | ||||||
Andrew_Perrin.setField(StandardField.EDITOR, "Andrew Perrin and Loren Stuckenbruck"); | ||||||
Andrew_Perrin.setField(StandardField.TITLE, "Four Kingdom Motifs before and beyond the Book of Daniel"); | ||||||
Andrew_Perrin.setField(StandardField.TYPE, "book"); | ||||||
Andrew_Perrin.setField(StandardField.DOI, "10.1163/9789004443280"); | ||||||
Andrew_Perrin.setField(StandardField.PAGES, "354"); | ||||||
Andrew_Perrin.setField(StandardField.YEAR, "2020"); | ||||||
Andrew_Perrin.setField(StandardField.URI, "https://directory.doabooks.org/handle/20.500.12854/68086"); | ||||||
Andrew_Perrin.setField(StandardField.ABSTRACT, "The four kingdoms motif enabled writers of various " + | ||||||
"cultures, times, and places, to periodize history as the staged succession of empires " + | ||||||
"barrelling towards an utopian age. The motif provided order to lived experiences under empire" + | ||||||
" (the present), in view of ancestral traditions and cultural heritage (the past), and inspired" + | ||||||
" outlooks assuring hope, deliverance, and restoration (the future). Four Kingdom Motifs before" + | ||||||
" and beyond the Book of Daniel includes thirteen essays that explore the reach and redeployment" + | ||||||
" of the motif in classical and ancient Near Eastern writings, Jewish and Christian scriptures," + | ||||||
" texts among the Dead Sea Scrolls, Apocrypha and pseudepigrapha, depictions in European" + | ||||||
" architecture and cartography, as well as patristic, rabbinic, Islamic, and African writings " + | ||||||
"from antiquity through the Mediaeval eras. Readership: Advanced students and scholars of the " + | ||||||
"textual formation, apocalyptic theology, and historiographies of the book of Daniel and its " + | ||||||
"diverse reception by writers and communities."); | ||||||
Andrew_Perrin.setField(StandardField.LANGUAGE, "English"); | ||||||
Andrew_Perrin.setField(StandardField.KEYWORDS, "Religion"); | ||||||
Andrew_Perrin.setField(StandardField.PUBLISHER, "Brill"); | ||||||
|
||||||
} | ||||||
|
||||||
@Test | ||||||
public void TestGetName() { | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Method names should start with a lower case:
Suggested change
|
||||||
assertEquals("DOAB", fetcher.getName()); | ||||||
} | ||||||
|
||||||
@Test | ||||||
public void TestPerformSearch() throws FetcherException { | ||||||
List<BibEntry> entries; | ||||||
entries = fetcher.performSearch("i open fire"); | ||||||
assertFalse(entries.isEmpty()); | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. assertFalse and assertTrue make it hard to see why a test fails. In this case it's better to compare the entry collections. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yes, sure |
||||||
assertTrue(entries.contains(David_Opal)); | ||||||
} | ||||||
|
||||||
@Test | ||||||
public void TestPerformSearch2() throws FetcherException { | ||||||
List<BibEntry> entries; | ||||||
entries = fetcher.performSearch("the deliverance of open access books"); | ||||||
assertFalse(entries.isEmpty()); | ||||||
assertTrue(entries.contains(Ronald_Snijder)); | ||||||
} | ||||||
|
||||||
@Test | ||||||
public void TestPerformSearch3() throws FetcherException { | ||||||
List<BibEntry> entries; | ||||||
entries = fetcher.performSearch("Four Kingdom Motifs before and beyond the Book of Daniel"); | ||||||
assertFalse(entries.isEmpty()); | ||||||
assertTrue(entries.contains(Andrew_Perrin)); | ||||||
} | ||||||
|
||||||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
not needed?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Not sure how you want to integrate it, should it be a different fetcher, DOABPeerReviews for example or maybe in the same fetcher but the user has two options.
I am actually not sure if I understand what peer reviews mean in the world of books.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I would ignore it for the moment.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Remove the comment then