JabRef · koppor · Nov 4, 2020 · Dec 13, 2020 · Dec 13, 2020 · Dec 13, 2020
diff --git a/docs/advanced-reading/fetchers.md b/docs/advanced-reading/fetchers.md
@@ -14,6 +14,17 @@ Fetchers are the implementation of the [search using online services](https://do
 
 On Windows, you have to log-off and log-on to let IntelliJ know about the environment variable change. Execute the gradle task "processResources" in the group "others" within IntelliJ to ensure the values have been correctly written. Now, the fetcher tests should run without issues.
 
+## Change the log levels to enable debugging
+
+1. Open `src/test/resources/log4j2-test.xml`
+2. Add the following XML snippet:
+
+   ```xml
+           <Logger name="org.jabref.logic.importer.fetcher" level="DEBUG">
+              <AppenderRef ref="CONSOLE"/>
+           </Logger>
+   ```
+
 ## Background on embedding the keys in JabRef
 
 The keys are placed into the `build.properties` file.

diff --git a/src/main/java/org/jabref/logic/importer/fetcher/GoogleScholar.java b/src/main/java/org/jabref/logic/importer/fetcher/GoogleScholar.java
@@ -41,6 +41,7 @@
 public class GoogleScholar implements FulltextFetcher, SearchBasedFetcher {
     private static final Logger LOGGER = LoggerFactory.getLogger(GoogleScholar.class);
 
+    private static final Pattern LINK_TO_SUBPAGE_PATTERN = Pattern.compile("data-clk-atid=\"([^\"]*)\"");
     private static final Pattern LINK_TO_BIB_PATTERN = Pattern.compile("(https:\\/\\/scholar.googleusercontent.com\\/scholar.bib[^\"]*)");
 
     private static final String BASIC_SEARCH_URL = "https://scholar.google.ch/scholar?";
@@ -128,11 +129,11 @@ public Optional<HelpFile> getHelpPage() {
 
     @Override
     public List<BibEntry> performSearch(String query) throws FetcherException {
-        LOGGER.debug("Using URL {}", query);
+        LOGGER.debug("Using query {}", query);
         obtainAndModifyCookie();
         List<BibEntry> foundEntries = new ArrayList<>(20);
 
-        URIBuilder uriBuilder = null;
+        URIBuilder uriBuilder;
         try {
             uriBuilder = new URIBuilder(BASIC_SEARCH_URL);
         } catch (URISyntaxException e) {
@@ -143,14 +144,16 @@ public List<BibEntry> performSearch(String query) throws FetcherException {
         uriBuilder.addParameter("btnG", "Search");
         uriBuilder.addParameter("q", query);
         String queryURL = uriBuilder.toString();
+        LOGGER.debug("Using URL {}", queryURL);
 
         try {
             addHitsFromQuery(foundEntries, queryURL);
         } catch (IOException e) {
             // if there are too much requests from the same IP address google is answering with a 503 and redirecting to a captcha challenge
             // The caught IOException looks for example like this:
             // java.io.IOException: Server returned HTTP response code: 503 for URL: https://ipv4.google.com/sorry/index?continue=https://scholar.google.com/scholar%3Fhl%3Den%26btnG%3DSearch%26q%3Dbpmn&hl=en&q=CGMSBI0NBDkYuqy9wAUiGQDxp4NLQCWbIEY1HjpH5zFJhv4ANPGdWj0
-            if (e.getMessage().contains("Server returned HTTP response code: 503 for URL")) {
+            if (e.getMessage().contains("Server returned HTTP response code: 403 for URL") ||
+                    (e.getMessage().contains("Server returned HTTP response code: 503 for URL"))) {
                 throw new FetcherException("Fetching from Google Scholar at URL " + queryURL + " failed.",
                         Localization.lang("This might be caused by reaching the traffic limitation of Google Scholar (see 'Help' for details)."), e);
             } else {
@@ -214,22 +217,42 @@ private String constructComplexQueryString(ComplexSearchQuery complexSearchQuery
     }
 
     private void addHitsFromQuery(List<BibEntry> entryList, String queryURL) throws IOException, FetcherException {
+        LOGGER.debug("Downloading from {}", queryURL);
         String content = new URLDownload(queryURL).asString();
 
         if (needsCaptcha(content)) {
             throw new FetcherException("Fetching from Google Scholar failed: Captacha hit at " + queryURL + ".",
                     Localization.lang("This might be caused by reaching the traffic limitation of Google Scholar (see 'Help' for details)."), null);
         }
 
-        Matcher matcher = LINK_TO_BIB_PATTERN.matcher(content);
+        Matcher matcher = LINK_TO_SUBPAGE_PATTERN.matcher(content);
+        if (!matcher.find()) {
+            LOGGER.debug("No data-clk-atid found in html {}", content);
+            return;
+        }
+
+        String infoPageUrl = BASIC_SEARCH_URL + "q=info:" + matcher.group(1) + ":scholar.google.com/&output=cite&scirp=0&hl=en";
+        LOGGER.debug("Using infoPageUrl {}", infoPageUrl);
+        URLDownload infoPageUrlDownload = new URLDownload(infoPageUrl);
+        LOGGER.debug("Downloading from {}", infoPageUrl);
+        String infoPageContent = infoPageUrlDownload.asString();
+
+        matcher = LINK_TO_BIB_PATTERN.matcher(infoPageContent);
+        boolean found = false;
         while (matcher.find()) {
+            found = true;
             String citationsPageURL = matcher.group().replace("&amp;", "&");
+            LOGGER.debug("Using citationsPageURL {}", citationsPageURL);
             BibEntry newEntry = downloadEntry(citationsPageURL);
             entryList.add(newEntry);
         }
+        if (!found) {
+            LOGGER.debug("Did not found pattern in html {}", infoPageContent);
+        }
     }
 
     private BibEntry downloadEntry(String link) throws IOException, FetcherException {
+        LOGGER.debug("Downloading from {}", link);
         String downloadedContent = new URLDownload(link).asString();
         BibtexParser parser = new BibtexParser(importFormatPreferences, new DummyFileUpdateMonitor());
         ParserResult result = parser.parse(new StringReader(downloadedContent));

diff --git a/src/main/java/org/jabref/logic/net/URLDownload.java b/src/main/java/org/jabref/logic/net/URLDownload.java
@@ -235,6 +235,10 @@ public String asString() throws IOException {
         return asString(StandardCharsets.UTF_8);
     }
 
+    /**
+     * Returns a modifiable list of cookies related to the URL of this URLDownload.
+     * Any modifications will be used in later calls.
+     */
     public List<HttpCookie> getCookieFromUrl() throws IOException {
         CookieManager cookieManager = new CookieManager();
         CookieHandler.setDefault(cookieManager);

diff --git a/src/test/java/org/jabref/logic/importer/fetcher/GoogleScholarTest.java b/src/test/java/org/jabref/logic/importer/fetcher/GoogleScholarTest.java
@@ -26,64 +26,64 @@
 @FetcherTest
 class GoogleScholarTest implements SearchBasedFetcherCapabilityTest {
 
-    private GoogleScholar finder;
-    private BibEntry entry;
+    private GoogleScholar fetcher;
 
     @BeforeEach
     void setUp() {
         ImportFormatPreferences importFormatPreferences = mock(ImportFormatPreferences.class);
         when(importFormatPreferences.getFieldContentFormatterPreferences()).thenReturn(
                 mock(FieldContentFormatterPreferences.class));
-        finder = new GoogleScholar(importFormatPreferences);
-        entry = new BibEntry();
+        fetcher = new GoogleScholar(importFormatPreferences);
     }
 
     @Test
     @DisabledOnCIServer("CI server is blocked by Google")
     void linkFound() throws IOException, FetcherException {
-        entry.setField(StandardField.TITLE, "Towards Application Portability in Platform as a Service");
+        BibEntry entry = new BibEntry()
+                .withField(StandardField.TITLE, "Towards Application Portability in Platform as a Service");
 
         assertEquals(
                 Optional.of(new URL("https://www.uni-bamberg.de/fileadmin/uni/fakultaeten/wiai_lehrstuehle/praktische_informatik/Dateien/Publikationen/sose14-towards-application-portability-in-paas.pdf")),
-                finder.findFullText(entry)
+                fetcher.findFullText(entry)
         );
     }
 
     @Test
     @DisabledOnCIServer("CI server is blocked by Google")
     void noLinkFound() throws IOException, FetcherException {
-        entry.setField(StandardField.TITLE, "Curriculum programme of career-oriented java specialty guided by principles of software engineering");
+        BibEntry entry = new BibEntry()
+                .withField(StandardField.TITLE, "Curriculum programme of career-oriented java specialty guided by principles of software engineering");
 
-        assertEquals(Optional.empty(), finder.findFullText(entry));
+        assertEquals(Optional.empty(), fetcher.findFullText(entry));
     }
 
     @Test
     @DisabledOnCIServer("CI server is blocked by Google")
     void findSingleEntry() throws FetcherException {
-        entry.setType(StandardEntryType.InProceedings);
-        entry.setCitationKey("geiger2013detecting");
-        entry.setField(StandardField.TITLE, "Detecting Interoperability and Correctness Issues in BPMN 2.0 Process Models.");
-        entry.setField(StandardField.AUTHOR, "Geiger, Matthias and Wirtz, Guido");
-        entry.setField(StandardField.BOOKTITLE, "ZEUS");
-        entry.setField(StandardField.YEAR, "2013");
-        entry.setField(StandardField.PAGES, "41--44");
+        BibEntry entry = new BibEntry(StandardEntryType.InProceedings)
+                .withCitationKey("geiger2013detecting")
+                .withField(StandardField.TITLE, "Detecting Interoperability and Correctness Issues in BPMN 2.0 Process Models.")
+                .withField(StandardField.AUTHOR, "Geiger, Matthias and Wirtz, Guido")
+                .withField(StandardField.BOOKTITLE, "ZEUS")
+                .withField(StandardField.YEAR, "2013")
+                .withField(StandardField.PAGES, "41--44");
 
-        List<BibEntry> foundEntries = finder.performSearch("Detecting Interoperability and Correctness Issues in BPMN 2.0 Process Models");
+        List<BibEntry> foundEntries = fetcher.performSearch("Detecting Interoperability and Correctness Issues in BPMN 2.0 Process Models");
 
         assertEquals(Collections.singletonList(entry), foundEntries);
     }
 
     @Test
     @DisabledOnCIServer("CI server is blocked by Google")
     void findManyEntries() throws FetcherException {
-        List<BibEntry> foundEntries = finder.performSearch("random test string");
+        List<BibEntry> foundEntries = fetcher.performSearch("random test string");
 
         assertEquals(20, foundEntries.size());
     }
 
     @Override
     public SearchBasedFetcher getFetcher() {
-        return finder;
+        return fetcher;
     }
 
     @Override