From ac148b3125346e70dc5eaeb3eb9cf130d21a7aa8 Mon Sep 17 00:00:00 2001 From: Johannes Meyer zum Alten Borgloh Date: Thu, 11 Oct 2018 20:01:31 +0200 Subject: [PATCH] Fixes pagination detection in Tumblr likes. Fixes the pagination detection in the Tumblr likes crawler for blogs that contain a dash in the name. --- .../TumblThree.Applications/Crawler/TumblrLikedByCrawler.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/TumblThree/TumblThree.Applications/Crawler/TumblrLikedByCrawler.cs b/src/TumblThree/TumblThree.Applications/Crawler/TumblrLikedByCrawler.cs index e388e6c..d045415 100644 --- a/src/TumblThree/TumblThree.Applications/Crawler/TumblrLikedByCrawler.cs +++ b/src/TumblThree/TumblThree.Applications/Crawler/TumblrLikedByCrawler.cs @@ -215,7 +215,7 @@ private long ExtractNextPageLink(string document) // long unixTime = 0; - var pagination = "(id=\"next_page_link\" href=\"[A-Za-z0-9_/:.]+/([0-9]+)/([A-Za-z0-9]+))\""; + var pagination = "(id=\"next_page_link\" href=\"[A-Za-z0-9_/:.-]+/([0-9]+)/([A-Za-z0-9]+))\""; long.TryParse(Regex.Match(document, pagination).Groups[3].Value, out unixTime); return unixTime; }