From 08d2ace5b49b4b543ad5e1757c2d92381c91e162 Mon Sep 17 00:00:00 2001 From: Thomas <71355143+thomas694@users.noreply.github.com> Date: Thu, 1 Apr 2021 21:41:14 +0200 Subject: [PATCH] Add issue #101 "Can not download a specific tumblr anymore" - Due to the last fix it is now possible again to add blogs with a custom domain by using their tumblr url. So the redirect is caught and the blog is updated with the new url. - Now it is also possible to add these blogs with a custom domain directly through the 'add blog' field. The entered url is downloaded and checked for tumblr criteria and if found, the blog is added to the list. - The clipboard monitor has not been adapted. It shall not check all url-like texts, only explicit tumblr urls. --- src/TumblThree/SharedAssemblyInfo.cs | 4 ++-- .../Controllers/ManagerController.cs | 15 +++++++++++---- .../Crawler/ITumblrBlogDetector.cs | 2 ++ .../Crawler/TumblrBlogDetector.cs | 18 ++++++++++++++++++ .../TumblThree.Domain/Models/BlogFactory.cs | 6 ++++++ .../Models/Blogs/TumblrBlog.cs | 16 +++++++++++++--- .../TumblThree.Domain/Models/IBlogFactory.cs | 2 ++ .../TumblThree.Domain/Models/IUrlValidator.cs | 2 ++ .../TumblThree.Domain/Models/UrlValidator.cs | 8 ++++++++ 9 files changed, 64 insertions(+), 9 deletions(-) diff --git a/src/TumblThree/SharedAssemblyInfo.cs b/src/TumblThree/SharedAssemblyInfo.cs index dbf5779f..9f30fdc1 100644 --- a/src/TumblThree/SharedAssemblyInfo.cs +++ b/src/TumblThree/SharedAssemblyInfo.cs @@ -12,5 +12,5 @@ [assembly: ComVisible(false)] [assembly: NeutralResourcesLanguage("en-US", UltimateResourceFallbackLocation.MainAssembly)] -[assembly: AssemblyVersion("1.1.0.0")] -[assembly: AssemblyFileVersion("1.3.1.0")] +[assembly: AssemblyVersion("1.4.0.0")] +[assembly: AssemblyFileVersion("1.4.0.0")] diff --git a/src/TumblThree/TumblThree.Applications/Controllers/ManagerController.cs b/src/TumblThree/TumblThree.Applications/Controllers/ManagerController.cs index 5825760c..aa426e7f 100644 --- 
a/src/TumblThree/TumblThree.Applications/Controllers/ManagerController.cs +++ b/src/TumblThree/TumblThree.Applications/Controllers/ManagerController.cs @@ -456,7 +456,7 @@ private void EnqueueAutoDownload() } } - private bool CanAddBlog() => _blogFactory.IsValidTumblrBlogUrl(_crawlerService.NewBlogUrl); + private bool CanAddBlog() => _blogFactory.IsValidTumblrBlogUrl(_crawlerService.NewBlogUrl) || _blogFactory.IsValidUrl(_crawlerService.NewBlogUrl); private async Task AddBlog() { @@ -476,12 +476,13 @@ private async Task AddBlog() catch (Exception e) { Logger.Error($"ManagerController:AddBlog: {e}"); + _shellService.ShowError(e, e.Message); } } private void CleanFailedAddBlog() { - IBlog blog = CheckIfCrawlableBlog(_crawlerService.NewBlogUrl); + IBlog blog = CheckIfCrawlableBlog(_crawlerService.NewBlogUrl).GetAwaiter().GetResult(); if (Directory.Exists(Path.Combine(Directory.GetParent(blog.Location).FullName, blog.Name)) && !Directory.EnumerateFileSystemEntries(Path.Combine(Directory.GetParent(blog.Location).FullName, blog.Name)).Any()) { @@ -645,7 +646,7 @@ private async Task AddBlogAsync(string blogUrl) blogUrl = _crawlerService.NewBlogUrl; } - IBlog blog = CheckIfCrawlableBlog(blogUrl); + IBlog blog = await CheckIfCrawlableBlog(blogUrl); blog = await CheckIfBlogIsHiddenTumblrBlogAsync(blog); @@ -705,8 +706,14 @@ private async Task UpdateMetaInformationAsync(IBlog blog) crawler.Dispose(); } - private IBlog CheckIfCrawlableBlog(string blogUrl) + private async Task CheckIfCrawlableBlog(string blogUrl) { + if (!_blogFactory.IsValidTumblrBlogUrl(blogUrl) && _blogFactory.IsValidUrl(blogUrl)) + { + if ( await _tumblrBlogDetector.IsTumblrBlogWithCustomDomainAsync(blogUrl)) + return TumblrBlog.Create(blogUrl, Path.Combine(_shellService.Settings.DownloadLocation, "Index"), _shellService.Settings.FilenameTemplate, true); + throw new Exception($"The url '{blogUrl}' cannot be recognized as Tumblr blog!"); + } return _blogFactory.GetBlog(blogUrl, 
Path.Combine(_shellService.Settings.DownloadLocation, "Index"), _shellService.Settings.FilenameTemplate); } diff --git a/src/TumblThree/TumblThree.Applications/Crawler/ITumblrBlogDetector.cs b/src/TumblThree/TumblThree.Applications/Crawler/ITumblrBlogDetector.cs index d328add9..29465256 100644 --- a/src/TumblThree/TumblThree.Applications/Crawler/ITumblrBlogDetector.cs +++ b/src/TumblThree/TumblThree.Applications/Crawler/ITumblrBlogDetector.cs @@ -9,5 +9,7 @@ public interface ITumblrBlogDetector Task IsPasswordProtectedTumblrBlogAsync(string url); Task IsTumblrBlogAsync(string url); + + Task IsTumblrBlogWithCustomDomainAsync(string url); } } diff --git a/src/TumblThree/TumblThree.Applications/Crawler/TumblrBlogDetector.cs b/src/TumblThree/TumblThree.Applications/Crawler/TumblrBlogDetector.cs index 5b5122bf..47cb0a12 100644 --- a/src/TumblThree/TumblThree.Applications/Crawler/TumblrBlogDetector.cs +++ b/src/TumblThree/TumblThree.Applications/Crawler/TumblrBlogDetector.cs @@ -42,6 +42,12 @@ public async Task IsPasswordProtectedTumblrBlogAsync(string url) return location.Contains("blog_auth"); } + public async Task IsTumblrBlogWithCustomDomainAsync(string url) + { + string page = await GetPage(url); + return page.Contains("tumblr://x-callback-url/blog?blogName="); + } + private async Task GetUrlRedirection(string url) { HttpWebRequest request = webRequestFactory.CreateGetRequest(url); @@ -54,5 +60,17 @@ private async Task GetUrlRedirection(string url) return location; } + + private async Task GetPage(string url) + { + HttpWebRequest request = webRequestFactory.CreateGetRequest(url); + string page; + using (var response = await request.GetResponseAsync().TimeoutAfter(shellService.Settings.TimeOut) as HttpWebResponse) + { + page = await webRequestFactory.ReadRequestToEndAsync(request); + } + + return page; + } } } diff --git a/src/TumblThree/TumblThree.Domain/Models/BlogFactory.cs b/src/TumblThree/TumblThree.Domain/Models/BlogFactory.cs index dbb389b7..480de91d 100644 
--- a/src/TumblThree/TumblThree.Domain/Models/BlogFactory.cs +++ b/src/TumblThree/TumblThree.Domain/Models/BlogFactory.cs @@ -28,6 +28,12 @@ public bool IsValidTumblrBlogUrl(string blogUrl) || _urlValidator.IsTumbexUrl(blogUrl); } + public bool IsValidUrl(string url) + { + url = _urlValidator.AddHttpsProtocol(url); + return _urlValidator.IsValidUrl(url); + } + public IBlog GetBlog(string blogUrl, string path, string filenameTemplate) { blogUrl = _urlValidator.AddHttpsProtocol(blogUrl); diff --git a/src/TumblThree/TumblThree.Domain/Models/Blogs/TumblrBlog.cs b/src/TumblThree/TumblThree.Domain/Models/Blogs/TumblrBlog.cs index ea21e50b..86c98051 100644 --- a/src/TumblThree/TumblThree.Domain/Models/Blogs/TumblrBlog.cs +++ b/src/TumblThree/TumblThree.Domain/Models/Blogs/TumblrBlog.cs @@ -1,4 +1,5 @@ using System; +using System.Globalization; using System.IO; using System.Runtime.Serialization; @@ -9,12 +10,14 @@ namespace TumblThree.Domain.Models.Blogs [DataContract] public class TumblrBlog : Blog { - public static Blog Create(string url, string location, string filenameTemplate) + public static Blog Create(string url, string location, string filenameTemplate, bool isCustomDomain = false) { + url = isCustomDomain ? url : ExtractUrl(url); + var name = isCustomDomain ? 
ExtractCustomName(url) : ExtractName(url); var blog = new TumblrBlog() { - Url = ExtractUrl(url), - Name = ExtractName(url), + Url = url, + Name = name, BlogType = Models.BlogTypes.tumblr, OriginalBlogType = Models.BlogTypes.tumblr, Location = location, @@ -36,5 +39,12 @@ public static Blog Create(string url, string location, string filenameTemplate) return blog; } + + private static string ExtractCustomName(string url) + { + url = url.ToLower(CultureInfo.InvariantCulture).Replace("https://", string.Empty).Replace("http://", string.Empty).TrimEnd('/'); + var parts = url.Split('.'); + return parts[parts.Length - 2]; + } } } diff --git a/src/TumblThree/TumblThree.Domain/Models/IBlogFactory.cs b/src/TumblThree/TumblThree.Domain/Models/IBlogFactory.cs index 099e694c..62ed2e75 100644 --- a/src/TumblThree/TumblThree.Domain/Models/IBlogFactory.cs +++ b/src/TumblThree/TumblThree.Domain/Models/IBlogFactory.cs @@ -6,6 +6,8 @@ public interface IBlogFactory { bool IsValidTumblrBlogUrl(string blogUrl); + bool IsValidUrl(string blogUrl); + IBlog GetBlog(string blogUrl, string path, string filenameTemplate); } } diff --git a/src/TumblThree/TumblThree.Domain/Models/IUrlValidator.cs b/src/TumblThree/TumblThree.Domain/Models/IUrlValidator.cs index 2825be12..177f71ef 100644 --- a/src/TumblThree/TumblThree.Domain/Models/IUrlValidator.cs +++ b/src/TumblThree/TumblThree.Domain/Models/IUrlValidator.cs @@ -15,5 +15,7 @@ public interface IUrlValidator string AddHttpsProtocol(string url); bool IsTumbexUrl(string url); + + bool IsValidUrl(string url); } } diff --git a/src/TumblThree/TumblThree.Domain/Models/UrlValidator.cs b/src/TumblThree/TumblThree.Domain/Models/UrlValidator.cs index 7a4ac0f4..d223d928 100644 --- a/src/TumblThree/TumblThree.Domain/Models/UrlValidator.cs +++ b/src/TumblThree/TumblThree.Domain/Models/UrlValidator.cs @@ -9,6 +9,7 @@ namespace TumblThree.Domain.Models public class UrlValidator : IUrlValidator { private readonly Regex tumbexRegex = new 
Regex("(http[A-Za-z0-9_/:.]*www.tumbex.com[A-Za-z0-9_/:.-]*tumblr/)"); + private readonly Regex urlRegex = new Regex("(^https?://[A-Za-z0-9_.]*[/]?$)"); public bool IsValidTumblrUrl(string url) { @@ -51,6 +52,13 @@ public bool IsValidTumblrTagSearchUrl(string url) (url.StartsWith("http://", true, null) || url.StartsWith("https://", true, null)); } + public bool IsValidUrl(string url) + { + return url != null && !url.Any(char.IsWhiteSpace) && + (url.StartsWith("http://", true, null) || url.StartsWith("https://", true, null)) && + urlRegex.IsMatch(url); + } + public string AddHttpsProtocol(string url) { if (url == null)