From 1aebd2e8b39595e5e5b3c7fe1bf02aaaa1843c40 Mon Sep 17 00:00:00 2001 From: Johannes Meyer zum Alten Borgloh Date: Thu, 6 Dec 2018 20:10:54 +0100 Subject: [PATCH] Uses RegEx to search for images and videos. Uses regular expressions to search for images and videos in everything TumblThree scans. --- .../Controllers/DetailsController.cs | 2 + .../Crawler/AbstractTumblrCrawler.cs | 2 +- .../Crawler/TumblrBlogCrawler.cs | 6 + .../Crawler/TumblrHiddenCrawler.cs | 12 +- .../Crawler/TumblrLikedByCrawler.cs | 6 + .../Crawler/TumblrSearchCrawler.cs | 6 + .../Crawler/TumblrTagSearchCrawler.cs | 6 + .../Properties/AppSettings.cs | 4 + .../Services/SettingsService.cs | 2 + .../ViewModels/SettingsViewModel.cs | 20 + .../TumblThree.Domain/Models/Blogs/Blog.cs | 24 + .../TumblThree.Domain/Models/Blogs/IBlog.cs | 4 + .../Properties/Resources.Designer.cs | 56 ++ .../Properties/Resources.resx | 20 + .../Views/DetailsViews/DetailsAllView.xaml | 36 +- .../DetailsViews/DetailsTumblrBlogView.xaml | 835 ++++++++--------- .../DetailsTumblrHiddenBlogView.xaml | 836 +++++++++--------- .../DetailsTumblrLikedByView.xaml | 32 + .../DetailsViews/DetailsTumblrSearchView.xaml | 36 +- .../DetailsTumblrTagSearchView.xaml | 32 + .../Views/SettingsView.xaml | 33 +- 21 files changed, 1196 insertions(+), 814 deletions(-) diff --git a/src/TumblThree/TumblThree.Applications/Controllers/DetailsController.cs b/src/TumblThree/TumblThree.Applications/Controllers/DetailsController.cs index 7792a41..2aa3959 100644 --- a/src/TumblThree/TumblThree.Applications/Controllers/DetailsController.cs +++ b/src/TumblThree/TumblThree.Applications/Controllers/DetailsController.cs @@ -182,6 +182,8 @@ public IBlog CreateFromMultiple(IEnumerable blogFiles) CatBoxType = SetProperty(sharedBlogFiles, "CatBoxType"), MetadataFormat = SetProperty(sharedBlogFiles, "MetadataFormat"), DumpCrawlerData = SetCheckBox(sharedBlogFiles, "DumpCrawlerData"), + RegExPhotos = SetCheckBox(sharedBlogFiles, "RegExPhotos"), + RegExVideos = 
SetCheckBox(sharedBlogFiles, "RegExVideos"), FileDownloadLocation = SetProperty(sharedBlogFiles, "FileDownloadLocation"), Dirty = false }; diff --git a/src/TumblThree/TumblThree.Applications/Crawler/AbstractTumblrCrawler.cs b/src/TumblThree/TumblThree.Applications/Crawler/AbstractTumblrCrawler.cs index a871fcc..830b3eb 100644 --- a/src/TumblThree/TumblThree.Applications/Crawler/AbstractTumblrCrawler.cs +++ b/src/TumblThree/TumblThree.Applications/Crawler/AbstractTumblrCrawler.cs @@ -248,7 +248,7 @@ protected void AddTumblrVideoUrl(string post) } } - protected void AddGernicPhotoUrl(string post) + protected void AddGenericPhotoUrl(string post) { foreach (string imageUrl in tumblrParser.SearchForGenericPhotoUrl(post)) { diff --git a/src/TumblThree/TumblThree.Applications/Crawler/TumblrBlogCrawler.cs b/src/TumblThree/TumblThree.Applications/Crawler/TumblrBlogCrawler.cs index a198734..231e37a 100644 --- a/src/TumblThree/TumblThree.Applications/Crawler/TumblrBlogCrawler.cs +++ b/src/TumblThree/TumblThree.Applications/Crawler/TumblrBlogCrawler.cs @@ -170,6 +170,9 @@ private void AddPhotoUrlToDownloadList(string document) return; AddTumblrPhotoUrl(document); + + if (blog.RegExPhotos) + AddGenericPhotoUrl(document); } private void AddVideoUrlToDownloadList(string document) @@ -178,6 +181,9 @@ private void AddVideoUrlToDownloadList(string document) return; AddTumblrVideoUrl(document); + + if (blog.RegExVideos) + AddGenericVideoUrl(document); } } } diff --git a/src/TumblThree/TumblThree.Applications/Crawler/TumblrHiddenCrawler.cs b/src/TumblThree/TumblThree.Applications/Crawler/TumblrHiddenCrawler.cs index eff3c0a..fe23cd7 100644 --- a/src/TumblThree/TumblThree.Applications/Crawler/TumblrHiddenCrawler.cs +++ b/src/TumblThree/TumblThree.Applications/Crawler/TumblrHiddenCrawler.cs @@ -413,6 +413,9 @@ private void AddPhotoUrlToDownloadList(Post post) } AddInlinePhotoUrl(postCopy); + + if (blog.RegExPhotos) + AddGenericInlinePhotoUrl(post); } private void AddPhotoUrl(Post post) 
@@ -437,6 +440,11 @@ private void AddInlinePhotoUrl(Post post) AddTumblrPhotoUrl(InlineSearch(post)); } + private void AddGenericInlinePhotoUrl(Post post) + { + AddGenericPhotoUrl(InlineSearch(post)); + } + private void AddVideoUrlToDownloadList(Post post) { if (!blog.DownloadVideo) @@ -456,8 +464,8 @@ private void AddVideoUrlToDownloadList(Post post) AddInlineVideoUrl(postCopy); AddInlineTumblrVideoUrl(postCopy, new Regex("\"(https?://ve.media.tumblr.com/(tumblr_[\\w]*))")); AddInlineTumblrVideoUrl(postCopy, new Regex("\"(https?://vtt.tumblr.com/(tumblr_[\\w]*))")); - // TODO: Make generic inline video detection optional - AddGenericInlineVideoUrl(postCopy); + if (blog.RegExVideos) + AddGenericInlineVideoUrl(postCopy); //AddInlineVideoUrlsToDownloader(videoUrls, postCopy); } diff --git a/src/TumblThree/TumblThree.Applications/Crawler/TumblrLikedByCrawler.cs b/src/TumblThree/TumblThree.Applications/Crawler/TumblrLikedByCrawler.cs index af0bf0b..c21a3c0 100644 --- a/src/TumblThree/TumblThree.Applications/Crawler/TumblrLikedByCrawler.cs +++ b/src/TumblThree/TumblThree.Applications/Crawler/TumblrLikedByCrawler.cs @@ -233,6 +233,9 @@ private void AddPhotoUrlToDownloadList(string document) if (!blog.DownloadPhoto) return; AddTumblrPhotoUrl(document); + + if (blog.RegExPhotos) + AddGenericPhotoUrl(document); } private void AddVideoUrlToDownloadList(string document) @@ -240,6 +243,9 @@ private void AddVideoUrlToDownloadList(string document) if (!blog.DownloadVideo) return; AddTumblrVideoUrl(document); + + if (blog.RegExVideos) + AddGenericVideoUrl(document); } } } diff --git a/src/TumblThree/TumblThree.Applications/Crawler/TumblrSearchCrawler.cs b/src/TumblThree/TumblThree.Applications/Crawler/TumblrSearchCrawler.cs index f2eae42..c5b4ee1 100644 --- a/src/TumblThree/TumblThree.Applications/Crawler/TumblrSearchCrawler.cs +++ b/src/TumblThree/TumblThree.Applications/Crawler/TumblrSearchCrawler.cs @@ -190,6 +190,9 @@ private void AddPhotoUrlToDownloadList(string document) if 
(!blog.DownloadPhoto) return; AddTumblrPhotoUrl(document); + + if (blog.RegExPhotos) + AddGenericPhotoUrl(document); } private void AddVideoUrlToDownloadList(string document) @@ -197,6 +200,9 @@ private void AddVideoUrlToDownloadList(string document) if (!blog.DownloadVideo) return; AddTumblrVideoUrl(document); + + if (blog.RegExVideos) + AddGenericVideoUrl(document); } } } diff --git a/src/TumblThree/TumblThree.Applications/Crawler/TumblrTagSearchCrawler.cs b/src/TumblThree/TumblThree.Applications/Crawler/TumblrTagSearchCrawler.cs index 5b76af7..92d2f33 100644 --- a/src/TumblThree/TumblThree.Applications/Crawler/TumblrTagSearchCrawler.cs +++ b/src/TumblThree/TumblThree.Applications/Crawler/TumblrTagSearchCrawler.cs @@ -227,6 +227,9 @@ private void AddPhotoUrlToDownloadList(string document) if (!blog.DownloadPhoto) return; AddTumblrPhotoUrl(document); + + if (blog.RegExPhotos) + AddGenericPhotoUrl(document); } private void AddVideoUrlToDownloadList(string document) @@ -234,6 +237,9 @@ private void AddVideoUrlToDownloadList(string document) if (!blog.DownloadVideo) return; AddTumblrVideoUrl(document); + + if (blog.RegExVideos) + AddGenericVideoUrl(document); } } } diff --git a/src/TumblThree/TumblThree.Applications/Properties/AppSettings.cs b/src/TumblThree/TumblThree.Applications/Properties/AppSettings.cs index 7dee9ed..ecf9125 100644 --- a/src/TumblThree/TumblThree.Applications/Properties/AppSettings.cs +++ b/src/TumblThree/TumblThree.Applications/Properties/AppSettings.cs @@ -159,6 +159,10 @@ public AppSettings() [DataMember] public bool DumpCrawlerData { get; set; } + [DataMember] public bool RegExPhotos { get; set; } + + [DataMember] public bool RegExVideos { get; set; } + [DataMember] public bool DownloadRebloggedPosts { get; set; } [DataMember] public bool DownloadGfycat { get; set; } diff --git a/src/TumblThree/TumblThree.Applications/Services/SettingsService.cs b/src/TumblThree/TumblThree.Applications/Services/SettingsService.cs index a4cc81d..79ac54a 
100644 --- a/src/TumblThree/TumblThree.Applications/Services/SettingsService.cs +++ b/src/TumblThree/TumblThree.Applications/Services/SettingsService.cs @@ -55,6 +55,8 @@ public IBlog TransferGlobalSettingsToBlog(IBlog blog) blog.LoliSafeType = shellService.Settings.LoliSafeType; blog.CatBoxType = shellService.Settings.CatBoxType; blog.DumpCrawlerData = shellService.Settings.DumpCrawlerData; + blog.RegExPhotos = shellService.Settings.RegExPhotos; + blog.RegExVideos = shellService.Settings.RegExVideos; return blog; } } diff --git a/src/TumblThree/TumblThree.Applications/ViewModels/SettingsViewModel.cs b/src/TumblThree/TumblThree.Applications/ViewModels/SettingsViewModel.cs index b0c4fd5..d85ddb3 100644 --- a/src/TumblThree/TumblThree.Applications/ViewModels/SettingsViewModel.cs +++ b/src/TumblThree/TumblThree.Applications/ViewModels/SettingsViewModel.cs @@ -51,6 +51,8 @@ public class SettingsViewModel : ViewModel private bool createImageMeta; private bool createVideoMeta; private bool dumpCrawlerData; + private bool regExPhotos; + private bool regExVideos; private string downloadPages; private int pageSize; private string downloadFrom; @@ -496,6 +498,18 @@ public bool DumpCrawlerData set => SetProperty(ref dumpCrawlerData, value); } + public bool RegExPhotos + { + get => regExPhotos; + set => SetProperty(ref regExPhotos, value); + } + + public bool RegExVideos + { + get => regExVideos; + set => SetProperty(ref regExVideos, value); + } + public string DownloadPages { get => downloadPages; @@ -867,6 +881,8 @@ private void LoadSettings() CreateAudioMeta = settings.CreateAudioMeta; MetadataFormat = settings.MetadataFormat; DumpCrawlerData = settings.DumpCrawlerData; + RegExPhotos = settings.RegExPhotos; + RegExVideos = settings.RegExVideos; DownloadPages = settings.DownloadPages; PageSize = settings.PageSize; DownloadFrom = settings.DownloadFrom; @@ -947,6 +963,8 @@ private void LoadSettings() CreateAudioMeta = false; MetadataFormat = MetadataType.Text; DumpCrawlerData 
= false; + RegExPhotos = false; + RegExVideos = false; DownloadPages = string.Empty; PageSize = 50; DownloadFrom = string.Empty; @@ -1078,6 +1096,8 @@ private void SaveSettings() settings.CreateAudioMeta = CreateAudioMeta; settings.MetadataFormat = MetadataFormat; settings.DumpCrawlerData = DumpCrawlerData; + settings.RegExPhotos = RegExPhotos; + settings.RegExVideos = RegExVideos; settings.DownloadPages = DownloadPages; settings.PageSize = PageSize; settings.DownloadFrom = DownloadFrom; diff --git a/src/TumblThree/TumblThree.Domain/Models/Blogs/Blog.cs b/src/TumblThree/TumblThree.Domain/Models/Blogs/Blog.cs index bd7f149..3bac936 100644 --- a/src/TumblThree/TumblThree.Domain/Models/Blogs/Blog.cs +++ b/src/TumblThree/TumblThree.Domain/Models/Blogs/Blog.cs @@ -29,6 +29,8 @@ public class Blog : Model, IBlog private bool downloadUrlList; private bool downloadVideo; private bool dumpCrawlerData; + private bool regExPhotos; + private bool regExVideos; private string fileDownloadLocation; private bool forceRescan; private bool forceSize; @@ -222,6 +224,28 @@ public bool DumpCrawlerData } } + [DataMember] + public bool RegExPhotos + { + get => regExPhotos; + set + { + SetProperty(ref regExPhotos, value); + Dirty = true; + } + } + + [DataMember] + public bool RegExVideos + { + get => regExVideos; + set + { + SetProperty(ref regExVideos, value); + Dirty = true; + } + } + [DataMember] public string FileDownloadLocation { diff --git a/src/TumblThree/TumblThree.Domain/Models/Blogs/IBlog.cs b/src/TumblThree/TumblThree.Domain/Models/Blogs/IBlog.cs index cc05be2..eebf451 100644 --- a/src/TumblThree/TumblThree.Domain/Models/Blogs/IBlog.cs +++ b/src/TumblThree/TumblThree.Domain/Models/Blogs/IBlog.cs @@ -102,6 +102,10 @@ public interface IBlog : INotifyPropertyChanged bool DumpCrawlerData { get; set; } + bool RegExPhotos { get; set; } + + bool RegExVideos { get; set; } + string FileDownloadLocation { get; set; } string DownloadPages { get; set; } diff --git 
a/src/TumblThree/TumblThree.Presentation/Properties/Resources.Designer.cs b/src/TumblThree/TumblThree.Presentation/Properties/Resources.Designer.cs index 205d3dd..ba8e1ad 100644 --- a/src/TumblThree/TumblThree.Presentation/Properties/Resources.Designer.cs +++ b/src/TumblThree/TumblThree.Presentation/Properties/Resources.Designer.cs @@ -1212,6 +1212,24 @@ public static string Rating { } } + /// + /// Looks up a localized string similar to Search for image patterns. + /// + public static string RegExPhotos { + get { + return ResourceManager.GetString("RegExPhotos", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to Search for video patterns. + /// + public static string RegExVideos { + get { + return ResourceManager.GetString("RegExVideos", resourceCulture); + } + } + /// /// Looks up a localized string similar to Remove Blog. /// @@ -2173,6 +2191,44 @@ public static string ToolTipProxyPasswordDescription { } } + /// + /// Looks up a localized string similar to Search for images in the crawl data. + /// + public static string ToolTipRegExPhotos { + get { + return ResourceManager.GetString("ToolTipRegExPhotos", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to Uses regular expressions to search for images in everything TumblThree scans. + ///This will add plenty of duplicate image urls to the queue, but might gather images from websites that are currently not supported by a specifically written parser.. + /// + public static string ToolTipRegExPhotosDescription { + get { + return ResourceManager.GetString("ToolTipRegExPhotosDescription", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to Search for videos in the crawl data. 
+ /// + public static string ToolTipRegExVideos { + get { + return ResourceManager.GetString("ToolTipRegExVideos", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to Uses regular expressions to search for videos in everything TumblThree scans. + ///This will add plenty of duplicate video urls to the queue, but might gather videos from websites that are currently not supported by a specifically written parser.. + /// + public static string ToolTipRegExVideosDescription { + get { + return ResourceManager.GetString("ToolTipRegExVideosDescription", resourceCulture); + } + } + /// /// Looks up a localized string similar to File type to download. /// diff --git a/src/TumblThree/TumblThree.Presentation/Properties/Resources.resx b/src/TumblThree/TumblThree.Presentation/Properties/Resources.resx index 84d2663..84dcb37 100644 --- a/src/TumblThree/TumblThree.Presentation/Properties/Resources.resx +++ b/src/TumblThree/TumblThree.Presentation/Properties/Resources.resx @@ -916,4 +916,24 @@ The cookie with your credentials of your account is saved and the authentication Refresh rate of the progress information in the queue panel. This value determines the information refresh rate for each individual queued blog. + + Search for image patterns + + + Search for video patterns + + + Search for images in the crawl data + + + Uses regular expressions to search for images in everything TumblThree scans. +This will add plenty of duplicate image urls to the queue, but might gather images from websites that are currently not supported by a specifically written parser. + + + Search for videos in the crawl data + + + Uses regular expressions to search for videos in everything TumblThree scans. +This will add plenty of duplicate video urls to the queue, but might gather videos from websites that are currently not supported by a specifically written parser. 
+ \ No newline at end of file diff --git a/src/TumblThree/TumblThree.Presentation/Views/DetailsViews/DetailsAllView.xaml b/src/TumblThree/TumblThree.Presentation/Views/DetailsViews/DetailsAllView.xaml index d0bb11e..701a274 100644 --- a/src/TumblThree/TumblThree.Presentation/Views/DetailsViews/DetailsAllView.xaml +++ b/src/TumblThree/TumblThree.Presentation/Views/DetailsViews/DetailsAllView.xaml @@ -206,6 +206,10 @@ + + + + - - - - - - + + + + - - - - + + + + - + + + + - - - - - - + + + + - - - - + + + + - + + + + - - - - - - + + + + - - - - + + + + - + + + + - - - - - - + + + + - - - - + + + + - + + + + - - - - - - + + + + - - - - + + + + - + + + + - - - - - - + + + + - - - - + + + + - + + + + - + - - - - - - + + + + - - - - + + + + - + - - - - - - + + + + - - - - + + + + - + - - - - - - + + + + - - - - + + + + - + - - - - - - + + + + - - - - + + + + - + - - - - - - + + + + - - - - + + + + - + - - - - - - + + + + - - - - + + + + - + - - - - - - + + + + - - - - + + + + - + + + + - - - - - - + + + + - - - - + + + + - + + + + - - - - - - + + + + - - - - + + + + - + + + + - - - - - - + + + + - - - - + + + + - + + + + - - - - - - + + + + - - - - + + + + - + + + + - - - - - - + + + + - - - - + + + + - + + + + - - - - - - + + + + - - - - + + + + - + + + + - + - - - - - - + + + + - - - - + + + + - + - - - - - - + + + + - - - - + + + + - + - - - - - - + + + + - - - - + + + + - + - - - - - - + + + + - - - - + + + + - + - - - - - - + + + + - - - - + + + + - + - - - - - - + + + + - - - - + + + + - + - - - - - - + + + + - - - - + + + + - + + + +