Skip to content
This repository has been archived by the owner on Mar 9, 2021. It is now read-only.

Commit

Permalink
Uses RegEx to search for images and videos.
Browse files Browse the repository at this point in the history
Uses regular expressions to search for images and videos in everything TumblThree scans.
  • Loading branch information
johanneszab committed Dec 6, 2018
1 parent c7bbfc3 commit 1aebd2e
Show file tree
Hide file tree
Showing 21 changed files with 1,196 additions and 814 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,8 @@ public IBlog CreateFromMultiple(IEnumerable<IBlog> blogFiles)
CatBoxType = SetProperty<CatBoxTypes>(sharedBlogFiles, "CatBoxType"),
MetadataFormat = SetProperty<MetadataType>(sharedBlogFiles, "MetadataFormat"),
DumpCrawlerData = SetCheckBox(sharedBlogFiles, "DumpCrawlerData"),
RegExPhotos = SetCheckBox(sharedBlogFiles, "RegExPhotos"),
RegExVideos = SetCheckBox(sharedBlogFiles, "RegExVideos"),
FileDownloadLocation = SetProperty<string>(sharedBlogFiles, "FileDownloadLocation"),
Dirty = false
};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -248,7 +248,7 @@ protected void AddTumblrVideoUrl(string post)
}
}

protected void AddGernicPhotoUrl(string post)
protected void AddGenericPhotoUrl(string post)
{
foreach (string imageUrl in tumblrParser.SearchForGenericPhotoUrl(post))
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,9 @@ private void AddPhotoUrlToDownloadList(string document)
return;

AddTumblrPhotoUrl(document);

if (blog.RegExPhotos)
AddGenericPhotoUrl(document);
}

private void AddVideoUrlToDownloadList(string document)
Expand All @@ -178,6 +181,9 @@ private void AddVideoUrlToDownloadList(string document)
return;

AddTumblrVideoUrl(document);

if (blog.RegExVideos)
AddGenericVideoUrl(document);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -413,6 +413,9 @@ private void AddPhotoUrlToDownloadList(Post post)
}

AddInlinePhotoUrl(postCopy);

if (blog.RegExPhotos)
AddGenericInlinePhotoUrl(post);
}

private void AddPhotoUrl(Post post)
Expand All @@ -437,6 +440,11 @@ private void AddInlinePhotoUrl(Post post)
AddTumblrPhotoUrl(InlineSearch(post));
}

/// <summary>
/// Runs the generic photo URL search (the one callers gate behind
/// <c>blog.RegExPhotos</c>) over the inline content of <paramref name="post"/>.
/// </summary>
/// <param name="post">Post whose <c>InlineSearch</c> result is scanned.</param>
private void AddGenericInlinePhotoUrl(Post post)
{
    // This has to go through AddGenericPhotoUrl. Calling AddTumblrPhotoUrl here
    // would merely repeat what AddInlinePhotoUrl already does, so enabling the
    // RegExPhotos option would have no additional effect for this crawler.
    AddGenericPhotoUrl(InlineSearch(post));
}

private void AddVideoUrlToDownloadList(Post post)
{
if (!blog.DownloadVideo)
Expand All @@ -456,8 +464,8 @@ private void AddVideoUrlToDownloadList(Post post)
AddInlineVideoUrl(postCopy);
AddInlineTumblrVideoUrl(postCopy, new Regex("\"(https?://ve.media.tumblr.com/(tumblr_[\\w]*))"));
AddInlineTumblrVideoUrl(postCopy, new Regex("\"(https?://vtt.tumblr.com/(tumblr_[\\w]*))"));
// TODO: Make generic inline video detection optional
AddGenericInlineVideoUrl(postCopy);
if (blog.RegExVideos)
AddGenericInlineVideoUrl(postCopy);

//AddInlineVideoUrlsToDownloader(videoUrls, postCopy);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -233,13 +233,19 @@ private void AddPhotoUrlToDownloadList(string document)
if (!blog.DownloadPhoto)
return;
AddTumblrPhotoUrl(document);

if (blog.RegExPhotos)
AddGenericPhotoUrl(document);
}

/// <summary>
/// Hands <paramref name="document"/> to the video URL helpers when video
/// downloading is enabled for the blog.
/// </summary>
/// <param name="document">Crawled text to pass on to the video URL helpers.</param>
private void AddVideoUrlToDownloadList(string document)
{
    if (blog.DownloadVideo)
    {
        // The Tumblr-specific search always runs first.
        AddTumblrVideoUrl(document);

        // The generic search is opt-in through the blog's RegExVideos flag.
        if (blog.RegExVideos)
        {
            AddGenericVideoUrl(document);
        }
    }
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -190,13 +190,19 @@ private void AddPhotoUrlToDownloadList(string document)
if (!blog.DownloadPhoto)
return;
AddTumblrPhotoUrl(document);

if (blog.RegExPhotos)
AddGenericPhotoUrl(document);
}

/// <summary>
/// Hands <paramref name="document"/> to the video URL helpers when video
/// downloading is enabled for the blog.
/// </summary>
/// <param name="document">Crawled text to pass on to the video URL helpers.</param>
private void AddVideoUrlToDownloadList(string document)
{
    if (blog.DownloadVideo)
    {
        // The Tumblr-specific search always runs first.
        AddTumblrVideoUrl(document);

        // The generic search is opt-in through the blog's RegExVideos flag.
        if (blog.RegExVideos)
        {
            AddGenericVideoUrl(document);
        }
    }
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -227,13 +227,19 @@ private void AddPhotoUrlToDownloadList(string document)
if (!blog.DownloadPhoto)
return;
AddTumblrPhotoUrl(document);

if (blog.RegExPhotos)
AddGenericPhotoUrl(document);
}

/// <summary>
/// Hands <paramref name="document"/> to the video URL helpers when video
/// downloading is enabled for the blog.
/// </summary>
/// <param name="document">Crawled text to pass on to the video URL helpers.</param>
private void AddVideoUrlToDownloadList(string document)
{
    if (blog.DownloadVideo)
    {
        // The Tumblr-specific search always runs first.
        AddTumblrVideoUrl(document);

        // The generic search is opt-in through the blog's RegExVideos flag.
        if (blog.RegExVideos)
        {
            AddGenericVideoUrl(document);
        }
    }
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,10 @@ public AppSettings()

[DataMember] public bool DumpCrawlerData { get; set; }

/// <summary>
/// Application-level value of the "search for image patterns" option.
/// NOTE(review): presumably the default copied onto blogs — confirm against the callers.
/// </summary>
[DataMember] public bool RegExPhotos { get; set; }

/// <summary>
/// Application-level value of the "search for video patterns" option.
/// NOTE(review): presumably the default copied onto blogs — confirm against the callers.
/// </summary>
[DataMember] public bool RegExVideos { get; set; }

[DataMember] public bool DownloadRebloggedPosts { get; set; }

[DataMember] public bool DownloadGfycat { get; set; }
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,8 @@ public IBlog TransferGlobalSettingsToBlog(IBlog blog)
blog.LoliSafeType = shellService.Settings.LoliSafeType;
blog.CatBoxType = shellService.Settings.CatBoxType;
blog.DumpCrawlerData = shellService.Settings.DumpCrawlerData;
blog.RegExPhotos = shellService.Settings.RegExPhotos;
blog.RegExVideos = shellService.Settings.RegExVideos;
return blog;
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,8 @@ public class SettingsViewModel : ViewModel<ISettingsView>
private bool createImageMeta;
private bool createVideoMeta;
private bool dumpCrawlerData;
private bool regExPhotos;
private bool regExVideos;
private string downloadPages;
private int pageSize;
private string downloadFrom;
Expand Down Expand Up @@ -496,6 +498,18 @@ public bool DumpCrawlerData
set => SetProperty(ref dumpCrawlerData, value);
}

/// <summary>
/// View-model value of the "search for image patterns" option.
/// Read from the settings object in <c>LoadSettings</c> and written back in
/// <c>SaveSettings</c>; assignments go through <c>SetProperty</c>.
/// </summary>
public bool RegExPhotos
{
get => regExPhotos;
set => SetProperty(ref regExPhotos, value);
}

/// <summary>
/// View-model value of the "search for video patterns" option.
/// Read from the settings object in <c>LoadSettings</c> and written back in
/// <c>SaveSettings</c>; assignments go through <c>SetProperty</c>.
/// </summary>
public bool RegExVideos
{
get => regExVideos;
set => SetProperty(ref regExVideos, value);
}

public string DownloadPages
{
get => downloadPages;
Expand Down Expand Up @@ -867,6 +881,8 @@ private void LoadSettings()
CreateAudioMeta = settings.CreateAudioMeta;
MetadataFormat = settings.MetadataFormat;
DumpCrawlerData = settings.DumpCrawlerData;
RegExPhotos = settings.RegExPhotos;
RegExVideos = settings.RegExVideos;
DownloadPages = settings.DownloadPages;
PageSize = settings.PageSize;
DownloadFrom = settings.DownloadFrom;
Expand Down Expand Up @@ -947,6 +963,8 @@ private void LoadSettings()
CreateAudioMeta = false;
MetadataFormat = MetadataType.Text;
DumpCrawlerData = false;
RegExPhotos = false;
RegExVideos = false;
DownloadPages = string.Empty;
PageSize = 50;
DownloadFrom = string.Empty;
Expand Down Expand Up @@ -1078,6 +1096,8 @@ private void SaveSettings()
settings.CreateAudioMeta = CreateAudioMeta;
settings.MetadataFormat = MetadataFormat;
settings.DumpCrawlerData = DumpCrawlerData;
settings.RegExPhotos = RegExPhotos;
settings.RegExVideos = RegExVideos;
settings.DownloadPages = DownloadPages;
settings.PageSize = PageSize;
settings.DownloadFrom = DownloadFrom;
Expand Down
24 changes: 24 additions & 0 deletions src/TumblThree/TumblThree.Domain/Models/Blogs/Blog.cs
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ public class Blog : Model, IBlog
private bool downloadUrlList;
private bool downloadVideo;
private bool dumpCrawlerData;
private bool regExPhotos;
private bool regExVideos;
private string fileDownloadLocation;
private bool forceRescan;
private bool forceSize;
Expand Down Expand Up @@ -222,6 +224,28 @@ public bool DumpCrawlerData
}
}

/// <summary>
/// Per-blog switch that crawlers check before additionally running the generic
/// photo URL search on scanned content.
/// </summary>
/// <remarks>
/// The setter assigns through <c>SetProperty</c> and then sets <c>Dirty</c> to
/// <c>true</c> unconditionally, i.e. also when the assigned value equals the old one.
/// </remarks>
[DataMember]
public bool RegExPhotos
{
get => regExPhotos;
set
{
SetProperty(ref regExPhotos, value);
Dirty = true;
}
}

/// <summary>
/// Per-blog switch that crawlers check before additionally running the generic
/// video URL search on scanned content.
/// </summary>
/// <remarks>
/// The setter assigns through <c>SetProperty</c> and then sets <c>Dirty</c> to
/// <c>true</c> unconditionally, i.e. also when the assigned value equals the old one.
/// </remarks>
[DataMember]
public bool RegExVideos
{
get => regExVideos;
set
{
SetProperty(ref regExVideos, value);
Dirty = true;
}
}

[DataMember]
public string FileDownloadLocation
{
Expand Down
4 changes: 4 additions & 0 deletions src/TumblThree/TumblThree.Domain/Models/Blogs/IBlog.cs
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,10 @@ public interface IBlog : INotifyPropertyChanged

bool DumpCrawlerData { get; set; }

/// <summary>
/// Whether crawlers additionally run the generic photo URL search on the content they scan.
/// </summary>
bool RegExPhotos { get; set; }

/// <summary>
/// Whether crawlers additionally run the generic video URL search on the content they scan.
/// </summary>
bool RegExVideos { get; set; }

string FileDownloadLocation { get; set; }

string DownloadPages { get; set; }
Expand Down

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

20 changes: 20 additions & 0 deletions src/TumblThree/TumblThree.Presentation/Properties/Resources.resx
Original file line number Diff line number Diff line change
Expand Up @@ -916,4 +916,24 @@ The cookie with your credentials of your account is saved and the authentication
<value>Refresh rate of the progress information in the queue panel.
This value determines the information refresh rate for each individual queued blog.</value>
</data>
<data name="RegExPhotos" xml:space="preserve">
<value>Search for image patterns</value>
</data>
<data name="RegExVideos" xml:space="preserve">
<value>Search for video patterns</value>
</data>
<data name="ToolTipRegExPhotos" xml:space="preserve">
<value>Search for images in the crawl data</value>
</data>
<data name="ToolTipRegExPhotosDescription" xml:space="preserve">
<value>Uses regular expressions to search for images in everything TumblThree scans.
This will add plenty of duplicate image urls to the queue, but might gather images from websites that are currently not supported by a specifically written parser.</value>
</data>
<data name="ToolTipRegExVideos" xml:space="preserve">
<value>Search for videos in the crawl data</value>
</data>
<data name="ToolTipRegExVideosDescription" xml:space="preserve">
<value>Uses regular expressions to search for videos in everything TumblThree scans.
This will add plenty of duplicate video urls to the queue, but might gather videos from websites that are currently not supported by a specifically written parser.</value>
</data>
</root>
Loading

0 comments on commit 1aebd2e

Please sign in to comment.