Skip to content

Commit

Permalink
feature: add favicon scraper. Fixes #40
Browse files Browse the repository at this point in the history
  • Loading branch information
gottfrois committed Sep 21, 2014
1 parent 33d0873 commit 1cca922
Show file tree
Hide file tree
Showing 14 changed files with 101 additions and 375 deletions.
2 changes: 1 addition & 1 deletion lib/link_thumbnailer/configuration.rb
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ def initialize
%r{^http://pixel\.quantserve\.com/},
%r{^http://s7\.addthis\.com/}
]
@attributes = [:title, :images, :description, :videos]
@attributes = [:title, :images, :description, :videos, :favicon]
@graders = [
->(description) { ::LinkThumbnailer::Graders::Length.new(description) },
->(description) { ::LinkThumbnailer::Graders::HtmlAttribute.new(description, :class) },
Expand Down
25 changes: 25 additions & 0 deletions lib/link_thumbnailer/models/favicon.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
require 'link_thumbnailer/model'

module LinkThumbnailer
  module Models
    # Model wrapping the location of a website's favicon.
    class Favicon < ::LinkThumbnailer::Model

      # The value handed to the constructor (the default favicon scraper
      # passes a parsed URI, or nil when none could be built).
      attr_reader :uri

      # @param uri [Object] favicon location; stored as given
      def initialize(uri)
        @uri = uri
      end

      # @return [String] string form of the stored uri
      def to_s
        uri.to_s
      end

      # @return [Hash] JSON-ready representation with a single :src key
      def as_json(*)
        { src: to_s }
      end

    end
  end
end
3 changes: 2 additions & 1 deletion lib/link_thumbnailer/models/website.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ module LinkThumbnailer
module Models
class Website < ::LinkThumbnailer::Model

attr_accessor :url, :title, :description, :images, :videos
attr_accessor :url, :title, :description, :images, :videos, :favicon

def initialize
@images = []
Expand Down Expand Up @@ -39,6 +39,7 @@ def images
def as_json(*)
{
url: url.to_s,
favicon: favicon,
title: title,
description: description,
images: images,
Expand Down
2 changes: 2 additions & 0 deletions lib/link_thumbnailer/scraper.rb
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
require 'link_thumbnailer/scrapers/opengraph/images'
require 'link_thumbnailer/scrapers/default/videos'
require 'link_thumbnailer/scrapers/opengraph/videos'
require 'link_thumbnailer/scrapers/default/favicon'
require 'link_thumbnailer/scrapers/opengraph/favicon'

module LinkThumbnailer
class Scraper < ::SimpleDelegator
Expand Down
36 changes: 36 additions & 0 deletions lib/link_thumbnailer/scrapers/default/favicon.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
require 'link_thumbnailer/scrapers/default/base'
require 'link_thumbnailer/models/favicon'

module LinkThumbnailer
  module Scrapers
    module Default
      # Scrapes a favicon location from the first <link> element whose
      # rel attribute contains the substring "icon".
      class Favicon < ::LinkThumbnailer::Scrapers::Default::Base

        # @return [String] string form of the favicon model built from the
        #   link's href
        # NOTE(review): model_class comes from the base scraper; presumably it
        # resolves to Models::Favicon, whose to_s yields '' for a nil uri.
        def value
          modelize(to_uri(href)).to_s
        end

        private

        # Parses href into a URI.
        #
        # @param href [String, nil] raw href attribute value
        # @return [URI::Generic, nil] nil when href is missing or not a valid URI
        def to_uri(href)
          # Explicit guard so the "no favicon" path does not depend on
          # URI.parse raising for a nil argument.
          return nil if href.nil?

          ::URI.parse(href)
        rescue ::URI::InvalidURIError
          nil
        end

        # @return [String, nil] the href of the matched link element, or nil
        #   when there is no such element or it carries no href attribute
        def href
          # Query the document once and reuse the result.
          link = node
          return nil unless link

          # A <link rel="icon"> without an href would otherwise raise
          # NoMethodError on nil.
          attribute = link.attributes['href']
          attribute.value.to_s if attribute
        end

        # @return [Object, nil] first <link> node whose rel contains "icon"
        def node
          document.xpath("//link[contains(@rel, 'icon')]").first
        end

        # Wraps the parsed uri in the scraper's model class.
        def modelize(uri)
          model_class.new(uri)
        end

      end
    end
  end
end
15 changes: 15 additions & 0 deletions lib/link_thumbnailer/scrapers/opengraph/favicon.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
require 'link_thumbnailer/scrapers/opengraph/base'

module LinkThumbnailer
  module Scrapers
    module Opengraph
      # Open Graph counterpart of the favicon scraper. It never produces a
      # value.
      # NOTE(review): presumably this exists so the scraper lookup finds an
      # Opengraph class for :favicon and then falls back to the default
      # scraper; confirm against the scraper chain.
      class Favicon < ::LinkThumbnailer::Scrapers::Opengraph::Base

        # @return [nil] always
        def value
          nil
        end

      end
    end
  end
end
2 changes: 1 addition & 1 deletion spec/configuration_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
it { expect(instance.verify_ssl).to eq(true) }
it { expect(instance.http_timeout).to eq(5) }
it { expect(instance.blacklist_urls).to_not be_empty }
it { expect(instance.attributes).to eq([:title, :images, :description, :videos]) }
it { expect(instance.attributes).to eq([:title, :images, :description, :videos, :favicon]) }
it { expect(instance.graders).to_not be_empty }
it { expect(instance.description_min_length).to eq(25) }
it { expect(instance.positive_regex).to_not be_nil }
Expand Down
9 changes: 0 additions & 9 deletions spec/examples/empty_og_image_example.html

This file was deleted.

6 changes: 6 additions & 0 deletions spec/fixture_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
let(:url) { 'http://foo.com' }
let(:png_url) { 'http://foo.com/foo.png' }
let(:video_url) { 'http://foo.com/foo.swf' }
let(:favicon) { 'http://foo.com/foo.ico' }
let(:png) { File.open(File.dirname(__FILE__) + '/fixtures/foo.png') }
let(:action) { LinkThumbnailer.generate(url) }

Expand All @@ -22,6 +23,7 @@

let(:html) { File.open(File.dirname(__FILE__) + '/fixtures/og_valid_example.html').read() }

it { expect(action.favicon).to eq(favicon) }
it { expect(action.title).to eq(title) }
it { expect(action.description).to eq(description) }
it { expect(action.images.count).to eq(1) }
Expand All @@ -42,6 +44,7 @@
stub_request(:get, png_url_2).to_return(status: 200, body: png_2, headers: {})
end

it { expect(action.favicon).to eq('') }
it { expect(action.title).to eq(title) }
it { expect(action.description).to eq(description) }
it { expect(action.images.count).to eq(2) }
Expand All @@ -55,6 +58,7 @@
let(:video_url_2) { 'http://foo.com/bar.swf' }
let(:html) { File.open(File.dirname(__FILE__) + '/fixtures/og_valid_multi_video_example.html').read() }

it { expect(action.favicon).to eq('') }
it { expect(action.title).to eq(title) }
it { expect(action.description).to eq(description) }
it { expect(action.videos.count).to eq(2) }
Expand Down Expand Up @@ -84,6 +88,7 @@
let(:title) { 'Title from meta' }
let(:description) { 'Description from meta' }

it { expect(action.favicon).to eq(favicon) }
it { expect(action.title).to eq(title) }
it { expect(action.description).to eq(description) }

Expand All @@ -94,6 +99,7 @@
let(:html) { File.open(File.dirname(__FILE__) + '/fixtures/default_from_body.html').read() }
let(:description) { 'Description from body' }

it { expect(action.favicon).to eq(favicon) }
it { expect(action.description).to eq(description) }
it { expect(action.images.count).to eq(1) }
it { expect(action.images.first.src.to_s).to eq(png_url) }
Expand Down
1 change: 1 addition & 0 deletions spec/fixtures/default_from_body.html
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
<html>
<head>
<title>Title from meta</title>
<link rel="shortcut icon" href="http://foo.com/foo.ico">
</head>
<body>

Expand Down
1 change: 1 addition & 0 deletions spec/fixtures/default_from_meta.html
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
<head>
<title>Title from meta</title>
<meta content="Description from meta" name="description">
<link rel="icon whatever" href="http://foo.com/foo.ico">
</head>
<body>

Expand Down
Loading

0 comments on commit 1cca922

Please sign in to comment.