Skip to content

Commit

Permalink
Fix some link previews being incorrectly generated from other prior l…
Browse files Browse the repository at this point in the history
…inks (mastodon#16885)

* Add tests

* Fix some link previews being incorrectly generated from different prior links

PR mastodon#12403 added a cache to avoid redundant queries when the OEmbed endpoint can
be guessed from the URL. This caching mechanism is not perfectly correct as
there is no guarantee that all pages from a given domain share the same
OEmbed provider endpoint.

This PR prevents the FetchOEmbedService from caching OEmbed endpoint that
cannot be generalized by replacing a fully-qualified URL from the endpoint's
parameters, greatly reducing the number of incorrect cached generalizations.
  • Loading branch information
ClearlyClaire authored and kadoshita committed Nov 6, 2021
1 parent 6889c13 commit 257dfcb
Show file tree
Hide file tree
Showing 3 changed files with 52 additions and 1 deletion.
5 changes: 4 additions & 1 deletion app/services/fetch_oembed_service.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

class FetchOEmbedService
ENDPOINT_CACHE_EXPIRES_IN = 24.hours.freeze
URL_REGEX = /(=(http[s]?(%3A|:)(\/\/|%2F%2F)))([^&]*)/i.freeze

attr_reader :url, :options, :format, :endpoint_url

Expand Down Expand Up @@ -65,10 +66,12 @@ def parse_cached_endpoint!
end

def cache_endpoint!
return unless URL_REGEX.match?(@endpoint_url)

url_domain = Addressable::URI.parse(@url).normalized_host

endpoint_hash = {
endpoint: @endpoint_url.gsub(/(=(http[s]?(%3A|:)(\/\/|%2F%2F)))([^&]*)/i, '={url}'),
endpoint: @endpoint_url.gsub(URL_REGEX, '={url}'),
format: @format,
}

Expand Down
7 changes: 7 additions & 0 deletions spec/fixtures/requests/oembed_youtube.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
<!DOCTYPE html>
<html>
<head>
<link rel="alternate" type="application/json+oembed" href="https://www.youtube.com/oembed?format=json&amp;url=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DIPSbNdBmWKE" title="What is Mastodon?">
</head>
<body></body>
</html>
41 changes: 41 additions & 0 deletions spec/services/fetch_oembed_service_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,32 @@

describe 'discover_provider' do
context 'when status code is 200 and MIME type is text/html' do
context 'when OEmbed endpoint contains URL as parameter' do
before do
stub_request(:get, 'https://www.youtube.com/watch?v=IPSbNdBmWKE').to_return(
status: 200,
headers: { 'Content-Type': 'text/html' },
body: request_fixture('oembed_youtube.html'),
)
stub_request(:get, 'https://www.youtube.com/oembed?format=json&url=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DIPSbNdBmWKE').to_return(
status: 200,
headers: { 'Content-Type': 'text/html' },
body: request_fixture('oembed_json_empty.html')
)
end

it 'returns new OEmbed::Provider for JSON provider' do
subject.call('https://www.youtube.com/watch?v=IPSbNdBmWKE')
expect(subject.endpoint_url).to eq 'https://www.youtube.com/oembed?format=json&url=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DIPSbNdBmWKE'
expect(subject.format).to eq :json
end

it 'stores URL template' do
subject.call('https://www.youtube.com/watch?v=IPSbNdBmWKE')
expect(Rails.cache.read('oembed_endpoint:www.youtube.com')[:endpoint]).to eq 'https://www.youtube.com/oembed?format=json&url={url}'
end
end

context 'Both of JSON and XML provider are discoverable' do
before do
stub_request(:get, 'https://host.test/oembed.html').to_return(
Expand All @@ -33,6 +59,11 @@
expect(subject.endpoint_url).to eq 'https://host.test/provider.xml'
expect(subject.format).to eq :xml
end

it 'does not cache OEmbed endpoint' do
subject.call('https://host.test/oembed.html', format: :xml)
expect(Rails.cache.exist?('oembed_endpoint:host.test')).to eq false
end
end

context 'JSON provider is discoverable while XML provider is not' do
Expand All @@ -49,6 +80,11 @@
expect(subject.endpoint_url).to eq 'https://host.test/provider.json'
expect(subject.format).to eq :json
end

it 'does not cache OEmbed endpoint' do
subject.call('https://host.test/oembed.html')
expect(Rails.cache.exist?('oembed_endpoint:host.test')).to eq false
end
end

context 'XML provider is discoverable while JSON provider is not' do
Expand All @@ -65,6 +101,11 @@
expect(subject.endpoint_url).to eq 'https://host.test/provider.xml'
expect(subject.format).to eq :xml
end

it 'does not cache OEmbed endpoint' do
subject.call('https://host.test/oembed.html')
expect(Rails.cache.exist?('oembed_endpoint:host.test')).to eq false
end
end

context 'Invalid XML provider is discoverable while JSON provider is not' do
Expand Down

0 comments on commit 257dfcb

Please sign in to comment.