Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Commit

Permalink
Request JSON for oEmbed requests (and ignore XML only providers). (#1…
Browse files Browse the repository at this point in the history
…0759)

This adds the format to the request arguments / URL to
ensure that JSON data is returned (which is all that
Synapse supports).

This also adds additional error checking / filtering to the
configuration file to ignore XML-only providers.
  • Loading branch information
clokep authored Sep 8, 2021
1 parent aacdce8 commit 580a15e
Showing 4 changed files with 98 additions and 8 deletions.
1 change: 1 addition & 0 deletions changelog.d/10759.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Allow configuration of the oEmbed URLs used for URL previews.
24 changes: 20 additions & 4 deletions synapse/config/oembed.py
Original file line number Diff line number Diff line change
@@ -13,7 +13,7 @@
# limitations under the License.
import json
import re
from typing import Any, Dict, Iterable, List, Pattern
from typing import Any, Dict, Iterable, List, Optional, Pattern
from urllib import parse as urlparse

import attr
@@ -31,6 +31,8 @@ class OEmbedEndpointConfig:
api_endpoint: str
# The patterns to match.
url_patterns: List[Pattern]
# The supported formats.
formats: Optional[List[str]]


class OembedConfig(Config):
@@ -93,11 +95,22 @@ def _parse_and_validate_provider(
# might have multiple patterns to match.
for endpoint in provider["endpoints"]:
api_endpoint = endpoint["url"]

# The API endpoint must be an HTTP(S) URL.
results = urlparse.urlparse(api_endpoint)
if results.scheme not in {"http", "https"}:
raise ConfigError(
f"Unsupported oEmbed scheme ({results.scheme}) for endpoint {api_endpoint}",
config_path,
)

patterns = [
self._glob_to_pattern(glob, config_path)
for glob in endpoint["schemes"]
]
yield OEmbedEndpointConfig(api_endpoint, patterns)
yield OEmbedEndpointConfig(
api_endpoint, patterns, endpoint.get("formats")
)

def _glob_to_pattern(self, glob: str, config_path: Iterable[str]) -> Pattern:
"""
@@ -114,9 +127,12 @@ def _glob_to_pattern(self, glob: str, config_path: Iterable[str]) -> Pattern:
"""
results = urlparse.urlparse(glob)

# Ensure the scheme does not have wildcards (and is a sane scheme).
# The scheme must be HTTP(S) (and cannot contain wildcards).
if results.scheme not in {"http", "https"}:
raise ConfigError(f"Insecure oEmbed scheme: {results.scheme}", config_path)
raise ConfigError(
f"Unsupported oEmbed scheme ({results.scheme}) for pattern: {glob}",
config_path,
)

pattern = urlparse.urlunparse(
[
26 changes: 23 additions & 3 deletions synapse/rest/media/v1/oembed.py
Original file line number Diff line number Diff line change
@@ -49,8 +49,24 @@ class OEmbedProvider:
def __init__(self, hs: "HomeServer", client: SimpleHttpClient):
self._oembed_patterns = {}
for oembed_endpoint in hs.config.oembed.oembed_patterns:
api_endpoint = oembed_endpoint.api_endpoint

# Only JSON is supported at the moment. This could be declared in
# the formats field. Otherwise, if the endpoint ends in .xml assume
# it doesn't support JSON.
if (
oembed_endpoint.formats is not None
and "json" not in oembed_endpoint.formats
) or api_endpoint.endswith(".xml"):
logger.info(
"Ignoring oEmbed endpoint due to not supporting JSON: %s",
api_endpoint,
)
continue

# Iterate through each URL pattern and point it to the endpoint.
for pattern in oembed_endpoint.url_patterns:
self._oembed_patterns[pattern] = oembed_endpoint.api_endpoint
self._oembed_patterns[pattern] = api_endpoint
self._client = client

def get_oembed_url(self, url: str) -> Optional[str]:
@@ -86,11 +102,15 @@ async def get_oembed_content(self, endpoint: str, url: str) -> OEmbedResult:
"""
try:
logger.debug("Trying to get oEmbed content for url '%s'", url)

# Note that only the JSON format is supported, some endpoints want
# this in the URL, others want it as an argument.
endpoint = endpoint.replace("{format}", "json")

result = await self._client.get_json(
endpoint,
# TODO Specify max height / width.
# Note that only the JSON format is supported.
args={"url": url},
args={"url": url, "format": "json"},
)

# Ensure there's a version of 1.0.
55 changes: 54 additions & 1 deletion tests/rest/media/v1/test_url_preview.py
Original file line number Diff line number Diff line change
@@ -92,7 +92,15 @@ def make_homeserver(self, reactor, clock):
url_patterns=[
re.compile(r"http://twitter\.com/.+/status/.+"),
],
)
formats=None,
),
OEmbedEndpointConfig(
api_endpoint="http://www.hulu.com/api/oembed.{format}",
url_patterns=[
re.compile(r"http://www\.hulu\.com/watch/.+"),
],
formats=["json"],
),
]

return hs
@@ -656,3 +664,48 @@ def test_oembed_rich(self):
channel.json_body,
{"og:title": None, "og:description": "Content Preview"},
)

def test_oembed_format(self):
"""Test an oEmbed endpoint which requires the format in the URL."""
self.lookups["www.hulu.com"] = [(IPv4Address, "10.1.2.3")]

result = {
"version": "1.0",
"type": "rich",
"html": "<div>Content Preview</div>",
}
end_content = json.dumps(result).encode("utf-8")

channel = self.make_request(
"GET",
"preview_url?url=http://www.hulu.com/watch/12345",
shorthand=False,
await_result=False,
)
self.pump()

client = self.reactor.tcpClients[0][2].buildProtocol(None)
server = AccumulatingProtocol()
server.makeConnection(FakeTransport(client, self.reactor))
client.makeConnection(FakeTransport(server, self.reactor))
client.dataReceived(
(
b"HTTP/1.0 200 OK\r\nContent-Length: %d\r\n"
b'Content-Type: application/json; charset="utf8"\r\n\r\n'
)
% (len(end_content),)
+ end_content
)

self.pump()

# The {format} should have been turned into json.
self.assertIn(b"/api/oembed.json", server.data)
# A URL parameter of format=json should be provided.
self.assertIn(b"format=json", server.data)

self.assertEqual(channel.code, 200)
self.assertEqual(
channel.json_body,
{"og:title": None, "og:description": "Content Preview"},
)

0 comments on commit 580a15e

Please sign in to comment.