Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Request JSON for oEmbed requests. #10759

Merged
merged 6 commits into from
Sep 8, 2021
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog.d/10759.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Allow configuration of the oEmbed URLs used for URL previews.
24 changes: 20 additions & 4 deletions synapse/config/oembed.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
# limitations under the License.
import json
import re
from typing import Any, Dict, Iterable, List, Pattern
from typing import Any, Dict, Iterable, List, Optional, Pattern
from urllib import parse as urlparse

import attr
Expand All @@ -31,6 +31,8 @@ class OEmbedEndpointConfig:
api_endpoint: str
# The patterns to match.
url_patterns: List[Pattern]
# The supported formats.
formats: Optional[List[str]]


class OembedConfig(Config):
Expand Down Expand Up @@ -93,11 +95,22 @@ def _parse_and_validate_provider(
# might have multiple patterns to match.
for endpoint in provider["endpoints"]:
api_endpoint = endpoint["url"]

# The API endpoint must be an HTTP(S) URL.
results = urlparse.urlparse(api_endpoint)
if results.scheme not in {"http", "https"}:
raise ConfigError(
f"Insecure oEmbed scheme ({results.scheme}) for endpoint {api_endpoint}",
clokep marked this conversation as resolved.
Show resolved Hide resolved
config_path,
)

patterns = [
self._glob_to_pattern(glob, config_path)
for glob in endpoint["schemes"]
]
yield OEmbedEndpointConfig(api_endpoint, patterns)
yield OEmbedEndpointConfig(
api_endpoint, patterns, endpoint.get("formats")
)

def _glob_to_pattern(self, glob: str, config_path: Iterable[str]) -> Pattern:
"""
Expand All @@ -114,9 +127,12 @@ def _glob_to_pattern(self, glob: str, config_path: Iterable[str]) -> Pattern:
"""
results = urlparse.urlparse(glob)

# Ensure the scheme does not have wildcards (and is a sane scheme).
# The scheme must be HTTP(S) (and cannot contain wildcards).
if results.scheme not in {"http", "https"}:
raise ConfigError(f"Insecure oEmbed scheme: {results.scheme}", config_path)
raise ConfigError(
f"Insecure oEmbed scheme ({results.scheme}) for pattern: {glob}",
clokep marked this conversation as resolved.
Show resolved Hide resolved
config_path,
)

pattern = urlparse.urlunparse(
[
Expand Down
25 changes: 22 additions & 3 deletions synapse/rest/media/v1/oembed.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,23 @@ class OEmbedProvider:
def __init__(self, hs: "HomeServer", client: SimpleHttpClient):
self._oembed_patterns = {}
for oembed_endpoint in hs.config.oembed.oembed_patterns:
api_endpoint = oembed_endpoint.api_endpoint

# Only JSON is supported at the moment. This could be declared in
# the formats field. Otherwise, if the endpoint ends in .xml assume
# it doesn't support JSON.
if (
oembed_endpoint.formats is not None
and "json" not in oembed_endpoint.formats
) or api_endpoint.endswith(".xml"):
logger.info(
f"Ignoring oEmbed endpoint due to not supporting JSON: {api_endpoint}"
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
f"Ignoring oEmbed endpoint due to not supporting JSON: {api_endpoint}"
"Ignoring oEmbed endpoint due to not supporting JSON: %s", api_endpoint

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why not an f-string here?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Because its logging, and using f-strings means that we unconditionally do the string interpolation rather than it being gated behind the log levels. Its not a huge deal, but it can add up (especially when we're interpolating variables with more expensive str functions)

)
continue

# Iterate through each URL pattern and point it to the endpoint.
for pattern in oembed_endpoint.url_patterns:
self._oembed_patterns[pattern] = oembed_endpoint.api_endpoint
self._oembed_patterns[pattern] = api_endpoint
self._client = client

def get_oembed_url(self, url: str) -> Optional[str]:
Expand Down Expand Up @@ -86,11 +101,15 @@ async def get_oembed_content(self, endpoint: str, url: str) -> OEmbedResult:
"""
try:
logger.debug("Trying to get oEmbed content for url '%s'", url)

# Note that only the JSON format is supported, some endpoints want
# this in the URL, others want it as an argument.
endpoint = endpoint.replace("{format}", "json")

result = await self._client.get_json(
endpoint,
# TODO Specify max height / width.
# Note that only the JSON format is supported.
args={"url": url},
args={"url": url, "format": "json"},
)

# Ensure there's a version of 1.0.
Expand Down
55 changes: 54 additions & 1 deletion tests/rest/media/v1/test_url_preview.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,15 @@ def make_homeserver(self, reactor, clock):
url_patterns=[
re.compile(r"http://twitter\.com/.+/status/.+"),
],
)
formats=None,
),
OEmbedEndpointConfig(
api_endpoint="http://www.hulu.com/api/oembed.{format}",
url_patterns=[
re.compile(r"http://www\.hulu\.com/watch/.+"),
],
formats=["json"],
),
]

return hs
Expand Down Expand Up @@ -656,3 +664,48 @@ def test_oembed_rich(self):
channel.json_body,
{"og:title": None, "og:description": "Content Preview"},
)

def test_oembed_format(self):
"""Test an oEmbed endpoint which requires the format in the URL."""
self.lookups["www.hulu.com"] = [(IPv4Address, "10.1.2.3")]

result = {
"version": "1.0",
"type": "rich",
"html": "<div>Content Preview</div>",
}
end_content = json.dumps(result).encode("utf-8")

channel = self.make_request(
"GET",
"preview_url?url=http://www.hulu.com/watch/12345",
shorthand=False,
await_result=False,
)
self.pump()

client = self.reactor.tcpClients[0][2].buildProtocol(None)
server = AccumulatingProtocol()
server.makeConnection(FakeTransport(client, self.reactor))
client.makeConnection(FakeTransport(server, self.reactor))
client.dataReceived(
(
b"HTTP/1.0 200 OK\r\nContent-Length: %d\r\n"
b'Content-Type: application/json; charset="utf8"\r\n\r\n'
)
% (len(end_content),)
+ end_content
)

self.pump()

# The {format} should have been turned into json.
self.assertIn(b"/api/oembed.json", server.data)
# A URL parameter of format=json should be provided.
self.assertIn(b"format=json", server.data)

self.assertEqual(channel.code, 200)
self.assertEqual(
channel.json_body,
{"og:title": None, "og:description": "Content Preview"},
)