Skip to content

Commit

Permalink
atom.activities_to_atom: bug fix: avoid encoded </> characters in…
Browse files Browse the repository at this point in the history
… title

fixes #629
  • Loading branch information
snarfed committed Jan 22, 2024
1 parent c3e6f47 commit a5792ae
Show file tree
Hide file tree
Showing 4 changed files with 21 additions and 12 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -378,6 +378,7 @@ _Non-breaking changes:_
* Bug fix: remove incorrect `type="application/atom+xml"` from `rel="self"` `link` in `entry`.
* Render `objectType: comment` attachments.
* Remove invalid `<a>` element for tags.
* Bug fix: avoid encoded `<` and `>` characters in `title` ([#629](https://github.com/snarfed/granary/issues/629)).
* Bug fixes in `activity_to_atom`/`activities_to_atom` for dict-valued `url` fields.
* Render images in article/note attachments.
* Render `objectType: service` attachments, eg Bluesky custom feeds.
Expand Down
19 changes: 10 additions & 9 deletions granary/atom.py
Original file line number Diff line number Diff line change
Expand Up @@ -377,6 +377,9 @@ def _prepare_activity(a, reader=True):
a (dict): ActivityStreams 1 activity
reader (bool): whether the output will be rendered in a feed reader.
Currently just includes location if True, not otherwise.
Returns:
``None``
"""
act_type = as1.object_type(a)
obj = as1.get_object(a) or a
Expand All @@ -390,16 +393,14 @@ def _prepare_activity(a, reader=True):
# https://forum.newsblur.com/t/android-cant-read-line-pre-formatted-lines/6116
white_space_pre=False))

# Make sure every activity has the title field, since Atom <entry> requires
# the title element.
if not a.get('title'):
a['title'] = util.ellipsize(_encode_ampersands(
a.get('displayName') or a.get('content') or obj.get('title') or
obj.get('displayName') or obj.get('content') or 'Untitled'))

# strip HTML tags. the Atom spec says title is plain text:
# Make sure every activity has displayName, since Atom <entry> requires the
# title element. Also strip HTML tags, since the Atom spec says title is plain text:
# http://atomenabled.org/developers/syndication/#requiredEntryElements
a['title'] = xml.sax.saxutils.escape(util.parse_html(a['title']).get_text(''))
display_name = (a.get('displayName') or a.get('content') or obj.get('title')
or obj.get('displayName') or obj.get('content') or 'Untitled')
a['displayName'] = util.ellipsize(xml.sax.saxutils.escape(
util.parse_html(display_name).get_text('')))


children = []
image_urls_seen = set()
Expand Down
2 changes: 1 addition & 1 deletion granary/templates/_entry.atom
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

{% set url = as1.get_url(activity) or as1.get_url(obj) %}
<id>{{ activity.id or obj.id or url }}</id>
<title>{{ activity.title|safe }}</title>
<title>{{ activity.displayName|safe }}</title>

{% if obj.summary %}
<summary>{{ obj.summary }}</summary>
Expand Down
11 changes: 9 additions & 2 deletions granary/tests/test_atom.py
Original file line number Diff line number Diff line change
Expand Up @@ -361,10 +361,17 @@ def test_title(self):

def test_strip_html_tags_from_titles(self):
activity = copy.deepcopy(test_facebook.ACTIVITY)
activity['displayName'] = '<p>foo &amp; <a href="http://bar">bar</a></p>'
activity['content'] = '<p>foo &amp; <a href="http://bar">bar</a></p>'
self.assert_multiline_in(
'<title>foo &amp; bar</title>\n',
atom.activities_to_atom([activity], test_facebook.ACTOR))
atom.activities_to_atom([activity], {}))

# ellipsize in the middle of an HTML tag. (ellipsize defaults to 14 words)
del activity['displayName']
activity['content'] = '<p>I’ve been looking over Mike Hoerger’s <a href="https://www.pmc19.com/data/index.php">Pandemic Mitigation Collaborative - Data Tracker</a> which estimates and projects...</p>'
self.assert_multiline_in(
'<title>I’ve been looking over Mike Hoerger’s Pandemic Mitigation Collaborative - Data Tracker which estimates...</title>\n',
atom.activities_to_atom([activity], {}))

def test_render_content_as_html(self):
self.assert_multiline_in(
Expand Down

0 comments on commit a5792ae

Please sign in to comment.