use scrub_html_and_truncate instead of scrub_html for feed item title

Previously, truncating the title could cut a properly encoded HTML
entity in the middle, mangling it.  The new flow calls scrub_html, removes emojis,
decodes entities (a second time), truncates, and then re-encodes.

Fixes #3045.
This commit is contained in:
faried nawaz 2023-03-06 02:31:31 +05:00
parent 86ee4b72f3
commit 141146d1f1
2 changed files with 43 additions and 6 deletions

View file

@ -83,9 +83,8 @@ def activity_title(%{"content" => content} = data, opts \\ %{}) do
end
title
|> Pleroma.Web.Metadata.Utils.scrub_html()
|> Pleroma.Emoji.Formatter.demojify()
|> Formatter.truncate(opts[:max_length], opts[:omission])
|> Pleroma.Web.Metadata.Utils.scrub_html_and_truncate(opts[:max_length], opts[:omission])
|> HtmlEntities.encode()
end
def activity_description(data) do

View file

@ -57,9 +57,23 @@ defmodule Pleroma.Web.Feed.UserControllerTest do
)
note_activity2 = insert(:note_activity, note: note2)
note3 =
insert(:note,
user: user,
data: %{
"content" => "This note tests whether HTML entities are truncated properly",
"summary" => "Won't, didn't fail",
"inReplyTo" => note_activity2.id
}
)
_note_activity3 = insert(:note_activity, note: note3)
object = Object.normalize(note_activity, fetch: false)
[user: user, object: object, max_id: note_activity2.id]
encoded_title = FeedView.activity_title(note3.data)
[user: user, object: object, max_id: note_activity2.id, encoded_title: encoded_title]
end
test "gets an atom feed", %{conn: conn, user: user, object: object, max_id: max_id} do
@ -74,7 +88,7 @@ test "gets an atom feed", %{conn: conn, user: user, object: object, max_id: max_
|> SweetXml.parse()
|> SweetXml.xpath(~x"//entry/title/text()"l)
assert activity_titles == ['2hu', '2hu & as']
assert activity_titles == ['Won\'t, didn\'...', '2hu', '2hu & as']
assert resp =~ FeedView.escape(object.data["content"])
assert resp =~ FeedView.escape(object.data["summary"])
assert resp =~ FeedView.escape(object.data["context"])
@ -105,7 +119,7 @@ test "gets a rss feed", %{conn: conn, user: user, object: object, max_id: max_id
|> SweetXml.parse()
|> SweetXml.xpath(~x"//item/title/text()"l)
assert activity_titles == ['2hu', '2hu & as']
assert activity_titles == ['Won\'t, didn\'...', '2hu', '2hu & as']
assert resp =~ FeedView.escape(object.data["content"])
assert resp =~ FeedView.escape(object.data["summary"])
assert resp =~ FeedView.escape(object.data["context"])
@ -176,6 +190,30 @@ test "does not require authentication on non-federating instances", %{conn: conn
|> get("/users/#{user.nickname}/feed.rss")
|> response(200)
end
# Fetches the user's Atom feed and checks that entry titles are intact:
# the parsed titles must match the expected list, and the raw response must
# contain the title that setup computed via FeedView.activity_title/1
# (passed in through the test context as `encoded_title`).
test "does not mangle HTML entities midway", %{
  conn: conn,
  user: user,
  object: object,
  encoded_title: encoded_title
} do
  feed_path = user_feed_path(conn, :feed, user.nickname)
  atom_conn = put_req_header(conn, "accept", "application/atom+xml")

  # Raw XML body of the feed; a non-200 status fails the test here.
  body = response(get(atom_conn, feed_path), 200)

  # Text of every <entry>/<title> node, returned as a list of charlists.
  entry_titles = SweetXml.xpath(SweetXml.parse(body), ~x"//entry/title/text()"l)

  assert entry_titles == ['Won\'t, didn\'...', '2hu', '2hu & as']

  # The escaped form of each of these object fields must appear in the feed.
  for field <- ["content", "summary", "context"] do
    assert body =~ FeedView.escape(object.data[field])
  end

  assert body =~ encoded_title
end
end
# Note: see ActivityPubControllerTest for JSON format tests