# Patch summary (unified git diff touching six files; the text below is
# whitespace-collapsed, so line structure must be restored before applying).
#
# lib/pleroma/application.ex
#   Adds one more Cachex worker, `:scrubber_cache` (`limit: 2500`), with the
#   child id `:cachex_scrubber`.
#
# lib/pleroma/html.ex
#   * `filter_tags(html, nil)` now delegates to
#     `filter_tags(html, get_scrubbers())`; a new clause guarded by
#     `is_list(scrubbers)` folds `html` through each scrubber in turn.
#   * `get_cached_scrubbed_html_for_object/3` builds the key
#     "<scrubber signature>|<object.id>" and calls `Cachex.fetch!/3` on
#     `:scrubber_cache`; its fallback is `ensure_scrubbed_html/2`, which
#     returns `{:commit, filter_tags(content, scrubbers)}`.
#   * `get_cached_stripped_html_for_object/2` is the same call with
#     `HtmlSanitizeEx.Scrubber.StripTags` as the scrubber.
#   * Private `generate_scrubber_signature/1` wraps a lone atom in a list and
#     then concatenates, per scrubber, `to_string(scrubber)` and a version:
#     `scrubber.version` when `version/0` is exported, otherwise 0.
#   * `version/0` (returning 0) is added to `Pleroma.HTML.Scrubber.TwitterText`,
#     `Pleroma.HTML.Scrubber.Default` and `Pleroma.HTML.Transform.MediaProxy`.
#
# lib/pleroma/user.ex
#   The catch-all `html_filter_policy/1` clause returns `@default_scrubbers`
#   (set from `Pleroma.Config.get([:markup, :scrub_policy])`) instead of nil.
#
# lib/pleroma/web/mastodon_api/views/status_view.ex
# lib/pleroma/web/twitter_api/views/activity_view.ex
#   Both views replace `HTML.filter_tags/2` with
#   `HTML.get_cached_scrubbed_html_for_object/3`, passing `activity` as the
#   `object` argument; activity_view also replaces `HTML.strip_tags/1` with
#   `HTML.get_cached_stripped_html_for_object/2` and moves `alias Pleroma.HTML`.
#
# test/user_test.exs
#   The rich-text-enabled test now expects
#   `Pleroma.Config.get([:markup, :scrub_policy])` rather than nil.
#
# NOTE(review): the cache key contains only the scrubber signature and
#   `activity.id`. status_view caches the output of `render_content()` while
#   activity_view caches its own `content`; if those can differ for one
#   activity, whichever view runs first would populate the entry served to
#   both -- confirm, and consider a per-caller key prefix.
# NOTE(review): `@default_scrubbers` is a module attribute, so the config lookup
#   is evaluated when user.ex is compiled -- confirm that freezing the scrub
#   policy at compile time is intended.
# NOTE(review): `Kernel.function_exported?/3` presumably reports false for a
#   module that has not been loaded yet -- verify the scrubber modules are
#   loaded at this point, otherwise the version silently falls back to 0.
# NOTE(review): signature parts are concatenated with no separator between
#   scrubber name and version -- check that distinct scrubber lists cannot
#   produce the same signature.
# NOTE(review): `~r//` in the activity_view context is an empty pattern as
#   shown here; it may have lost characters during extraction -- compare with
#   the repository before applying.
diff --git a/lib/pleroma/application.ex b/lib/pleroma/application.ex index 36a3694f2..4b997c048 100644 --- a/lib/pleroma/application.ex +++ b/lib/pleroma/application.ex @@ -53,6 +53,16 @@ def start(_type, _args) do ], id: :cachex_object ), + worker( + Cachex, + [ + :scrubber_cache, + [ + limit: 2500 + ] + ], + id: :cachex_scrubber + ), worker( Cachex, [ diff --git a/lib/pleroma/html.ex b/lib/pleroma/html.ex index a0473676b..eb31f131e 100644 --- a/lib/pleroma/html.ex +++ b/lib/pleroma/html.ex @@ -15,8 +15,11 @@ def get_scrubbers() do end def filter_tags(html, nil) do - get_scrubbers() - |> Enum.reduce(html, fn scrubber, html -> + filter_tags(html, get_scrubbers()) + end + + def filter_tags(html, scrubbers) when is_list(scrubbers) do + Enum.reduce(scrubbers, html, fn scrubber, html -> filter_tags(html, scrubber) end) end @@ -24,6 +27,40 @@ def filter_tags(html, nil) do def filter_tags(html, scrubber), do: Scrubber.scrub(html, scrubber) def filter_tags(html), do: filter_tags(html, nil) def strip_tags(html), do: Scrubber.scrub(html, Scrubber.StripTags) + + def get_cached_scrubbed_html_for_object(content, scrubbers, object) do + key = "#{generate_scrubber_signature(scrubbers)}|#{object.id}" + Cachex.fetch!(:scrubber_cache, key, fn _key -> ensure_scrubbed_html(content, scrubbers) end) + end + + def get_cached_stripped_html_for_object(content, object) do + get_cached_scrubbed_html_for_object(content, HtmlSanitizeEx.Scrubber.StripTags, object) + end + + def ensure_scrubbed_html( + content, + scrubbers + ) do + {:commit, filter_tags(content, scrubbers)} + end + + defp generate_scrubber_signature(scrubber) when is_atom(scrubber) do + generate_scrubber_signature([scrubber]) + end + + defp generate_scrubber_signature(scrubbers) do + Enum.reduce(scrubbers, "", fn scrubber, signature -> + # If a scrubber does not have a version(e.g HtmlSanitizeEx.Scrubber.StripTags) it is assumed it is always 0) + version = + if Kernel.function_exported?(scrubber, :version, 0) do 
scrubber.version + else + 0 + end + + "#{signature}#{to_string(scrubber)}#{version}" + end) + end end defmodule Pleroma.HTML.Scrubber.TwitterText do @@ -39,6 +76,10 @@ defmodule Pleroma.HTML.Scrubber.TwitterText do require HtmlSanitizeEx.Scrubber.Meta alias HtmlSanitizeEx.Scrubber.Meta + def version do + 0 + end + Meta.remove_cdata_sections_before_scrub() Meta.strip_comments() @@ -77,6 +118,10 @@ defmodule Pleroma.HTML.Scrubber.Default do require HtmlSanitizeEx.Scrubber.Meta alias HtmlSanitizeEx.Scrubber.Meta + def version do + 0 + end + @markup Application.get_env(:pleroma, :markup) @uri_schemes Application.get_env(:pleroma, :uri_schemes, []) @valid_schemes Keyword.get(@uri_schemes, :valid_schemes, []) @@ -154,6 +199,10 @@ defmodule Pleroma.HTML.Transform.MediaProxy do alias Pleroma.Web.MediaProxy + def version do + 0 + end + def before_scrub(html), do: html def scrub_attribute("img", {"src", "http" <> target}) do diff --git a/lib/pleroma/user.ex b/lib/pleroma/user.ex index 5705098ea..b90a3efae 100644 --- a/lib/pleroma/user.ex +++ b/lib/pleroma/user.ex @@ -786,7 +786,9 @@ def html_filter_policy(%User{info: %{no_rich_text: true}}) do Pleroma.HTML.Scrubber.TwitterText end - def html_filter_policy(_), do: nil + @default_scrubbers Pleroma.Config.get([:markup, :scrub_policy]) + + def html_filter_policy(_), do: @default_scrubbers def get_or_fetch_by_ap_id(ap_id) do user = get_by_ap_id(ap_id) diff --git a/lib/pleroma/web/mastodon_api/views/status_view.ex b/lib/pleroma/web/mastodon_api/views/status_view.ex index 4d4681da8..da61bbd86 100644 --- a/lib/pleroma/web/mastodon_api/views/status_view.ex +++ b/lib/pleroma/web/mastodon_api/views/status_view.ex @@ -120,7 +120,7 @@ def render("status.json", %{activity: %{data: %{"object" => object}} = activity} content = object |> render_content() - |> HTML.filter_tags(User.html_filter_policy(opts[:for])) + |> HTML.get_cached_scrubbed_html_for_object(User.html_filter_policy(opts[:for]), activity) %{ id: to_string(activity.id), diff 
--git a/lib/pleroma/web/twitter_api/views/activity_view.ex b/lib/pleroma/web/twitter_api/views/activity_view.ex index 592cf622f..469f780c7 100644 --- a/lib/pleroma/web/twitter_api/views/activity_view.ex +++ b/lib/pleroma/web/twitter_api/views/activity_view.ex @@ -11,11 +11,11 @@ defmodule Pleroma.Web.TwitterAPI.ActivityView do alias Pleroma.Web.TwitterAPI.TwitterAPI alias Pleroma.Web.TwitterAPI.Representers.ObjectRepresenter alias Pleroma.Activity + alias Pleroma.HTML alias Pleroma.Object alias Pleroma.User alias Pleroma.Repo alias Pleroma.Formatter - alias Pleroma.HTML import Ecto.Query require Logger @@ -245,14 +245,14 @@ def render( html = content - |> HTML.filter_tags(User.html_filter_policy(opts[:for])) + |> HTML.get_cached_scrubbed_html_for_object(User.html_filter_policy(opts[:for]), activity) |> Formatter.emojify(object["emoji"]) text = if content do content |> String.replace(~r//, "\n") - |> HTML.strip_tags() + |> HTML.get_cached_stripped_html_for_object(activity) end reply_parent = Activity.get_in_reply_to_activity(activity) diff --git a/test/user_test.exs b/test/user_test.exs index 4680850ea..869e9196d 100644 --- a/test/user_test.exs +++ b/test/user_test.exs @@ -706,10 +706,10 @@ test "insert or update a user from given data" do end describe "per-user rich-text filtering" do - test "html_filter_policy returns nil when rich-text is enabled" do + test "html_filter_policy returns default policies, when rich-text is enabled" do user = insert(:user) - assert nil == User.html_filter_policy(user) + assert Pleroma.Config.get([:markup, :scrub_policy]) == User.html_filter_policy(user) end test "html_filter_policy returns TwitterText scrubber when rich-text is disabled" do