Fix incorrectly cached content after editing

This commit is contained in:
Tusooa Zhu 2022-06-09 11:39:51 -04:00
parent aafd7a687d
commit c3593639ad
No known key found for this signature in database
GPG key ID: 7B467EDE43A08224
6 changed files with 119 additions and 16 deletions

View file

@ -8,6 +8,40 @@ defmodule Pleroma.Activity.HTML do
@cachex Pleroma.Config.get([:cachex, :provider], Cachex)
# We store a list of cache keys related to an activity in a
# separate cache, scrubber_management_cache. It has the same
# size as scrubber_cache (see application.ex). Every time we add
# a cache to scrubber_cache, we update scrubber_management_cache.
#
# The most recent write of a certain key in the management cache
# is the same as the most recent write of any record related to that
# key in the main cache.
# Assuming LRW ( https://hexdocs.pm/cachex/Cachex.Policy.LRW.html ),
# this means when the management cache is evicted by cachex, all
# related records in the main cache will also have been evicted.
defp get_cache_keys_for(activity_id) do
with {:ok, list} when is_list(list) <- @cachex.get(:scrubber_management_cache, activity_id) do
list
else
_ -> []
end
end
defp add_cache_key_for(activity_id, additional_key) do
current = get_cache_keys_for(activity_id)
unless additional_key in current do
@cachex.put(:scrubber_management_cache, activity_id, [additional_key | current])
end
end
def invalidate_cache_for(activity_id) do
keys = get_cache_keys_for(activity_id)
Enum.map(keys, &@cachex.del(:scrubber_cache, &1))
@cachex.del(:scrubber_management_cache, activity_id)
end
def get_cached_scrubbed_html_for_activity(
content,
scrubbers,
@ -19,6 +53,8 @@ def get_cached_scrubbed_html_for_activity(
@cachex.fetch!(:scrubber_cache, key, fn _key ->
object = Object.normalize(activity, fetch: false)
add_cache_key_for(activity.id, key)
HTML.ensure_scrubbed_html(content, scrubbers, object.data["fake"] || false, callback)
end)
end

View file

@ -189,6 +189,7 @@ defp cachex_children do
build_cachex("object", default_ttl: 25_000, ttl_interval: 1000, limit: 2500),
build_cachex("rich_media", default_ttl: :timer.minutes(120), limit: 5000),
build_cachex("scrubber", limit: 2500),
build_cachex("scrubber_management", limit: 2500),
build_cachex("idempotency", expiration: idempotency_expiration(), limit: 2500),
build_cachex("web_resp", limit: 2500),
build_cachex("emoji_packs", expiration: emoji_packs_expiration(), limit: 10),

View file

@ -455,7 +455,8 @@ defp handle_update_object(
%{data: %{"type" => "Update", "object" => updated_object}} = object,
meta
) do
orig_object = Object.get_by_ap_id(updated_object["id"])
orig_object_ap_id = updated_object["id"]
orig_object = Object.get_by_ap_id(orig_object_ap_id)
orig_object_data = orig_object.data
if orig_object_data["type"] in @updatable_object_types do
@ -468,15 +469,21 @@ defp handle_update_object(
|> Object.maybe_update_history(orig_object_data, updated)
|> maybe_update_poll(updated_object)
orig_object
|> Repo.preload(:hashtags)
|> Object.change(%{data: updated_object_data})
|> Object.update_and_set_cache()
changeset =
orig_object
|> Repo.preload(:hashtags)
|> Object.change(%{data: updated_object_data})
if updated do
object
|> Activity.normalize()
|> ActivityPub.notify_and_stream()
with {:ok, new_object} <- Repo.update(changeset),
{:ok, _} <- Object.invalid_object_cache(new_object),
{:ok, _} <- Object.set_cache(new_object),
# The metadata/utils.ex uses the object id for the cache.
{:ok, _} <- Pleroma.Activity.HTML.invalidate_cache_for(new_object.id) do
if updated do
object
|> Activity.normalize()
|> ActivityPub.notify_and_stream()
end
end
end

View file

@ -272,6 +272,16 @@ def render("show.json", %{activity: %{data: %{"object" => _object}} = activity}
reply_to_user = reply_to && CommonAPI.get_user(reply_to.data["actor"])
history_len =
1 +
(Object.history_for(object.data)
|> Map.get("orderedItems")
|> length())
# See render("history.json", ...) for more details
# Here the implicit index of the current content is 0
chrono_order = history_len - 1
content =
object
|> render_content()
@ -281,14 +291,14 @@ def render("show.json", %{activity: %{data: %{"object" => _object}} = activity}
|> Activity.HTML.get_cached_scrubbed_html_for_activity(
User.html_filter_policy(opts[:for]),
activity,
"mastoapi:content"
"mastoapi:content:#{chrono_order}"
)
content_plaintext =
content
|> Activity.HTML.get_cached_stripped_html_for_activity(
activity,
"mastoapi:content"
"mastoapi:content:#{chrono_order}"
)
summary = object.data["summary"] || ""
@ -410,9 +420,25 @@ def render("history.json", %{activity: %{data: %{"object" => _object}} = activit
history = [object | past_history]
history_len = length(history)
history =
Enum.with_index(
history,
fn object, index ->
%{
# The history is prepended every time there is a new edit.
# In chrono_order, the oldest item is always at 0, and so on.
# The chrono_order is an invariant kept between edits.
chrono_order: history_len - 1 - index,
object: object
}
end
)
individual_opts =
opts
|> Map.put(:as, :object)
|> Map.put(:as, :item)
|> Map.put(:user, user)
|> Map.put(:hashtags, hashtags)
@ -421,7 +447,12 @@ def render("history.json", %{activity: %{data: %{"object" => _object}} = activit
def render(
"history_item.json",
%{activity: activity, user: user, object: object, hashtags: hashtags} = opts
%{
activity: activity,
user: user,
item: %{object: object, chrono_order: chrono_order},
hashtags: hashtags
} = opts
) do
sensitive = object.data["sensitive"] || Enum.member?(hashtags, "nsfw")
@ -439,7 +470,7 @@ def render(
|> Activity.HTML.get_cached_scrubbed_html_for_activity(
User.html_filter_policy(opts[:for]),
activity,
"mastoapi:content"
"mastoapi:content:#{chrono_order}"
)
summary = object.data["summary"] || ""

View file

@ -2061,9 +2061,15 @@ test "it returns the source", %{conn: conn} do
oauth_access(["write:statuses"])
end
test "it updates the status", %{conn: conn, user: user} do
test "it updates the status" do
%{conn: conn, user: user} = oauth_access(["write:statuses", "read:statuses"])
{:ok, activity} = CommonAPI.post(user, %{status: "mew mew #abc", spoiler_text: "#def"})
conn
|> get("/api/v1/statuses/#{activity.id}")
|> json_response_and_validate_schema(200)
response =
conn
|> put_req_header("content-type", "application/json")
@ -2075,6 +2081,14 @@ test "it updates the status", %{conn: conn, user: user} do
assert response["content"] == "edited"
assert response["spoiler_text"] == "lol"
response =
conn
|> get("/api/v1/statuses/#{activity.id}")
|> json_response_and_validate_schema(200)
assert response["content"] == "edited"
assert response["spoiler_text"] == "lol"
end
test "it updates the attachments", %{conn: conn, user: user} do

View file

@ -3,7 +3,7 @@
# SPDX-License-Identifier: AGPL-3.0-only
defmodule Pleroma.Web.Metadata.UtilsTest do
use Pleroma.DataCase, async: true
use Pleroma.DataCase, async: false
import Pleroma.Factory
alias Pleroma.Web.Metadata.Utils
@ -22,6 +22,20 @@ test "it returns text without encode HTML" do
assert Utils.scrub_html_and_truncate(note) == "Pleroma's really cool!"
end
test "it does not return old content after editing" do
user = insert(:user)
{:ok, activity} = Pleroma.Web.CommonAPI.post(user, %{status: "mew mew #def"})
object = Pleroma.Object.normalize(activity)
assert Utils.scrub_html_and_truncate(object) == "mew mew #def"
{:ok, update} = Pleroma.Web.CommonAPI.update(user, activity, %{status: "mew mew #abc"})
update = Pleroma.Activity.normalize(update)
object = Pleroma.Object.normalize(update)
assert Utils.scrub_html_and_truncate(object) == "mew mew #abc"
end
end
describe "scrub_html_and_truncate/2" do