From 01d396585e428ea1ca7e21868d7303a0bd8ffd6f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?H=C3=A9l=C3=A8ne?= Date: Mon, 25 Jul 2022 16:20:12 +0200 Subject: [PATCH 1/4] Emoji: implement full-qualifier using combinations This implements fully_qualify_emoji/1, which will return the fully-qualified version of an emoji if it knows of one, or return the emoji unmodified if not. This code generates combinations per emoji: for each FE0F, all possible combinations of the character being removed or staying will be generated. This is made as an attempt to find all partially-qualified and unqualified versions of a fully-qualified emoji. I have found *no cases* for which this would be a problem, after browsing the entire emoji list in emoji-test.txt. This is safe, and, sadly, most likely the sanest too. --- lib/pleroma/emoji.ex | 45 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/lib/pleroma/emoji.ex b/lib/pleroma/emoji.ex index 35f0da816..3726ef185 100644 --- a/lib/pleroma/emoji.ex +++ b/lib/pleroma/emoji.ex @@ -137,4 +137,49 @@ def is_unicode_emoji?(unquote(emoji)), do: true end def is_unicode_emoji?(_), do: false + + # FE0F is the emoji variation sequence. It is used for fully-qualifying + # emoji, and that includes emoji combinations. + # This code generates combinations per emoji: for each FE0F, all possible + # combinations of the character being removed or staying will be generated. + # This is made as an attempt to find all partially-qualified and unqualified + # versions of a fully-qualified emoji. + # I have found *no cases* for which this would be a problem, after browsing + # the entire emoji list in emoji-test.txt. This is safe, and, sadly, most + # likely sane too. + emoji_qualification_map = + emojis + |> Enum.filter(&String.contains?(&1, "\uFE0F")) + |> Enum.map(fn emoji -> + combinate = fn x, combinate -> + case x do + [] -> + [[]] + + ["\uFE0F" | tail] -> + combinate.(tail, combinate) + |> Enum.flat_map(fn x -> [x, ["\uFE0F" | x]] end) + + [codepoint | tail] -> + combinate.(tail, combinate) + |> Enum.map(fn x -> [codepoint | x] end) + end + end + + unqualified_list = + emoji + |> String.codepoints() + |> combinate.(combinate) + |> Enum.map(&List.to_string/1) + + {emoji, unqualified_list} + end) + + for {qualified, unqualified_list} <- emoji_qualification_map do + for unqualified <- unqualified_list do + def fully_qualify_emoji(unquote(unqualified)), do: unquote(qualified) + end + end + + def fully_qualify_emoji(emoji), do: emoji end From fb3f6e1975fc44414af66377061bf30ceee9f9b4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?H=C3=A9l=C3=A8ne?= Date: Mon, 25 Jul 2022 16:49:23 +0200 Subject: [PATCH 2/4] EmojiReactValidator: use new qualification method --- .../emoji_react_validator.ex | 3 +- test/fixtures/emoji-reaction-unqualified.json | 30 ------------------- .../emoji_react_handling_test.exs | 13 +++++--- 3 files changed, 10 insertions(+), 36 deletions(-) delete mode 100644 test/fixtures/emoji-reaction-unqualified.json diff --git a/lib/pleroma/web/activity_pub/object_validators/emoji_react_validator.ex b/lib/pleroma/web/activity_pub/object_validators/emoji_react_validator.ex index 2eb4f6842..0858281e5 100644 --- a/lib/pleroma/web/activity_pub/object_validators/emoji_react_validator.ex +++ b/lib/pleroma/web/activity_pub/object_validators/emoji_react_validator.ex @@ -63,8 +63,7 @@ defp fix(data) do end defp fix_emoji_qualification(%{"content" => emoji} = data) do - # Emoji variation sequence - new_emoji = emoji <> "\uFE0F" + new_emoji = Pleroma.Emoji.fully_qualify_emoji(emoji) cond do Pleroma.Emoji.is_unicode_emoji?(emoji) -> diff --git a/test/fixtures/emoji-reaction-unqualified.json b/test/fixtures/emoji-reaction-unqualified.json deleted file mode 100644 index 722fd7092..000000000 --- a/test/fixtures/emoji-reaction-unqualified.json +++ /dev/null @@ -1,30 +0,0 @@ -{ - "type": "EmojiReact", - "signature": { - "type": "RsaSignature2017", - "signatureValue": "fdxMfQSMwbC6wP6sh6neS/vM5879K67yQkHTbiT5Npr5wAac0y6+o3Ij+41tN3rL6wfuGTosSBTHOtta6R4GCOOhCaCSLMZKypnp1VltCzLDoyrZELnYQIC8gpUXVmIycZbREk22qWUe/w7DAFaKK4UscBlHDzeDVcA0K3Se5Sluqi9/Zh+ldAnEzj/rSEPDjrtvf5wGNf3fHxbKSRKFt90JvKK6hS+vxKUhlRFDf6/SMETw+EhwJSNW4d10yMUakqUWsFv4Acq5LW7l+HpYMvlYY1FZhNde1+uonnCyuQDyvzkff8zwtEJmAXC4RivO/VVLa17SmqheJZfI8oluVg==", - "creator": "http://mastodon.example.org/users/admin#main-key", - "created": "2018-02-17T18:57:49Z" - }, - "object": "http://localtesting.pleroma.lol/objects/eb92579d-3417-42a8-8652-2492c2d4f454", - "content": "❤", - "nickname": "lain", - "id": "http://mastodon.example.org/users/admin#reactions/2", - "actor": "http://mastodon.example.org/users/admin", - "@context": [ - "https://www.w3.org/ns/activitystreams", - "https://w3id.org/security/v1", - { - "toot": "http://joinmastodon.org/ns#", - "sensitive": "as:sensitive", - "ostatus": "http://ostatus.org#", - "movedTo": "as:movedTo", - "manuallyApprovesFollowers": "as:manuallyApprovesFollowers", - "inReplyToAtomUri": "ostatus:inReplyToAtomUri", - "conversation": "ostatus:conversation", - "atomUri": "ostatus:atomUri", - "Hashtag": "as:Hashtag", - "Emoji": "toot:Emoji" - } - ] -} diff --git a/test/pleroma/web/activity_pub/transmogrifier/emoji_react_handling_test.exs b/test/pleroma/web/activity_pub/transmogrifier/emoji_react_handling_test.exs index 41d96fa66..9d99df27c 100644 --- a/test/pleroma/web/activity_pub/transmogrifier/emoji_react_handling_test.exs +++ b/test/pleroma/web/activity_pub/transmogrifier/emoji_react_handling_test.exs @@ -42,11 +42,15 @@ test "it works for incoming unqualified emoji reactions" do other_user = insert(:user, local: false) {:ok, activity} = CommonAPI.post(user, %{status: "hello"}) + # woman detective emoji, unqualified + unqualified_emoji = [0x1F575, 0x200D, 0x2640] |> List.to_string() + data = - File.read!("test/fixtures/emoji-reaction-unqualified.json") + File.read!("test/fixtures/emoji-reaction.json") |> Jason.decode!() |> Map.put("object", activity.data["object"]) |> Map.put("actor", other_user.ap_id) + |> Map.put("content", unqualified_emoji) {:ok, %Activity{data: data, local: false}} = Transmogrifier.handle_incoming(data) @@ -54,13 +58,14 @@ test "it works for incoming unqualified emoji reactions" do assert data["type"] == "EmojiReact" assert data["id"] == "http://mastodon.example.org/users/admin#reactions/2" assert data["object"] == activity.data["object"] - # heart emoji with added emoji variation sequence - assert data["content"] == "❤\uFE0F" + # woman detective emoji, fully qualified + emoji = [0x1F575, 0xFE0F, 0x200D, 0x2640, 0xFE0F] |> List.to_string() + assert data["content"] == emoji object = Object.get_by_ap_id(data["object"]) assert object.data["reaction_count"] == 1 - assert match?([["❤\uFE0F", _]], object.data["reactions"]) + assert match?([[emoji, _]], object.data["reactions"]) end test "it reject invalid emoji reactions" do From b99f5d61834ffd86f9e8aeca2b00c704f0a0467e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?H=C3=A9l=C3=A8ne?= Date: Tue, 26 Jul 2022 01:38:59 +0200 Subject: [PATCH 3/4] Emoji: split qualification variation into a module --- lib/pleroma/emoji.ex | 35 ++------------------------ lib/pleroma/emoji/combinations.ex | 41 +++++++++++++++++++++++++++++++ 2 files changed, 43 insertions(+), 33 deletions(-) create mode 100644 lib/pleroma/emoji/combinations.ex diff --git a/lib/pleroma/emoji.ex b/lib/pleroma/emoji.ex index 3726ef185..dd65d56ae 100644 --- a/lib/pleroma/emoji.ex +++ b/lib/pleroma/emoji.ex @@ -9,6 +9,7 @@ defmodule Pleroma.Emoji do """ use GenServer + alias Pleroma.Emoji.Combinations alias Pleroma.Emoji.Loader require Logger @@ -138,42 +139,10 @@ def is_unicode_emoji?(unquote(emoji)), do: true def is_unicode_emoji?(_), do: false - # FE0F is the emoji variation sequence. It is used for fully-qualifying - # emoji, and that includes emoji combinations. - # This code generates combinations per emoji: for each FE0F, all possible - # combinations of the character being removed or staying will be generated. - # This is made as an attempt to find all partially-qualified and unqualified - # versions of a fully-qualified emoji. - # I have found *no cases* for which this would be a problem, after browsing - # the entire emoji list in emoji-test.txt. This is safe, and, sadly, most - # likely sane too. emoji_qualification_map = emojis |> Enum.filter(&String.contains?(&1, "\uFE0F")) - |> Enum.map(fn emoji -> - combinate = fn x, combinate -> - case x do - [] -> - [[]] - - ["\uFE0F" | tail] -> - combinate.(tail, combinate) - |> Enum.flat_map(fn x -> [x, ["\uFE0F" | x]] end) - - [codepoint | tail] -> - combinate.(tail, combinate) - |> Enum.map(fn x -> [codepoint | x] end) - end - end - - unqualified_list = - emoji - |> String.codepoints() - |> combinate.(combinate) - |> Enum.map(&List.to_string/1) - - {emoji, unqualified_list} - end) + |> Combinations.variate_emoji_qualification() for {qualified, unqualified_list} <- emoji_qualification_map do for unqualified <- unqualified_list do diff --git a/lib/pleroma/emoji/combinations.ex b/lib/pleroma/emoji/combinations.ex new file mode 100644 index 000000000..c49466406 --- /dev/null +++ b/lib/pleroma/emoji/combinations.ex @@ -0,0 +1,41 @@ +# Pleroma: A lightweight social networking server +# Copyright © 2017-2022 Pleroma Authors +# SPDX-License-Identifier: AGPL-3.0-only + +defmodule Pleroma.Emoji.Combinations do + # FE0F is the emoji variation sequence. It is used for fully-qualifying + # emoji, and that includes emoji combinations. + # This code generates combinations per emoji: for each FE0F, all possible + # combinations of the character being removed or staying will be generated. + # This is made as an attempt to find all partially-qualified and unqualified + # versions of a fully-qualified emoji. + # I have found *no cases* for which this would be a problem, after browsing + # the entire emoji list in emoji-test.txt. This is safe, and, sadly, most + # likely sane too. + + defp qualification_combinations([]), do: [[]] + + defp qualification_combinations(["\uFE0F" | tail]) do + tail + |> qualification_combinations() + |> Enum.flat_map(fn x -> [x, ["\uFE0F" | x]] end) + end + + defp qualification_combinations([codepoint | tail]) do + tail + |> qualification_combinations() + |> Enum.map(fn x -> [codepoint | x] end) + end + + def variate_emoji_qualification(emoji) when is_binary(emoji) do + emoji + |> String.codepoints() + |> qualification_combinations() + |> Enum.map(&List.to_string/1) + end + + def variate_emoji_qualification(emoji) when is_list(emoji) do + emoji + |> Enum.map(fn emoji -> {emoji, variate_emoji_qualification(emoji)} end) + end +end From 7167de592e3523459a1eb65d902085e828f962b2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?H=C3=A9l=C3=A8ne?= Date: Tue, 26 Jul 2022 23:15:09 +0200 Subject: [PATCH 4/4] Emoji: apply recommended tail call changes Behavior matches previous code. Co-authored-by: Tusooa Zhu --- lib/pleroma/emoji/combinations.ex | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/lib/pleroma/emoji/combinations.ex b/lib/pleroma/emoji/combinations.ex index c49466406..981c73596 100644 --- a/lib/pleroma/emoji/combinations.ex +++ b/lib/pleroma/emoji/combinations.ex @@ -13,18 +13,22 @@ defmodule Pleroma.Emoji.Combinations do # the entire emoji list in emoji-test.txt. This is safe, and, sadly, most # likely sane too. - defp qualification_combinations([]), do: [[]] - - defp qualification_combinations(["\uFE0F" | tail]) do - tail - |> qualification_combinations() - |> Enum.flat_map(fn x -> [x, ["\uFE0F" | x]] end) + defp qualification_combinations(codepoints) do + qualification_combinations([[]], codepoints) end - defp qualification_combinations([codepoint | tail]) do - tail - |> qualification_combinations() - |> Enum.map(fn x -> [codepoint | x] end) + defp qualification_combinations(acc, []), do: acc + + defp qualification_combinations(acc, ["\uFE0F" | tail]) do + acc + |> Enum.flat_map(fn x -> [x, x ++ ["\uFE0F"]] end) + |> qualification_combinations(tail) + end + + defp qualification_combinations(acc, [codepoint | tail]) do + acc + |> Enum.map(&Kernel.++(&1, [codepoint])) + |> qualification_combinations(tail) end def variate_emoji_qualification(emoji) when is_binary(emoji) do