Merge branch 'fix/parser-url-bugs' into 'master'
Fix small parser bugs. Closes #23, #21, #16, #15, #13, and #10. See merge request pleroma/elixir-libraries/linkify!24
This commit is contained in:
commit
5581840992
4 changed files with 172 additions and 54 deletions
|
@ -5,6 +5,8 @@
|
|||
### Fixed
|
||||
|
||||
- Hashtags followed by HTML tags "a", "code" and "pre" were not detected
|
||||
- Incorrect parsing of HTML links inside HTML tags
|
||||
- Punctuation marks at the end of URLs were included in the HTML links
|
||||
|
||||
## 0.2.0 - 2020-07-21
|
||||
|
||||
|
|
|
@ -24,6 +24,8 @@ defmodule Linkify.Parser do
|
|||
|
||||
@match_skipped_tag ~r/^(?<tag>(a|code|pre)).*>*/
|
||||
|
||||
@delimiters ~r/[,.;:>]*$/
|
||||
|
||||
@prefix_extra [
|
||||
"magnet:?",
|
||||
"dweb://",
|
||||
|
@ -56,7 +58,7 @@ defmodule Linkify.Parser do
|
|||
~s{Check out <a href="http://google.com">google.com</a>}
|
||||
"""
|
||||
|
||||
@types [:url, :email, :hashtag, :mention, :extra]
|
||||
@types [:url, :email, :hashtag, :extra, :mention]
|
||||
|
||||
def parse(input, opts \\ %{})
|
||||
def parse(input, opts) when is_binary(input), do: {input, %{}} |> parse(opts) |> elem(0)
|
||||
|
@ -83,9 +85,6 @@ defmodule Linkify.Parser do
|
|||
defp do_parse({"", user_acc}, _opts, {"", acc, _}),
|
||||
do: {Enum.reverse(acc), user_acc}
|
||||
|
||||
defp do_parse({"@" <> text, user_acc}, opts, {buffer, acc, :skip}),
|
||||
do: do_parse({text, user_acc}, opts, {"", accumulate(acc, buffer, "@"), :skip})
|
||||
|
||||
defp do_parse(
|
||||
{"<" <> text, user_acc},
|
||||
%{hashtag: true} = opts,
|
||||
|
@ -103,6 +102,11 @@ defmodule Linkify.Parser do
|
|||
end
|
||||
end
|
||||
|
||||
defp do_parse({"<br" <> text, user_acc}, opts, {buffer, acc, :parsing}) do
|
||||
{buffer, user_acc} = link(buffer, opts, user_acc)
|
||||
do_parse({text, user_acc}, opts, {"", accumulate(acc, buffer, "<br"), {:open, 1}})
|
||||
end
|
||||
|
||||
defp do_parse({"<a" <> text, user_acc}, opts, {buffer, acc, :parsing}),
|
||||
do: do_parse({text, user_acc}, opts, {"", accumulate(acc, buffer, "<a"), :skip})
|
||||
|
||||
|
@ -124,43 +128,13 @@ defmodule Linkify.Parser do
|
|||
defp do_parse({"<" <> text, user_acc}, opts, {"", acc, :parsing}),
|
||||
do: do_parse({text, user_acc}, opts, {"<", acc, {:open, 1}})
|
||||
|
||||
defp do_parse({"<" <> text, user_acc}, opts, {"", acc, {:html, level}}) do
|
||||
do_parse({text, user_acc}, opts, {"<", acc, {:open, level + 1}})
|
||||
end
|
||||
|
||||
defp do_parse({">" <> text, user_acc}, opts, {buffer, acc, {:attrs, level}}),
|
||||
do:
|
||||
do_parse(
|
||||
{text, user_acc},
|
||||
opts,
|
||||
{"", accumulate(acc, buffer, ">"), {:html, level}}
|
||||
)
|
||||
defp do_parse({">" <> text, user_acc}, opts, {buffer, acc, {:attrs, _level}}),
|
||||
do: do_parse({text, user_acc}, opts, {"", accumulate(acc, buffer, ">"), :parsing})
|
||||
|
||||
defp do_parse({<<ch::8>> <> text, user_acc}, opts, {"", acc, {:attrs, level}}) do
|
||||
do_parse({text, user_acc}, opts, {"", accumulate(acc, <<ch::8>>), {:attrs, level}})
|
||||
end
|
||||
|
||||
defp do_parse({"</" <> text, user_acc}, opts, {buffer, acc, {:html, level}}) do
|
||||
{buffer, user_acc} = link(buffer, opts, user_acc)
|
||||
|
||||
do_parse(
|
||||
{text, user_acc},
|
||||
opts,
|
||||
{"", accumulate(acc, buffer, "</"), {:close, level}}
|
||||
)
|
||||
end
|
||||
|
||||
defp do_parse({">" <> text, user_acc}, opts, {buffer, acc, {:close, 1}}),
|
||||
do: do_parse({text, user_acc}, opts, {"", accumulate(acc, buffer, ">"), :parsing})
|
||||
|
||||
defp do_parse({">" <> text, user_acc}, opts, {buffer, acc, {:close, level}}),
|
||||
do:
|
||||
do_parse(
|
||||
{text, user_acc},
|
||||
opts,
|
||||
{"", accumulate(acc, buffer, ">"), {:html, level - 1}}
|
||||
)
|
||||
|
||||
defp do_parse({text, user_acc}, opts, {buffer, acc, {:open, level}}) do
|
||||
do_parse({text, user_acc}, opts, {"", accumulate(acc, buffer), {:attrs, level}})
|
||||
end
|
||||
|
@ -194,18 +168,14 @@ defmodule Linkify.Parser do
|
|||
do: do_parse({text, user_acc}, opts, {buffer <> <<ch::8>>, acc, state})
|
||||
|
||||
def check_and_link(:url, buffer, opts, _user_acc) do
|
||||
str = strip_parens(buffer)
|
||||
|
||||
if url?(str, opts) do
|
||||
case @match_url |> Regex.run(str, capture: [:url]) |> hd() do
|
||||
if url?(buffer, opts) do
|
||||
case @match_url |> Regex.run(buffer, capture: [:url]) |> hd() do
|
||||
^buffer ->
|
||||
link_url(buffer, opts)
|
||||
|
||||
url ->
|
||||
buffer
|
||||
|> String.split(url)
|
||||
|> Enum.intersperse(link_url(url, opts))
|
||||
|> if(opts[:iodata], do: & &1, else: &Enum.join(&1)).()
|
||||
link = link_url(url, opts)
|
||||
restore_stripped_symbols(buffer, url, link)
|
||||
end
|
||||
else
|
||||
:nomatch
|
||||
|
@ -228,19 +198,21 @@ defmodule Linkify.Parser do
|
|||
|> link_hashtag(buffer, opts, user_acc)
|
||||
end
|
||||
|
||||
def check_and_link(:extra, "xmpp:" <> handle, opts, _user_acc) do
|
||||
if email?(handle, opts), do: link_extra("xmpp:" <> handle, opts), else: handle
|
||||
def check_and_link(:extra, "xmpp:" <> handle = buffer, opts, _user_acc) do
|
||||
if email?(handle, opts), do: link_extra(buffer, opts), else: :nomatch
|
||||
end
|
||||
|
||||
def check_and_link(:extra, buffer, opts, _user_acc) do
|
||||
if String.starts_with?(buffer, @prefix_extra), do: link_extra(buffer, opts), else: :nomatch
|
||||
end
|
||||
|
||||
defp strip_parens("(" <> buffer) do
|
||||
~r/[^\)]*/ |> Regex.run(buffer) |> hd()
|
||||
defp strip_parens(buffer) do
|
||||
buffer
|
||||
|> String.trim_leading("(")
|
||||
|> String.trim_trailing(")")
|
||||
end
|
||||
|
||||
defp strip_parens(buffer), do: buffer
|
||||
defp strip_punctuation(buffer), do: String.replace(buffer, @delimiters, "")
|
||||
|
||||
def url?(buffer, opts) do
|
||||
valid_url?(buffer) && Regex.match?(@match_url, buffer) && valid_tld?(buffer, opts)
|
||||
|
@ -360,10 +332,30 @@ defmodule Linkify.Parser do
|
|||
end
|
||||
|
||||
defp check_and_link_reducer(type, buffer, opts, user_acc) do
|
||||
case check_and_link(type, buffer, opts, user_acc) do
|
||||
:nomatch -> {:cont, {buffer, user_acc}}
|
||||
{buffer, user_acc} -> {:halt, {buffer, user_acc}}
|
||||
buffer -> {:halt, {buffer, user_acc}}
|
||||
str =
|
||||
buffer
|
||||
|> String.split("<")
|
||||
|> List.first()
|
||||
|> strip_punctuation()
|
||||
|> strip_parens()
|
||||
|
||||
case check_and_link(type, str, opts, user_acc) do
|
||||
:nomatch ->
|
||||
{:cont, {buffer, user_acc}}
|
||||
|
||||
{link, user_acc} ->
|
||||
{:halt, {restore_stripped_symbols(buffer, str, link), user_acc}}
|
||||
|
||||
link ->
|
||||
{:halt, {restore_stripped_symbols(buffer, str, link), user_acc}}
|
||||
end
|
||||
end
|
||||
|
||||
defp restore_stripped_symbols(buffer, buffer, link), do: link
|
||||
|
||||
defp restore_stripped_symbols(buffer, stripped_buffer, link) do
|
||||
buffer
|
||||
|> String.split(stripped_buffer)
|
||||
|> Enum.intersperse(link)
|
||||
end
|
||||
end
|
||||
|
|
|
@ -282,6 +282,63 @@ defmodule LinkifyTest do
|
|||
|
||||
assert mentions |> MapSet.to_list() |> Enum.map(&elem(&1, 1)) == ["user"]
|
||||
end
|
||||
|
||||
test "mentions handler and extra links" do
|
||||
text =
|
||||
"hi @user, text me asap xmpp:me@cofe.ai, (or contact me at me@cofe.ai), please.<br>cofe.ai."
|
||||
|
||||
valid_users = ["user", "cofe"]
|
||||
|
||||
handler = fn "@" <> user = mention, buffer, _opts, acc ->
|
||||
if Enum.member?(valid_users, user) do
|
||||
link = ~s(<a href="https://example.com/user/#{user}" data-user="#{user}">#{mention}</a>)
|
||||
{link, %{acc | mentions: MapSet.put(acc.mentions, {mention, user})}}
|
||||
else
|
||||
{buffer, acc}
|
||||
end
|
||||
end
|
||||
|
||||
{result_text, %{mentions: mentions}} =
|
||||
Linkify.link_map(text, %{mentions: MapSet.new()},
|
||||
mention: true,
|
||||
mention_handler: handler,
|
||||
extra: true,
|
||||
email: true
|
||||
)
|
||||
|
||||
assert result_text ==
|
||||
"hi <a href=\"https://example.com/user/user\" data-user=\"user\">@user</a>, text me asap <a href=\"xmpp:me@cofe.ai\">xmpp:me@cofe.ai</a>, (or contact me at <a href=\"mailto:me@cofe.ai\">me@cofe.ai</a>), please.<br><a href=\"http://cofe.ai\">cofe.ai</a>."
|
||||
|
||||
assert MapSet.to_list(mentions) == [{"@user", "user"}]
|
||||
end
|
||||
|
||||
test "mentions handler and emails" do
|
||||
text = "hi @friend, here is my email<br><br>user@user.me"
|
||||
|
||||
valid_users = ["user", "friend"]
|
||||
|
||||
handler = fn "@" <> user = mention, buffer, _opts, acc ->
|
||||
if Enum.member?(valid_users, user) do
|
||||
link = ~s(<a href="https://example.com/user/#{user}" data-user="#{user}">#{mention}</a>)
|
||||
{link, %{acc | mentions: MapSet.put(acc.mentions, {mention, user})}}
|
||||
else
|
||||
{buffer, acc}
|
||||
end
|
||||
end
|
||||
|
||||
{result_text, %{mentions: mentions}} =
|
||||
Linkify.link_map(text, %{mentions: MapSet.new()},
|
||||
mention: true,
|
||||
mention_handler: handler,
|
||||
extra: true,
|
||||
email: true
|
||||
)
|
||||
|
||||
assert result_text ==
|
||||
"hi <a href=\"https://example.com/user/friend\" data-user=\"friend\">@friend</a>, here is my email<br><br><a href=\"mailto:user@user.me\">user@user.me</a>"
|
||||
|
||||
assert MapSet.to_list(mentions) == [{"@friend", "friend"}]
|
||||
end
|
||||
end
|
||||
|
||||
describe "mentions" do
|
||||
|
@ -306,7 +363,7 @@ defmodule LinkifyTest do
|
|||
assert Linkify.link(text, mention: true, mention_prefix: "u/") == expected
|
||||
end
|
||||
|
||||
test "metion @user@example.com" do
|
||||
test "mention @user@example.com" do
|
||||
text = "hey @user@example.com"
|
||||
|
||||
expected =
|
||||
|
@ -317,6 +374,16 @@ defmodule LinkifyTest do
|
|||
mention_prefix: "https://example.com/user/",
|
||||
new_window: true
|
||||
) == expected
|
||||
|
||||
text = "That's @user@example.com's server"
|
||||
|
||||
expected =
|
||||
"That's <a href=\"https://example.com/user/user@example.com\">@user@example.com</a>'s server"
|
||||
|
||||
assert Linkify.link(text,
|
||||
mention: true,
|
||||
mention_prefix: "https://example.com/user/"
|
||||
) == expected
|
||||
end
|
||||
end
|
||||
|
||||
|
@ -492,6 +559,12 @@ defmodule LinkifyTest do
|
|||
assert Linkify.link(text, extra: true) == expected
|
||||
end
|
||||
|
||||
test "wrong xmpp" do
|
||||
text = "xmpp:user.example.com"
|
||||
|
||||
assert Linkify.link(text, extra: true) == text
|
||||
end
|
||||
|
||||
test "email" do
|
||||
text = "user@example.com"
|
||||
expected = "<a href=\"mailto:user@example.com\">user@example.com</a>"
|
||||
|
|
|
@ -114,6 +114,26 @@ defmodule Linkify.ParserTest do
|
|||
assert parse(text) == expected
|
||||
end
|
||||
|
||||
test "handle angle bracket in the end" do
|
||||
text = "google.com <br>"
|
||||
assert parse(text) == "<a href=\"http://google.com\">google.com</a> <br>"
|
||||
|
||||
text = "google.com<br>hey"
|
||||
assert parse(text) == "<a href=\"http://google.com\">google.com</a><br>hey"
|
||||
|
||||
text = "hey<br>google.com"
|
||||
assert parse(text) == "hey<br><a href=\"http://google.com\">google.com</a>"
|
||||
|
||||
text = "<br />google.com"
|
||||
assert parse(text) == "<br /><a href=\"http://google.com\">google.com</a>"
|
||||
|
||||
text = "google.com<"
|
||||
assert parse(text) == "<a href=\"http://google.com\">google.com</a><"
|
||||
|
||||
text = "google.com>"
|
||||
assert parse(text) == "<a href=\"http://google.com\">google.com</a>>"
|
||||
end
|
||||
|
||||
test "does not link attributes" do
|
||||
text = "Check out <a href='google.com'>google</a>"
|
||||
assert parse(text) == text
|
||||
|
@ -155,6 +175,20 @@ defmodule Linkify.ParserTest do
|
|||
assert parse(text, class: false, rel: false) == expected
|
||||
end
|
||||
|
||||
test "html links inside html" do
|
||||
text = ~s(<p><a href="http://google.com">google.com</a></p>)
|
||||
assert parse(text) == text
|
||||
|
||||
text = ~s(<span><a href="http://google.com">google.com</a></span>)
|
||||
assert parse(text) == text
|
||||
|
||||
text = ~s(<h1><a href="http://google.com">google.com</a></h1>)
|
||||
assert parse(text) == text
|
||||
|
||||
text = ~s(<li><a href="http://google.com">google.com</a></li>)
|
||||
assert parse(text) == text
|
||||
end
|
||||
|
||||
test "do not link parens" do
|
||||
text = " foo (https://example.com/path/folder/), bar"
|
||||
|
||||
|
@ -171,6 +205,23 @@ defmodule Linkify.ParserTest do
|
|||
assert parse(text, class: false, rel: false) == expected
|
||||
end
|
||||
|
||||
test "do not link punctuation marks in the end" do
|
||||
text = "google.com."
|
||||
assert parse(text) == "<a href=\"http://google.com\">google.com</a>."
|
||||
|
||||
text = "google.com;"
|
||||
assert parse(text) == "<a href=\"http://google.com\">google.com</a>;"
|
||||
|
||||
text = "google.com:"
|
||||
assert parse(text) == "<a href=\"http://google.com\">google.com</a>:"
|
||||
|
||||
text = "hack google.com, please"
|
||||
assert parse(text) == "hack <a href=\"http://google.com\">google.com</a>, please"
|
||||
|
||||
text = "(check out google.com)"
|
||||
assert parse(text) == "(check out <a href=\"http://google.com\">google.com</a>)"
|
||||
end
|
||||
|
||||
test "do not link urls" do
|
||||
text = "google.com"
|
||||
assert parse(text, url: false) == text
|
||||
|
|
Loading…
Reference in a new issue