From e5a8cf2b08edfe22f776d9c5f500029747daf1c8 Mon Sep 17 00:00:00 2001 From: Sergey Suprunenko Date: Sat, 29 Aug 2020 23:29:07 +0200 Subject: [PATCH 1/5] Do not parse html links inside html tags --- lib/linkify/parser.ex | 15 +++++++-------- test/parser_test.exs | 14 ++++++++++++++ 2 files changed, 21 insertions(+), 8 deletions(-) diff --git a/lib/linkify/parser.ex b/lib/linkify/parser.ex index a8bcf57..38eca8c 100644 --- a/lib/linkify/parser.ex +++ b/lib/linkify/parser.ex @@ -128,13 +128,8 @@ defmodule Linkify.Parser do do_parse({text, user_acc}, opts, {"<", acc, {:open, level + 1}}) end - defp do_parse({">" <> text, user_acc}, opts, {buffer, acc, {:attrs, level}}), - do: - do_parse( - {text, user_acc}, - opts, - {"", accumulate(acc, buffer, ">"), {:html, level}} - ) + defp do_parse({">" <> text, user_acc}, opts, {buffer, acc, {:attrs, _level}}), + do: do_parse({text, user_acc}, opts, {"", accumulate(acc, buffer, ">"), :parsing}) defp do_parse({<> <> text, user_acc}, opts, {"", acc, {:attrs, level}}) do do_parse({text, user_acc}, opts, {"", accumulate(acc, <>), {:attrs, level}}) @@ -194,7 +189,11 @@ defmodule Linkify.Parser do do: do_parse({text, user_acc}, opts, {buffer <> <>, acc, state}) def check_and_link(:url, buffer, opts, _user_acc) do - str = strip_parens(buffer) + str = + buffer + |> String.split("<") + |> List.first() + |> strip_parens() if url?(str, opts) do case @match_url |> Regex.run(str, capture: [:url]) |> hd() do diff --git a/test/parser_test.exs b/test/parser_test.exs index 8aa7ed6..718be90 100644 --- a/test/parser_test.exs +++ b/test/parser_test.exs @@ -155,6 +155,20 @@ defmodule Linkify.ParserTest do assert parse(text, class: false, rel: false) == expected end + test "html links inside html" do + text = ~s(

google.com

) + assert parse(text) == text + + text = ~s(google.com) + assert parse(text) == text + + text = ~s(

google.com

) + assert parse(text) == text + + text = ~s(
  • google.com
  • ) + assert parse(text) == text + end + test "do not link parens" do text = " foo (https://example.com/path/folder/), bar" From 74afaca73b6b688d87e48a88ee8e8e3ee8421294 Mon Sep 17 00:00:00 2001 From: Sergey Suprunenko Date: Sat, 29 Aug 2020 23:30:43 +0200 Subject: [PATCH 2/5] Handle punctuation marks and angle bracket in the end of a link --- CHANGELOG.md | 2 ++ lib/linkify/parser.ex | 1 + test/parser_test.exs | 31 +++++++++++++++++++++++++++++++ 3 files changed, 34 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9d86357..adfc4a7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,8 @@ ### Fixed - Hashtags followed by HTML tags "a", "code" and "pre" were not detected +- Incorrect parsing of HTML links inside HTML tags +- Punctuation marks in the end of urls were included in the html links ## 0.2.0 - 2020-07-21 diff --git a/lib/linkify/parser.ex b/lib/linkify/parser.ex index 38eca8c..dbb27fb 100644 --- a/lib/linkify/parser.ex +++ b/lib/linkify/parser.ex @@ -193,6 +193,7 @@ defmodule Linkify.Parser do buffer |> String.split("<") |> List.first() + |> String.replace(~r/[,.;:)>]$/, "") |> strip_parens() if url?(str, opts) do diff --git a/test/parser_test.exs b/test/parser_test.exs index 718be90..352f237 100644 --- a/test/parser_test.exs +++ b/test/parser_test.exs @@ -114,6 +114,20 @@ defmodule Linkify.ParserTest do assert parse(text) == expected end + test "handle angle bracket in the end" do + text = "google.com
    " + assert parse(text) == "google.com
    " + + text = "google.com
    " + assert parse(text) == "google.com
    " + + text = "google.com<" + assert parse(text) == "google.com<" + + text = "google.com>" + assert parse(text) == "google.com>" + end + test "does not link attributes" do text = "Check out google" assert parse(text) == text @@ -185,6 +199,23 @@ defmodule Linkify.ParserTest do assert parse(text, class: false, rel: false) == expected end + test "do not link punctuation marks in the end" do + text = "google.com." + assert parse(text) == "google.com." + + text = "google.com;" + assert parse(text) == "google.com;" + + text = "google.com:" + assert parse(text) == "google.com:" + + text = "hack google.com, please" + assert parse(text) == "hack google.com, please" + + text = "(check out google.com)" + assert parse(text) == "(check out google.com)" + end + test "do not link urls" do text = "google.com" assert parse(text, url: false) == text From 8d88833dc5cbcc64e1e3ebfe82aa4d960059adb8 Mon Sep 17 00:00:00 2001 From: Sergey Suprunenko Date: Tue, 1 Sep 2020 13:31:21 +0200 Subject: [PATCH 3/5] Remove unused clauses --- lib/linkify/parser.ex | 28 ---------------------------- 1 file changed, 28 deletions(-) diff --git a/lib/linkify/parser.ex b/lib/linkify/parser.ex index dbb27fb..023b3b5 100644 --- a/lib/linkify/parser.ex +++ b/lib/linkify/parser.ex @@ -83,9 +83,6 @@ defmodule Linkify.Parser do defp do_parse({"", user_acc}, _opts, {"", acc, _}), do: {Enum.reverse(acc), user_acc} - defp do_parse({"@" <> text, user_acc}, opts, {buffer, acc, :skip}), - do: do_parse({text, user_acc}, opts, {"", accumulate(acc, buffer, "@"), :skip}) - defp do_parse( {"<" <> text, user_acc}, %{hashtag: true} = opts, @@ -124,10 +121,6 @@ defmodule Linkify.Parser do defp do_parse({"<" <> text, user_acc}, opts, {"", acc, :parsing}), do: do_parse({text, user_acc}, opts, {"<", acc, {:open, 1}}) - defp do_parse({"<" <> text, user_acc}, opts, {"", acc, {:html, level}}) do - do_parse({text, user_acc}, opts, {"<", acc, {:open, level + 1}}) - end - defp do_parse({">" <> text, user_acc}, opts, {buffer, acc, {:attrs, _level}}), do: do_parse({text, user_acc}, opts, {"", accumulate(acc, buffer, ">"), :parsing}) @@ -135,27 +128,6 @@ defmodule Linkify.Parser do do_parse({text, user_acc}, opts, {"", accumulate(acc, <>), {:attrs, level}}) end - defp do_parse({" text, user_acc}, opts, {buffer, acc, {:html, level}}) do - {buffer, user_acc} = link(buffer, opts, user_acc) - - do_parse( - {text, user_acc}, - opts, - {"", accumulate(acc, buffer, "" <> text, user_acc}, opts, {buffer, acc, {:close, 1}}), - do: do_parse({text, user_acc}, opts, {"", accumulate(acc, buffer, ">"), :parsing}) - - defp do_parse({">" <> text, user_acc}, opts, {buffer, acc, {:close, level}}), - do: - do_parse( - {text, user_acc}, - opts, - {"", accumulate(acc, buffer, ">"), {:html, level - 1}} - ) - defp do_parse({text, user_acc}, opts, {buffer, acc, {:open, level}}) do do_parse({text, user_acc}, opts, {"", accumulate(acc, buffer), {:attrs, level}}) end From 7005cda9e48b746b1ce2004fc921eb75fc8d9310 Mon Sep 17 00:00:00 2001 From: Sergey Suprunenko Date: Wed, 2 Sep 2020 13:18:02 +0200 Subject: [PATCH 4/5] Handle punctuation marks and new lines --- lib/linkify/parser.ex | 67 +++++++++++++++++++++++++------------- test/linkify_test.exs | 75 ++++++++++++++++++++++++++++++++++++++++++- test/parser_test.exs | 10 ++++-- 3 files changed, 126 insertions(+), 26 deletions(-) diff --git a/lib/linkify/parser.ex b/lib/linkify/parser.ex index 023b3b5..3074be3 100644 --- a/lib/linkify/parser.ex +++ b/lib/linkify/parser.ex @@ -24,6 +24,8 @@ defmodule Linkify.Parser do @match_skipped_tag ~r/^(?(a|code|pre)).*>*/ + @delimiters ~r/[,.;:>]*$/ + @prefix_extra [ "magnet:?", "dweb://", @@ -56,7 +58,7 @@ defmodule Linkify.Parser do ~s{Check out google.com} """ - @types [:url, :email, :hashtag, :mention, :extra] + @types [:url, :email, :hashtag, :extra, :mention] def parse(input, opts \\ %{}) def parse(input, opts) when is_binary(input), do: {input, %{}} |> parse(opts) |> elem(0) @@ -100,6 +102,11 @@ defmodule Linkify.Parser do end end + defp do_parse({" text, user_acc}, opts, {buffer, acc, :parsing}) do + {buffer, user_acc} = link(buffer, opts, user_acc) + do_parse({text, user_acc}, opts, {"", accumulate(acc, buffer, " text, user_acc}, opts, {buffer, acc, :parsing}), do: do_parse({text, user_acc}, opts, {"", accumulate(acc, buffer, " <>, acc, state}) def check_and_link(:url, buffer, opts, _user_acc) do - str = - buffer - |> String.split("<") - |> List.first() - |> String.replace(~r/[,.;:)>]$/, "") - |> strip_parens() - - if url?(str, opts) do - case @match_url |> Regex.run(str, capture: [:url]) |> hd() do + if url?(buffer, opts) do + case @match_url |> Regex.run(buffer, capture: [:url]) |> hd() do ^buffer -> link_url(buffer, opts) url -> - buffer - |> String.split(url) - |> Enum.intersperse(link_url(url, opts)) - |> if(opts[:iodata], do: & &1, else: &Enum.join(&1)).() + link = link_url(url, opts) + restore_stripped_symbols(buffer, url, link, opts) end else :nomatch @@ -200,19 +198,21 @@ defmodule Linkify.Parser do |> link_hashtag(buffer, opts, user_acc) end - def check_and_link(:extra, "xmpp:" <> handle, opts, _user_acc) do - if email?(handle, opts), do: link_extra("xmpp:" <> handle, opts), else: handle + def check_and_link(:extra, "xmpp:" <> handle = buffer, opts, _user_acc) do + if email?(handle, opts), do: link_extra(buffer, opts), else: :nomatch end def check_and_link(:extra, buffer, opts, _user_acc) do if String.starts_with?(buffer, @prefix_extra), do: link_extra(buffer, opts), else: :nomatch end - defp strip_parens("(" <> buffer) do - ~r/[^\)]*/ |> Regex.run(buffer) |> hd() + defp strip_parens(buffer) do + buffer + |> String.trim_leading("(") + |> String.trim_trailing(")") end - defp strip_parens(buffer), do: buffer + defp strip_punctuation(buffer), do: String.replace(buffer, @delimiters, "") def url?(buffer, opts) do valid_url?(buffer) && Regex.match?(@match_url, buffer) && valid_tld?(buffer, opts) @@ -332,10 +332,31 @@ defmodule Linkify.Parser do end defp check_and_link_reducer(type, buffer, opts, user_acc) do - case check_and_link(type, buffer, opts, user_acc) do - :nomatch -> {:cont, {buffer, user_acc}} - {buffer, user_acc} -> {:halt, {buffer, user_acc}} - buffer -> {:halt, {buffer, user_acc}} + str = + buffer + |> String.split("<") + |> List.first() + |> strip_punctuation() + |> strip_parens() + + case check_and_link(type, str, opts, user_acc) do + :nomatch -> + {:cont, {buffer, user_acc}} + + {link, user_acc} -> + {:halt, {restore_stripped_symbols(buffer, str, link, opts), user_acc}} + + link -> + {:halt, {restore_stripped_symbols(buffer, str, link, opts), user_acc}} end end + + defp restore_stripped_symbols(buffer, buffer, link, _), do: link + + defp restore_stripped_symbols(buffer, stripped_buffer, link, opts) do + buffer + |> String.split(stripped_buffer) + |> Enum.intersperse(link) + |> if(opts[:iodata], do: &Enum.reject(&1, fn el -> el == "" end), else: &Enum.join(&1)).() + end end diff --git a/test/linkify_test.exs b/test/linkify_test.exs index 9eece61..b5059f6 100644 --- a/test/linkify_test.exs +++ b/test/linkify_test.exs @@ -282,6 +282,63 @@ defmodule LinkifyTest do assert mentions |> MapSet.to_list() |> Enum.map(&elem(&1, 1)) == ["user"] end + + test "mentions handler and extra links" do + text = + "hi @user, text me asap xmpp:me@cofe.ai, (or contact me at me@cofe.ai), please.
    cofe.ai." + + valid_users = ["user", "cofe"] + + handler = fn "@" <> user = mention, buffer, _opts, acc -> + if Enum.member?(valid_users, user) do + link = ~s(#{mention}) + {link, %{acc | mentions: MapSet.put(acc.mentions, {mention, user})}} + else + {buffer, acc} + end + end + + {result_text, %{mentions: mentions}} = + Linkify.link_map(text, %{mentions: MapSet.new()}, + mention: true, + mention_handler: handler, + extra: true, + email: true + ) + + assert result_text == + "hi @user, text me asap xmpp:me@cofe.ai, (or contact me at me@cofe.ai), please.
    cofe.ai." + + assert MapSet.to_list(mentions) == [{"@user", "user"}] + end + + test "mentions handler and emails" do + text = "hi @friend, here is my email

    user@user.me" + + valid_users = ["user", "friend"] + + handler = fn "@" <> user = mention, buffer, _opts, acc -> + if Enum.member?(valid_users, user) do + link = ~s(#{mention}) + {link, %{acc | mentions: MapSet.put(acc.mentions, {mention, user})}} + else + {buffer, acc} + end + end + + {result_text, %{mentions: mentions}} = + Linkify.link_map(text, %{mentions: MapSet.new()}, + mention: true, + mention_handler: handler, + extra: true, + email: true + ) + + assert result_text == + "hi @friend, here is my email

    user@user.me" + + assert MapSet.to_list(mentions) == [{"@friend", "friend"}] + end end describe "mentions" do @@ -306,7 +363,7 @@ defmodule LinkifyTest do assert Linkify.link(text, mention: true, mention_prefix: "u/") == expected end - test "metion @user@example.com" do + test "mention @user@example.com" do text = "hey @user@example.com" expected = @@ -317,6 +374,16 @@ defmodule LinkifyTest do mention_prefix: "https://example.com/user/", new_window: true ) == expected + + text = "That's @user@example.com's server" + + expected = + "That's @user@example.com's server" + + assert Linkify.link(text, + mention: true, + mention_prefix: "https://example.com/user/" + ) == expected end end @@ -492,6 +559,12 @@ defmodule LinkifyTest do assert Linkify.link(text, extra: true) == expected end + test "wrong xmpp" do + text = "xmpp:user.example.com" + + assert Linkify.link(text, extra: true) == text + end + test "email" do text = "user@example.com" expected = "user@example.com" diff --git a/test/parser_test.exs b/test/parser_test.exs index 352f237..8692f46 100644 --- a/test/parser_test.exs +++ b/test/parser_test.exs @@ -118,8 +118,14 @@ defmodule Linkify.ParserTest do text = "google.com
    " assert parse(text) == "google.com
    " - text = "google.com
    " - assert parse(text) == "google.com
    " + text = "google.com
    hey" + assert parse(text) == "google.com
    hey" + + text = "hey
    google.com" + assert parse(text) == "hey
    google.com" + + text = "
    google.com" + assert parse(text) == "
    google.com" text = "google.com<" assert parse(text) == "google.com<" From c4b4cb95fbd8ddc60dc259015f3088d415553c03 Mon Sep 17 00:00:00 2001 From: Sergey Suprunenko Date: Thu, 10 Sep 2020 20:15:49 +0200 Subject: [PATCH 5/5] Linkify.Parser.restore_stripped_symbols/3 must always return iodata --- lib/linkify/parser.ex | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/lib/linkify/parser.ex b/lib/linkify/parser.ex index 3074be3..1e5c3db 100644 --- a/lib/linkify/parser.ex +++ b/lib/linkify/parser.ex @@ -175,7 +175,7 @@ defmodule Linkify.Parser do url -> link = link_url(url, opts) - restore_stripped_symbols(buffer, url, link, opts) + restore_stripped_symbols(buffer, url, link) end else :nomatch @@ -344,19 +344,18 @@ defmodule Linkify.Parser do {:cont, {buffer, user_acc}} {link, user_acc} -> - {:halt, {restore_stripped_symbols(buffer, str, link, opts), user_acc}} + {:halt, {restore_stripped_symbols(buffer, str, link), user_acc}} link -> - {:halt, {restore_stripped_symbols(buffer, str, link, opts), user_acc}} + {:halt, {restore_stripped_symbols(buffer, str, link), user_acc}} end end - defp restore_stripped_symbols(buffer, buffer, link, _), do: link + defp restore_stripped_symbols(buffer, buffer, link), do: link - defp restore_stripped_symbols(buffer, stripped_buffer, link, opts) do + defp restore_stripped_symbols(buffer, stripped_buffer, link) do buffer |> String.split(stripped_buffer) |> Enum.intersperse(link) - |> if(opts[:iodata], do: &Enum.reject(&1, fn el -> el == "" end), else: &Enum.join(&1)).() end end