From b1f6c04eefa67c891d05f87a54ccc41b41b2d2b0 Mon Sep 17 00:00:00 2001 From: Justin Tormey Date: Mon, 27 Jul 2020 10:40:44 -0500 Subject: [PATCH 1/4] Add parser support for iodata and safe iodata --- lib/linkify.ex | 9 +++ lib/linkify/builder.ex | 45 ++++++++---- lib/linkify/parser.ex | 152 +++++++++++++++++++++++------------------ test/linkify_test.exs | 42 ++++++++++++ 4 files changed, 168 insertions(+), 80 deletions(-) diff --git a/lib/linkify.ex b/lib/linkify.ex index 5a5e720..d15d01c 100644 --- a/lib/linkify.ex +++ b/lib/linkify.ex @@ -40,11 +40,20 @@ defmodule Linkify do * `hashtag_handler: nil` - a custom handler to validate and formart a hashtag * `extra: false` - link urls with rarely used schemes (magnet, ipfs, irc, etc.) * `validate_tld: true` - Set to false to disable TLD validation for urls/emails, also can be set to :no_scheme to validate TLDs only for urls without a scheme (e.g `example.com` will be validated, but `http://example.loki` won't) + * `iodata` - Set to `true` to return iodata as a result, or `:safe` for iodata with linkified anchor tags wrapped in Phoenix.HTML `:safe` tuples (removes need for further sanitization) """ def link(text, opts \\ []) do parse(text, opts) end + def link_to_iodata(text, opts \\ []) do + parse(text, Keyword.merge(opts, iodata: true)) + end + + def link_safe(text, opts \\ []) do + parse(text, Keyword.merge(opts, iodata: :safe)) + end + def link_map(text, acc, opts \\ []) do parse({text, acc}, opts) end diff --git a/lib/linkify/builder.ex b/lib/linkify/builder.ex index 385f6b3..8edf7e8 100644 --- a/lib/linkify/builder.ex +++ b/lib/linkify/builder.ex @@ -56,8 +56,9 @@ defmodule Linkify.Builder do |> strip_prefix(Map.get(opts, :strip_prefix, false)) |> truncate(Map.get(opts, :truncate, false)) - attrs = format_attrs(attrs) - "#{url}" + attrs + |> format_attrs() + |> format_tag(url, opts) end defp format_attrs(attrs) do @@ -123,23 +124,39 @@ defmodule Linkify.Builder do |> format_extra(uri, opts) end - def format_mention(attrs, name, _opts) do - attrs = format_attrs(attrs) - "@#{name}" + def format_mention(attrs, name, opts) do + attrs + |> format_attrs() + |> format_tag("@#{name}", opts) end - def format_hashtag(attrs, tag, _opts) do - attrs = format_attrs(attrs) - "##{tag}" + def format_hashtag(attrs, tag, opts) do + attrs + |> format_attrs() + |> format_tag("##{tag}", opts) end - def format_email(attrs, email, _opts) do - attrs = format_attrs(attrs) - ~s(#{email}) + def format_email(attrs, email, opts) do + attrs + |> format_attrs() + |> format_tag(email, opts) end - def format_extra(attrs, uri, _opts) do - attrs = format_attrs(attrs) - ~s(#{uri}) + def format_extra(attrs, uri, opts) do + attrs + |> format_attrs() + |> format_tag(uri, opts) + end + + def format_tag(attrs, content, %{iodata: true}) do + ["", content, ""] + end + + def format_tag(attrs, content, %{iodata: :safe}) do + [{:safe, [""]}, content, {:safe, ""}] + end + + def format_tag(attrs, content, _opts) do + "#{content}" end end diff --git a/lib/linkify/parser.ex b/lib/linkify/parser.ex index fe72668..9669844 100644 --- a/lib/linkify/parser.ex +++ b/lib/linkify/parser.ex @@ -62,135 +62,142 @@ defmodule Linkify.Parser do def parse(input, opts) do opts = Map.merge(@default_opts, opts) - opts_list = Map.to_list(opts) - - Enum.reduce(@types, input, fn - type, input -> - if {type, true} in opts_list do - do_parse(input, opts, {"", "", :parsing}, type) - else - input - end - end) + acc = if opts[:iodata], do: [], else: "" + do_parse(input, opts, {"", acc, :parsing}) end - defp do_parse({"", user_acc}, _opts, {"", acc, _}, _handler), + defp accumulate(acc, buffer) when is_list(acc), + do: [buffer | acc] + + defp accumulate(acc, buffer) when is_binary(acc), + do: acc <> buffer + + defp accumulate(acc, buffer, trailing) when is_list(acc), + do: [trailing, buffer | acc] + + defp accumulate(acc, buffer, trailing) when is_binary(acc), + do: acc <> buffer <> trailing + + defp do_parse({"", user_acc}, _opts, {"", acc, _}) when is_list(acc), + do: {Enum.reverse(acc), user_acc} + + defp do_parse({"", user_acc}, _opts, {"", acc, _}) when is_binary(acc), do: {acc, user_acc} - defp do_parse({"@" <> text, user_acc}, opts, {buffer, acc, :skip}, type), - do: do_parse({text, user_acc}, opts, {"", acc <> buffer <> "@", :skip}, type) + defp do_parse({"@" <> text, user_acc}, opts, {buffer, acc, :skip}), + do: do_parse({text, user_acc}, opts, {"", accumulate(acc, buffer, "@"), :skip}) - defp do_parse({" text, user_acc}, opts, {buffer, acc, :parsing}, type), - do: do_parse({text, user_acc}, opts, {"", acc <> buffer <> " text, user_acc}, opts, {buffer, acc, :parsing}), + do: do_parse({text, user_acc}, opts, {"", accumulate(acc, buffer, " text, user_acc}, opts, {buffer, acc, :parsing}, type), - do: do_parse({text, user_acc}, opts, {"", acc <> buffer <> " text, user_acc}, opts, {buffer, acc, :parsing}), + do: do_parse({text, user_acc}, opts, {"", accumulate(acc, buffer, " text, user_acc}, opts, {buffer, acc, :parsing}, type), - do: do_parse({text, user_acc}, opts, {"", acc <> buffer <> " text, user_acc}, opts, {buffer, acc, :parsing}), + do: do_parse({text, user_acc}, opts, {"", accumulate(acc, buffer, "" <> text, user_acc}, opts, {buffer, acc, :skip}, type), - do: do_parse({text, user_acc}, opts, {"", acc <> buffer <> "", :parsing}, type) + defp do_parse({"" <> text, user_acc}, opts, {buffer, acc, :skip}), + do: do_parse({text, user_acc}, opts, {"", accumulate(acc, buffer, ""), :parsing}) - defp do_parse({"" <> text, user_acc}, opts, {buffer, acc, :skip}, type), - do: do_parse({text, user_acc}, opts, {"", acc <> buffer <> "", :parsing}, type) + defp do_parse({"" <> text, user_acc}, opts, {buffer, acc, :skip}), + do: do_parse({text, user_acc}, opts, {"", accumulate(acc, buffer, ""), :parsing}) - defp do_parse({"" <> text, user_acc}, opts, {buffer, acc, :skip}, type), - do: do_parse({text, user_acc}, opts, {"", acc <> buffer <> "", :parsing}, type) + defp do_parse({"" <> text, user_acc}, opts, {buffer, acc, :skip}), + do: do_parse({text, user_acc}, opts, {"", accumulate(acc, buffer, ""), :parsing}) - defp do_parse({"<" <> text, user_acc}, opts, {"", acc, :parsing}, type), - do: do_parse({text, user_acc}, opts, {"<", acc, {:open, 1}}, type) + defp do_parse({"<" <> text, user_acc}, opts, {"", acc, :parsing}), + do: do_parse({text, user_acc}, opts, {"<", acc, {:open, 1}}) - defp do_parse({"<" <> text, user_acc}, opts, {"", acc, {:html, level}}, type) do - do_parse({text, user_acc}, opts, {"<", acc, {:open, level + 1}}, type) + defp do_parse({"<" <> text, user_acc}, opts, {"", acc, {:html, level}}) do + do_parse({text, user_acc}, opts, {"<", acc, {:open, level + 1}}) end - defp do_parse({">" <> text, user_acc}, opts, {buffer, acc, {:attrs, level}}, type), + defp do_parse({">" <> text, user_acc}, opts, {buffer, acc, {:attrs, level}}), do: do_parse( {text, user_acc}, opts, - {"", acc <> buffer <> ">", {:html, level}}, - type + {"", accumulate(acc, buffer, ">"), {:html, level}} ) - defp do_parse({<> <> text, user_acc}, opts, {"", acc, {:attrs, level}}, type) do - do_parse({text, user_acc}, opts, {"", acc <> <>, {:attrs, level}}, type) + defp do_parse({<> <> text, user_acc}, opts, {"", acc, {:attrs, level}}) do + do_parse({text, user_acc}, opts, {"", accumulate(acc, <>), {:attrs, level}}) end - defp do_parse({" text, user_acc}, opts, {buffer, acc, {:html, level}}, type) do - {buffer, user_acc} = link(type, buffer, opts, user_acc) + defp do_parse({" text, user_acc}, opts, {buffer, acc, {:html, level}}) do + {buffer, user_acc} = link(buffer, opts, user_acc) do_parse( {text, user_acc}, opts, - {"", acc <> buffer <> "" <> text, user_acc}, opts, {buffer, acc, {:close, 1}}, type), - do: do_parse({text, user_acc}, opts, {"", acc <> buffer <> ">", :parsing}, type) + defp do_parse({">" <> text, user_acc}, opts, {buffer, acc, {:close, 1}}), + do: do_parse({text, user_acc}, opts, {"", accumulate(acc, buffer, ">"), :parsing}) - defp do_parse({">" <> text, user_acc}, opts, {buffer, acc, {:close, level}}, type), + defp do_parse({">" <> text, user_acc}, opts, {buffer, acc, {:close, level}}), do: do_parse( {text, user_acc}, opts, - {"", acc <> buffer <> ">", {:html, level - 1}}, - type + {"", accumulate(acc, buffer, ">"), {:html, level - 1}} ) - defp do_parse({text, user_acc}, opts, {buffer, acc, {:open, level}}, type) do - do_parse({text, user_acc}, opts, {"", acc <> buffer, {:attrs, level}}, type) + defp do_parse({text, user_acc}, opts, {buffer, acc, {:open, level}}) do + do_parse({text, user_acc}, opts, {"", acc <> buffer, {:attrs, level}}) end defp do_parse( {<>, user_acc}, opts, - {buffer, acc, state}, - type + {buffer, acc, state} ) when char in [" ", "\r", "\n"] do - {buffer, user_acc} = link(type, buffer, opts, user_acc) + {buffer, user_acc} = link(buffer, opts, user_acc) do_parse( {text, user_acc}, opts, - {"", acc <> buffer <> char, state}, - type + {"", accumulate(acc, buffer, char), state} ) end - defp do_parse({<>, user_acc}, opts, {buffer, acc, state}, type) do - {buffer, user_acc} = link(type, buffer <> <>, opts, user_acc) + defp do_parse({<>, user_acc}, opts, {buffer, acc, state}) do + {buffer, user_acc} = link(buffer <> <>, opts, user_acc) do_parse( {"", user_acc}, opts, - {"", acc <> buffer, state}, - type + {"", accumulate(acc, buffer), state} ) end - defp do_parse({<> <> text, user_acc}, opts, {buffer, acc, state}, type), - do: do_parse({text, user_acc}, opts, {buffer <> <>, acc, state}, type) + defp do_parse({<> <> text, user_acc}, opts, {buffer, acc, state}), + do: do_parse({text, user_acc}, opts, {buffer <> <>, acc, state}) def check_and_link(:url, buffer, opts, _user_acc) do str = strip_parens(buffer) if url?(str, opts) do case @match_url |> Regex.run(str, capture: [:url]) |> hd() do - ^buffer -> link_url(buffer, opts) - url -> String.replace(buffer, url, link_url(url, opts)) + ^buffer -> + link_url(buffer, opts) + + url -> + buffer + |> String.split(url) + |> Enum.intersperse(link_url(url, opts)) + |> (if opts[:iodata], do: & &1, else: & Enum.join(&1)).() end else - buffer + :nomatch end end def check_and_link(:email, buffer, opts, _user_acc) do - if email?(buffer, opts), do: link_email(buffer, opts), else: buffer + if email?(buffer, opts), do: link_email(buffer, opts), else: :nomatch end def check_and_link(:mention, buffer, opts, user_acc) do @@ -210,7 +217,7 @@ defmodule Linkify.Parser do end def check_and_link(:extra, buffer, opts, _user_acc) do - if String.starts_with?(buffer, @prefix_extra), do: link_extra(buffer, opts), else: buffer + if String.starts_with?(buffer, @prefix_extra), do: link_extra(buffer, opts), else: :nomatch end defp strip_parens("(" <> buffer) do @@ -272,7 +279,7 @@ defmodule Linkify.Parser do end end - def link_hashtag(nil, buffer, _, _user_acc), do: buffer + def link_hashtag(nil, _buffer, _, _user_acc), do: :nomatch def link_hashtag(hashtag, buffer, %{hashtag_handler: hashtag_handler} = opts, user_acc) do hashtag @@ -286,7 +293,7 @@ defmodule Linkify.Parser do |> maybe_update_buffer(hashtag, buffer) end - def link_mention(nil, buffer, _, user_acc), do: {buffer, user_acc} + def link_mention(nil, _buffer, _, _user_acc), do: :nomatch def link_mention(mention, buffer, %{mention_handler: mention_handler} = opts, user_acc) do mention @@ -326,10 +333,23 @@ defmodule Linkify.Parser do Builder.create_extra_link(buffer, opts) end - defp link(type, buffer, opts, user_acc) do + defp link(buffer, opts, user_acc) do + opts_list = Map.to_list(opts) + + Enum.reduce_while @types, {buffer, user_acc}, fn type, _ -> + if {type, true} in opts_list do + check_and_link_reducer(type, buffer, opts, user_acc) + else + {:cont, {buffer, user_acc}} + end + end + end + + defp check_and_link_reducer(type, buffer, opts, user_acc) do case check_and_link(type, buffer, opts, user_acc) do - {buffer, user_acc} -> {buffer, user_acc} - buffer -> {buffer, user_acc} + :nomatch -> {:cont, {buffer, user_acc}} + {buffer, user_acc} -> {:halt, {buffer, user_acc}} + buffer -> {:halt, {buffer, user_acc}} end end end diff --git a/test/linkify_test.exs b/test/linkify_test.exs index 0128bc2..3216b38 100644 --- a/test/linkify_test.exs +++ b/test/linkify_test.exs @@ -7,6 +7,16 @@ defmodule LinkifyTest do "google.com" end + test "default link iodata" do + assert Linkify.link_to_iodata("google.com") == + [["", "google.com", ""]] + end + + test "default link safe iodata" do + assert Linkify.link_safe("google.com") == + [[{:safe, [""]}, "google.com", {:safe, ""}]] + end + test "does on link existing links" do text = ~s(google.com) assert Linkify.link(text) == text @@ -24,16 +34,38 @@ defmodule LinkifyTest do ) == expected end + test "all kinds of links iodata" do + text = "hello google.com https://ddg.com user@email.com irc:///mIRC" + + expected = + ["hello", " ", ["", "google.com", ""], " ", ["", "https://ddg.com", ""], " ", ["", "user@email.com", ""], " ", ["", "irc:///mIRC", ""]] + + assert Linkify.link_to_iodata(text, + email: true, + extra: true + ) == expected + end + test "class attribute" do assert Linkify.link("google.com", class: "linkified") == "google.com" end + test "class attribute iodata" do + assert Linkify.link_to_iodata("google.com", class: "linkified") == + [["", "google.com", ""]] + end + test "rel attribute" do assert Linkify.link("google.com", rel: "noopener noreferrer") == "google.com" end + test "rel attribute iodata" do + assert Linkify.link_to_iodata("google.com", rel: "noopener noreferrer") == + [["", "google.com", ""]] + end + test "rel as function" do text = "google.com" @@ -54,6 +86,16 @@ defmodule LinkifyTest do assert Linkify.link(text, rel: custom_rel) == expected end + test "strip parens" do + assert Linkify.link("(google.com)") == + "(google.com)" + end + + test "strip parens iodata" do + assert Linkify.link_to_iodata("(google.com)") == + [["(", ["", "google.com", ""], ")"]] + end + test "link_map/2" do assert Linkify.link_map("google.com", []) == {"google.com", []} From 1d52bfeb81d866bb1e42b81c983af6f68d4c3164 Mon Sep 17 00:00:00 2001 From: Justin Tormey Date: Mon, 27 Jul 2020 12:46:26 -0500 Subject: [PATCH 2/4] Fix formatting --- lib/linkify/parser.ex | 6 +++--- test/linkify_test.exs | 41 ++++++++++++++++++++++++++++++++++++----- 2 files changed, 39 insertions(+), 8 deletions(-) diff --git a/lib/linkify/parser.ex b/lib/linkify/parser.ex index 9669844..759007e 100644 --- a/lib/linkify/parser.ex +++ b/lib/linkify/parser.ex @@ -189,7 +189,7 @@ defmodule Linkify.Parser do buffer |> String.split(url) |> Enum.intersperse(link_url(url, opts)) - |> (if opts[:iodata], do: & &1, else: & Enum.join(&1)).() + |> if(opts[:iodata], do: & &1, else: &Enum.join(&1)).() end else :nomatch @@ -336,13 +336,13 @@ defmodule Linkify.Parser do defp link(buffer, opts, user_acc) do opts_list = Map.to_list(opts) - Enum.reduce_while @types, {buffer, user_acc}, fn type, _ -> + Enum.reduce_while(@types, {buffer, user_acc}, fn type, _ -> if {type, true} in opts_list do check_and_link_reducer(type, buffer, opts, user_acc) else {:cont, {buffer, user_acc}} end - end + end) end defp check_and_link_reducer(type, buffer, opts, user_acc) do diff --git a/test/linkify_test.exs b/test/linkify_test.exs index 3216b38..c92e9fa 100644 --- a/test/linkify_test.exs +++ b/test/linkify_test.exs @@ -14,7 +14,13 @@ defmodule LinkifyTest do test "default link safe iodata" do assert Linkify.link_safe("google.com") == - [[{:safe, [""]}, "google.com", {:safe, ""}]] + [ + [ + {:safe, [""]}, + "google.com", + {:safe, ""} + ] + ] end test "does on link existing links" do @@ -37,8 +43,17 @@ defmodule LinkifyTest do test "all kinds of links iodata" do text = "hello google.com https://ddg.com user@email.com irc:///mIRC" - expected = - ["hello", " ", ["", "google.com", ""], " ", ["", "https://ddg.com", ""], " ", ["", "user@email.com", ""], " ", ["", "irc:///mIRC", ""]] + expected = [ + "hello", + " ", + ["", "google.com", ""], + " ", + ["", "https://ddg.com", ""], + " ", + ["", "user@email.com", ""], + " ", + ["", "irc:///mIRC", ""] + ] assert Linkify.link_to_iodata(text, email: true, @@ -53,7 +68,15 @@ defmodule LinkifyTest do test "class attribute iodata" do assert Linkify.link_to_iodata("google.com", class: "linkified") == - [["", "google.com", ""]] + [ + [ + "", + "google.com", + "" + ] + ] end test "rel attribute" do @@ -63,7 +86,15 @@ defmodule LinkifyTest do test "rel attribute iodata" do assert Linkify.link_to_iodata("google.com", rel: "noopener noreferrer") == - [["", "google.com", ""]] + [ + [ + "", + "google.com", + "" + ] + ] end test "rel as function" do From 61871d137af5276d333d91e3eee11132a1c45d8b Mon Sep 17 00:00:00 2001 From: Justin Tormey Date: Mon, 27 Jul 2020 13:37:15 -0500 Subject: [PATCH 3/4] Reduce work for checking precense of type in opts --- lib/linkify/parser.ex | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/lib/linkify/parser.ex b/lib/linkify/parser.ex index 759007e..2079776 100644 --- a/lib/linkify/parser.ex +++ b/lib/linkify/parser.ex @@ -334,10 +334,8 @@ defmodule Linkify.Parser do end defp link(buffer, opts, user_acc) do - opts_list = Map.to_list(opts) - Enum.reduce_while(@types, {buffer, user_acc}, fn type, _ -> - if {type, true} in opts_list do + if opts[type] == true do check_and_link_reducer(type, buffer, opts, user_acc) else {:cont, {buffer, user_acc}} From e720efd50ccb69b14a173a9906c5f5b1c25e4f6c Mon Sep 17 00:00:00 2001 From: Justin Tormey Date: Thu, 20 Aug 2020 13:39:59 -0500 Subject: [PATCH 4/4] Accumulate as iodata, only convert to binary if necessary --- lib/linkify/parser.ex | 27 ++++++++++++--------------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/lib/linkify/parser.ex b/lib/linkify/parser.ex index 2079776..75717de 100644 --- a/lib/linkify/parser.ex +++ b/lib/linkify/parser.ex @@ -62,28 +62,25 @@ defmodule Linkify.Parser do def parse(input, opts) do opts = Map.merge(@default_opts, opts) - acc = if opts[:iodata], do: [], else: "" - do_parse(input, opts, {"", acc, :parsing}) + + {buffer, user_acc} = do_parse(input, opts, {"", [], :parsing}) + + if opts[:iodata] do + {buffer, user_acc} + else + {IO.iodata_to_binary(buffer), user_acc} + end end - defp accumulate(acc, buffer) when is_list(acc), + defp accumulate(acc, buffer), do: [buffer | acc] - defp accumulate(acc, buffer) when is_binary(acc), - do: acc <> buffer - - defp accumulate(acc, buffer, trailing) when is_list(acc), + defp accumulate(acc, buffer, trailing), do: [trailing, buffer | acc] - defp accumulate(acc, buffer, trailing) when is_binary(acc), - do: acc <> buffer <> trailing - - defp do_parse({"", user_acc}, _opts, {"", acc, _}) when is_list(acc), + defp do_parse({"", user_acc}, _opts, {"", acc, _}), do: {Enum.reverse(acc), user_acc} - defp do_parse({"", user_acc}, _opts, {"", acc, _}) when is_binary(acc), - do: {acc, user_acc} - defp do_parse({"@" <> text, user_acc}, opts, {buffer, acc, :skip}), do: do_parse({text, user_acc}, opts, {"", accumulate(acc, buffer, "@"), :skip}) @@ -146,7 +143,7 @@ defmodule Linkify.Parser do ) defp do_parse({text, user_acc}, opts, {buffer, acc, {:open, level}}) do - do_parse({text, user_acc}, opts, {"", acc <> buffer, {:attrs, level}}) + do_parse({text, user_acc}, opts, {"", accumulate(acc, buffer), {:attrs, level}}) end defp do_parse(