From c8026cd16c37079e3b2468f06f37f7dcd6aa3f95 Mon Sep 17 00:00:00 2001 From: rinpatch Date: Mon, 17 Jun 2019 23:06:31 +0300 Subject: [PATCH 1/4] Add an option to disable TLD validation Added `validate_tld` option, which can be set to false to disable tld validation and to :no_scheme to disable tld validation when a scheme is present Closes #8 --- lib/auto_linker.ex | 1 + lib/auto_linker/parser.ex | 16 +++++++---- test/parser_test.exs | 60 ++++++++++++++++++++++++++++++++++++++- 3 files changed, 70 insertions(+), 7 deletions(-) diff --git a/lib/auto_linker.ex b/lib/auto_linker.ex index 222cd79..d07e6df 100644 --- a/lib/auto_linker.ex +++ b/lib/auto_linker.ex @@ -48,6 +48,7 @@ defmodule AutoLinker do * `hashtag_prefix: nil` - a prefix to build a link for a hashtag (example: `https://example.com/tag/`) * `hashtag_handler: nil` - a custom handler to validate and formart a hashtag * `extra: false` - link urls with rarely used schemes (magnet, ipfs, irc, etc.) + * `validate_tld: true` - Set to false to disable TLD validation for urls, also can be set to :no_scheme to validate TLDs only for urls without a scheme (e.g `example.com` will be validated, but `http://example.loki` won't) Each of the above options can be specified when calling `link(text, opts)` or can be set in the `:auto_linker`'s configuration. For example: diff --git a/lib/auto_linker/parser.ex b/lib/auto_linker/parser.ex index 739173f..0ca906d 100644 --- a/lib/auto_linker/parser.ex +++ b/lib/auto_linker/parser.ex @@ -42,7 +42,7 @@ defmodule AutoLinker.Parser do @tlds "./priv/tlds.txt" |> File.read!() |> String.split("\n", trim: true) |> MapSet.new() - @default_opts ~w(url)a + @default_opts ~w(url validate_tld)a @doc """ Parse the given string, identifying items to link. @@ -262,7 +262,7 @@ defmodule AutoLinker.Parser do def check_and_link(buffer, opts, _user_acc) do str = strip_parens(buffer) - if url?(str, opts[:scheme]) do + if url?(str, opts[:scheme], opts[:validate_tld]) do case parse_link(str, opts) do ^buffer -> link_url(buffer, opts) url -> String.replace(buffer, url, link_url(url, opts)) @@ -315,12 +315,16 @@ defmodule AutoLinker.Parser do end # @doc false - def url?(buffer, true) do - valid_url?(buffer) && Regex.match?(@match_scheme, buffer) && valid_tld?(buffer) + def url?(buffer, scheme, validate_tld \\ true) + + def url?(buffer, true, validate_tld) do + valid_url?(buffer) && Regex.match?(@match_scheme, buffer) && + (!validate_tld or validate_tld == :no_scheme || valid_tld?(buffer)) end - def url?(buffer, _) do - valid_url?(buffer) && Regex.match?(@match_url, buffer) && valid_tld?(buffer) + def url?(buffer, _, validate_tld) do + valid_url?(buffer) && Regex.match?(@match_url, buffer) && + (validate_tld == false || valid_tld?(buffer)) end def email?(buffer) do diff --git a/test/parser_test.exs b/test/parser_test.exs index 851fe3e..2b49a7c 100644 --- a/test/parser_test.exs +++ b/test/parser_test.exs @@ -4,7 +4,7 @@ defmodule AutoLinker.ParserTest do import AutoLinker.Parser - describe "url?/2" do + describe "url?/3" do test "valid scheme true" do valid_scheme_urls() |> Enum.each(fn url -> @@ -32,6 +32,48 @@ defmodule AutoLinker.ParserTest do refute url?(url, false) end) end + + test "checks the tld for url with a scheme when validate_tld: true" do + custom_tld_scheme_urls() + |> Enum.each(fn url -> + refute url?(url, true, true) + end) + end + + test "does not check the tld for url with a scheme when validate_tld: false" do + custom_tld_scheme_urls() + |> Enum.each(fn url -> + assert url?(url, true, false) + end) + end + + test "does not check the tld for url with a scheme when validate_tld: :no_scheme" do + custom_tld_scheme_urls() + |> Enum.each(fn url -> + assert url?(url, true, :no_scheme) + end) + end + + test "checks the tld for url without a scheme when validate_tld: true" do + custom_tld_non_scheme_urls() + |> Enum.each(fn url -> + refute url?(url, false, true) + end) + end + + test "checks the tld for url without a scheme when validate_tld: :no_scheme" do + custom_tld_non_scheme_urls() + |> Enum.each(fn url -> + refute url?(url, false, :no_scheme) + end) + end + + test "does not check the tld for url without a scheme when validate_tld: false" do + custom_tld_non_scheme_urls() + |> Enum.each(fn url -> + assert url?(url, false, false) + end) + end end describe "match_phone" do @@ -216,4 +258,20 @@ defmodule AutoLinker.ParserTest do "x5", "(555) 555-55" ] + + def custom_tld_scheme_urls, + do: [ + "http://whatever.null/", + "https://example.o/index.html", + "http://pleroma.i2p/test", + "http://misskey.loki" + ] + + def custom_tld_non_scheme_urls, + do: [ + "whatever.null/", + "example.o/index.html", + "pleroma.i2p/test", + "misskey.loki" + ] end From c19c7afa5b3ae417001cc9b090c792383bed48f8 Mon Sep 17 00:00:00 2001 From: rinpatch Date: Tue, 18 Jun 2019 12:56:17 +0300 Subject: [PATCH 2/4] Refactor url?/2 to use opts --- lib/auto_linker/parser.ex | 19 +++++++++---------- test/parser_test.exs | 22 +++++++++++----------- 2 files changed, 20 insertions(+), 21 deletions(-) diff --git a/lib/auto_linker/parser.ex b/lib/auto_linker/parser.ex index 0ca906d..ba0913e 100644 --- a/lib/auto_linker/parser.ex +++ b/lib/auto_linker/parser.ex @@ -262,7 +262,7 @@ defmodule AutoLinker.Parser do def check_and_link(buffer, opts, _user_acc) do str = strip_parens(buffer) - if url?(str, opts[:scheme], opts[:validate_tld]) do + if url?(str, opts) do case parse_link(str, opts) do ^buffer -> link_url(buffer, opts) url -> String.replace(buffer, url, link_url(url, opts)) @@ -315,16 +315,15 @@ defmodule AutoLinker.Parser do end # @doc false - def url?(buffer, scheme, validate_tld \\ true) - def url?(buffer, true, validate_tld) do - valid_url?(buffer) && Regex.match?(@match_scheme, buffer) && - (!validate_tld or validate_tld == :no_scheme || valid_tld?(buffer)) - end - - def url?(buffer, _, validate_tld) do - valid_url?(buffer) && Regex.match?(@match_url, buffer) && - (validate_tld == false || valid_tld?(buffer)) + def url?(buffer, opts) do + if opts[:scheme] do + valid_url?(buffer) && Regex.match?(@match_scheme, buffer) && + (!opts[:validate_tld] or opts[:validate_tld] == :no_scheme || valid_tld?(buffer)) + else + valid_url?(buffer) && Regex.match?(@match_url, buffer) && + (opts[:validate_tld] == false || valid_tld?(buffer)) + end end def email?(buffer) do diff --git a/test/parser_test.exs b/test/parser_test.exs index 2b49a7c..ba2294f 100644 --- a/test/parser_test.exs +++ b/test/parser_test.exs @@ -4,74 +4,74 @@ defmodule AutoLinker.ParserTest do import AutoLinker.Parser - describe "url?/3" do + describe "url?/2" do test "valid scheme true" do valid_scheme_urls() |> Enum.each(fn url -> - assert url?(url, true) + assert url?(url, scheme: true, validate_tld: true) end) end test "invalid scheme true" do invalid_scheme_urls() |> Enum.each(fn url -> - refute url?(url, true) + refute url?(url, scheme: true, validate_tld: true) end) end test "valid scheme false" do valid_non_scheme_urls() |> Enum.each(fn url -> - assert url?(url, false) + assert url?(url, scheme: false, validate_tld: true) end) end test "invalid scheme false" do invalid_non_scheme_urls() |> Enum.each(fn url -> - refute url?(url, false) + refute url?(url, scheme: false, validate_tld: true) end) end test "checks the tld for url with a scheme when validate_tld: true" do custom_tld_scheme_urls() |> Enum.each(fn url -> - refute url?(url, true, true) + refute url?(url, scheme: true, validate_tld: true) end) end test "does not check the tld for url with a scheme when validate_tld: false" do custom_tld_scheme_urls() |> Enum.each(fn url -> - assert url?(url, true, false) + assert url?(url, scheme: true, validate_tld: false) end) end test "does not check the tld for url with a scheme when validate_tld: :no_scheme" do custom_tld_scheme_urls() |> Enum.each(fn url -> - assert url?(url, true, :no_scheme) + assert url?(url, scheme: true, validate_tld: :no_scheme) end) end test "checks the tld for url without a scheme when validate_tld: true" do custom_tld_non_scheme_urls() |> Enum.each(fn url -> - refute url?(url, false, true) + refute url?(url, scheme: false, validate_tld: true) end) end test "checks the tld for url without a scheme when validate_tld: :no_scheme" do custom_tld_non_scheme_urls() |> Enum.each(fn url -> - refute url?(url, false, :no_scheme) + refute url?(url, scheme: false, validate_tld: :no_scheme) end) end test "does not check the tld for url without a scheme when validate_tld: false" do custom_tld_non_scheme_urls() |> Enum.each(fn url -> - assert url?(url, false, false) + assert url?(url, scheme: false, validate_tld: false) end) end end From 1a341fdac33211f5eae1ad6e645fe1b9f39834bc Mon Sep 17 00:00:00 2001 From: rinpatch Date: Tue, 18 Jun 2019 13:07:14 +0300 Subject: [PATCH 3/4] Move checking for validate_tld to valid_tld? --- lib/auto_linker/parser.ex | 35 +++++++++++++++++++++-------------- 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/lib/auto_linker/parser.ex b/lib/auto_linker/parser.ex index ba0913e..2b84d26 100644 --- a/lib/auto_linker/parser.ex +++ b/lib/auto_linker/parser.ex @@ -318,30 +318,37 @@ defmodule AutoLinker.Parser do def url?(buffer, opts) do if opts[:scheme] do - valid_url?(buffer) && Regex.match?(@match_scheme, buffer) && - (!opts[:validate_tld] or opts[:validate_tld] == :no_scheme || valid_tld?(buffer)) + valid_url?(buffer) && Regex.match?(@match_scheme, buffer) && valid_tld?(buffer, opts) else - valid_url?(buffer) && Regex.match?(@match_url, buffer) && - (opts[:validate_tld] == false || valid_tld?(buffer)) + valid_url?(buffer) && Regex.match?(@match_url, buffer) && valid_tld?(buffer, opts) end end def email?(buffer) do - valid_url?(buffer) && Regex.match?(@match_email, buffer) && valid_tld?(buffer) + valid_url?(buffer) && Regex.match?(@match_email, buffer) && valid_tld?(buffer, []) end defp valid_url?(url), do: !Regex.match?(@invalid_url, url) - def valid_tld?(buffer) do - with [host] <- Regex.run(@match_hostname, buffer, capture: [:host]) do - if ip?(host) do + def valid_tld?(buffer, opts) do + cond do + opts[:validate_tld] == false -> true - else - tld = host |> String.split(".") |> List.last() - MapSet.member?(@tlds, tld) - end - else - _ -> false + + opts[:validate_tld] == :no_scheme && opts[:scheme] -> + true + + true -> + with [host] <- Regex.run(@match_hostname, buffer, capture: [:host]) do + if ip?(host) do + true + else + tld = host |> String.split(".") |> List.last() + MapSet.member?(@tlds, tld) + end + else + _ -> false + end end end From 70cbfdc84eaa0ba478c396f8af581b25b50be5f3 Mon Sep 17 00:00:00 2001 From: rinpatch Date: Tue, 18 Jun 2019 13:25:44 +0300 Subject: [PATCH 4/4] Extend email? to utilize validate_tld --- lib/auto_linker.ex | 2 +- lib/auto_linker/parser.ex | 8 ++++---- test/parser_test.exs | 34 ++++++++++++++++++++++++++++++++++ 3 files changed, 39 insertions(+), 5 deletions(-) diff --git a/lib/auto_linker.ex b/lib/auto_linker.ex index d07e6df..01688d8 100644 --- a/lib/auto_linker.ex +++ b/lib/auto_linker.ex @@ -48,7 +48,7 @@ defmodule AutoLinker do * `hashtag_prefix: nil` - a prefix to build a link for a hashtag (example: `https://example.com/tag/`) * `hashtag_handler: nil` - a custom handler to validate and formart a hashtag * `extra: false` - link urls with rarely used schemes (magnet, ipfs, irc, etc.) - * `validate_tld: true` - Set to false to disable TLD validation for urls, also can be set to :no_scheme to validate TLDs only for urls without a scheme (e.g `example.com` will be validated, but `http://example.loki` won't) + * `validate_tld: true` - Set to false to disable TLD validation for urls/emails, also can be set to :no_scheme to validate TLDs only for urls without a scheme (e.g `example.com` will be validated, but `http://example.loki` won't) Each of the above options can be specified when calling `link(text, opts)` or can be set in the `:auto_linker`'s configuration. For example: diff --git a/lib/auto_linker/parser.ex b/lib/auto_linker/parser.ex index 2b84d26..b7b4b75 100644 --- a/lib/auto_linker/parser.ex +++ b/lib/auto_linker/parser.ex @@ -285,7 +285,7 @@ defmodule AutoLinker.Parser do defp strip_parens(buffer), do: buffer def check_and_link_email(buffer, opts, _user_acc) do - if email?(buffer), do: link_email(buffer, opts), else: buffer + if email?(buffer, opts), do: link_email(buffer, opts), else: buffer end def check_and_link_phone(buffer, opts, _user_acc) do @@ -307,7 +307,7 @@ defmodule AutoLinker.Parser do end def check_and_link_extra("xmpp:" <> handle, opts, _user_acc) do - if email?(handle), do: link_extra("xmpp:" <> handle, opts), else: handle + if email?(handle, opts), do: link_extra("xmpp:" <> handle, opts), else: handle end def check_and_link_extra(buffer, opts, _user_acc) do @@ -324,8 +324,8 @@ defmodule AutoLinker.Parser do end end - def email?(buffer) do - valid_url?(buffer) && Regex.match?(@match_email, buffer) && valid_tld?(buffer, []) + def email?(buffer, opts) do + valid_url?(buffer) && Regex.match?(@match_email, buffer) && valid_tld?(buffer, opts) end defp valid_url?(url), do: !Regex.match?(@invalid_url, url) diff --git a/test/parser_test.exs b/test/parser_test.exs index ba2294f..cb4f8fb 100644 --- a/test/parser_test.exs +++ b/test/parser_test.exs @@ -76,6 +76,36 @@ defmodule AutoLinker.ParserTest do end end + describe "email?" do + test "identifies valid emails" do + valid_emails() + |> Enum.each(fn email -> + assert email?(email, []) + end) + end + + test "identifies invalid emails" do + invalid_emails() + |> Enum.each(fn email -> + refute email?(email, []) + end) + end + + test "does not validate tlds when validate_tld: false" do + valid_custom_tld_emails() + |> Enum.each(fn email -> + assert email?(email, validate_tld: false) + end) + end + + test "validates tlds when validate_tld: true" do + valid_custom_tld_emails() + |> Enum.each(fn email -> + refute email?(email, validate_tld: true) + end) + end + end + describe "match_phone" do test "valid" do valid_phone_nunbers() @@ -274,4 +304,8 @@ defmodule AutoLinker.ParserTest do "pleroma.i2p/test", "misskey.loki" ] + + def valid_emails, do: ["rms@ai.mit.edu", "vc@cock.li"] + def invalid_emails, do: ["rms[at]ai.mit.edu", "vc@cock", "xmpp:lain@trashserver.net"] + def valid_custom_tld_emails, do: ["guardian@33y6fjyhs3phzfjj.onion", "hi@company.null"] end