From 26f5310379effc184e8e93c13b4f26c3d6528e62 Mon Sep 17 00:00:00 2001 From: "Haelwenn (lanodan) Monnier" Date: Tue, 17 Nov 2020 16:27:47 +0100 Subject: [PATCH 1/9] parser: Add onion as an extra TLD --- lib/linkify/parser.ex | 6 +++++- test/parser_test.exs | 4 ++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/lib/linkify/parser.ex b/lib/linkify/parser.ex index d329e37..1b82f8e 100644 --- a/lib/linkify/parser.ex +++ b/lib/linkify/parser.ex @@ -41,7 +41,11 @@ defmodule Linkify.Parser do "ssb://" ] - @tlds "./priv/tlds.txt" |> File.read!() |> String.split("\n", trim: true) |> MapSet.new() + @tlds "./priv/tlds.txt" + |> File.read!() + |> String.split("\n", trim: true) + |> Enum.concat(["onion"]) + |> MapSet.new() @default_opts %{ url: true, diff --git a/test/parser_test.exs b/test/parser_test.exs index 8692f46..5d83a6a 100644 --- a/test/parser_test.exs +++ b/test/parser_test.exs @@ -304,7 +304,7 @@ defmodule Linkify.ParserTest do "misskey.loki" ] - def valid_emails, do: ["rms@ai.mit.edu", "vc@cock.li"] + def valid_emails, do: ["rms@ai.mit.edu", "vc@cock.li", "guardian@33y6fjyhs3phzfjj.onion"] def invalid_emails, do: ["rms[at]ai.mit.edu", "vc@cock", "xmpp:lain@trashserver.net"] - def valid_custom_tld_emails, do: ["guardian@33y6fjyhs3phzfjj.onion", "hi@company.null"] + def valid_custom_tld_emails, do: ["hi@company.null"] end From 649fc9125daaef03abdba867f600ed398bd5c5b0 Mon Sep 17 00:00:00 2001 From: "Haelwenn (lanodan) Monnier" Date: Tue, 17 Nov 2020 16:27:24 +0100 Subject: [PATCH 2/9] parser: Validate IPv6, IDN compatibility in email and mentions --- lib/linkify/parser.ex | 73 ++++++++++++++++++++++++++++++++++--------- test/linkify_test.exs | 20 ++++-------- test/parser_test.exs | 2 +- 3 files changed, 65 insertions(+), 30 deletions(-) diff --git a/lib/linkify/parser.ex b/lib/linkify/parser.ex index 1b82f8e..73e7c3d 100644 --- a/lib/linkify/parser.ex +++ b/lib/linkify/parser.ex @@ -9,17 +9,11 @@ defmodule Linkify.Parser do @match_url ~r{^(?:\W*)?(?(?:https?:\/\/)?[\w.-]+(?:\.[\w\.-]+)+[\w\-\._~%:\/?#[\]@!\$&'\(\)\*\+,;=.]+$)}u - @match_hostname ~r{^\W*(?https?:\/\/)?(?:[^@\n]+\\w@)?(?[^:#~\/\n?]+)}u - - @match_ip ~r"^(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])$" + @get_scheme_host ~r{^\W*(?https?:\/\/)?(?:[^@\n]+\\w@)?(?[^:#~\/\n?]+)}u # @user # @user@example.com # credo:disable-for-next-line - @match_mention ~r/^(?:\W*)?(?@[a-zA-Z\d_-]+@[a-zA-Z0-9_-](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*)|^(?:\W*)?(?@[a-zA-Z\d_-]+)/u - - # https://www.w3.org/TR/html5/forms.html#valid-e-mail-address - @match_email ~r"^[a-zA-Z0-9.!#$%&'*+\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*$"u @match_hashtag ~r/^(?\#[[:word:]_]*[[:alpha:]_·][[:word:]_·\p{M}]*)/u @@ -63,7 +57,7 @@ defmodule Linkify.Parser do ~s{Check out google.com} """ - @types [:url, :email, :hashtag, :extra, :mention] + @types [:url, :hashtag, :extra, :mention, :email] def parse(input, opts \\ %{}) def parse(input, opts) when is_binary(input), do: {input, %{}} |> parse(opts) |> elem(0) @@ -224,7 +218,11 @@ defmodule Linkify.Parser do end def email?(buffer, opts) do - valid_url?(buffer) && Regex.match?(@match_email, buffer) && valid_tld?(buffer, opts) + # Note: In reality the local part can only be checked by the remote server + case Regex.run(~r/^(?.*)@(?[^@]+)$/, buffer, capture: [:user, :host]) do + [_user, hostname] -> valid_hostname?(hostname) && valid_tld?(hostname, opts) + _ -> false + end end defp valid_url?(url), do: !Regex.match?(@invalid_url, url) @@ -237,7 +235,7 @@ defmodule Linkify.Parser do Will skip validation and return `true` if `:validate_tld` set to `:no_scheme` and the url has a scheme. """ def valid_tld?(url, opts) do - [scheme, host] = Regex.run(@match_hostname, url, capture: [:scheme, :host]) + [scheme, host] = Regex.run(@get_scheme_host, url, capture: [:scheme, :host]) cond do opts[:validate_tld] == false -> @@ -256,13 +254,58 @@ defmodule Linkify.Parser do end end - def ip?(buffer), do: Regex.match?(@match_ip, buffer) + def safe_to_integer(string, base \\ 10) do + String.to_integer(string, base) + rescue + _ -> + nil + end + + def ip?(buffer) do + v4 = String.split(buffer, ".") + + v6 = + buffer + |> String.trim_leading("[") + |> String.trim_trailing("]") + |> String.split(":", trim: true) + + cond do + length(v4) == 4 -> + !Enum.any?(v4, fn x -> safe_to_integer(x, 10) not in 0..255 end) + + length(v6) in 1..8 -> + !Enum.any?(v4, fn x -> safe_to_integer(x, 16) not in 0..0xFFFF end) + + false -> + false + end + end + + # IDN-compatible, ported from musl-libc's is_valid_hostname() + def valid_hostname?(hostname) do + hostname + |> String.to_charlist() + |> Enum.any?(fn s -> + !(s >= 0x80 || s in 0x30..0x39 || s in 0x41..0x5A || s in 0x61..0x7A || s in '.-') + end) + |> Kernel.!() + end def match_mention(buffer) do - case Regex.run(@match_mention, buffer, capture: [:long, :short]) do - [mention, ""] -> mention - ["", mention] -> mention - _ -> nil + case Regex.run(~r/^@(?[a-zA-Z\d_-]+)(@(?[^@]+))?$/, buffer, + capture: [:user, :host] + ) do + [user, ""] -> + "@" <> user + + [user, hostname] -> + if valid_hostname?(hostname) && valid_tld?(hostname, []), + do: "@" <> user <> "@" <> hostname, + else: nil + + _ -> + nil end end diff --git a/test/linkify_test.exs b/test/linkify_test.exs index 2568052..910ba17 100644 --- a/test/linkify_test.exs +++ b/test/linkify_test.exs @@ -244,7 +244,7 @@ defmodule LinkifyTest do end expected = - ~s(Hello again, @@user.<script></script>\nThis is on another :moominmamma: line. #2hu #epic #phantasmagoric) + ~s(Hello again, @user.<script></script>\nThis is on another :moominmamma: line. #2hu #epic #phantasmagoric) assert Linkify.link(text, mention: true, @@ -377,22 +377,14 @@ defmodule LinkifyTest do text = "That's @user@example.com's server" - expected = - "That's @user@example.com's server" - - assert Linkify.link(text, - mention: true, - mention_prefix: "https://example.com/user/" - ) == expected + assert Linkify.link(text, mention: true, mention_prefix: "https://example.com/user/") == + text end - test "mentions with symbols before them" do - text = "@@example hey! >@@test@example.com" + test "mentions with no word-separation before them" do + text = "@@example hey! >@@test@example.com idolm@ster" - expected = - "@@example hey! >@@test@example.com" - - assert Linkify.link(text, mention: true, mention_prefix: "/users/") == expected + assert Linkify.link(text, mention: true, mention_prefix: "/users/") == text end test "invalid mentions" do diff --git a/test/parser_test.exs b/test/parser_test.exs index 5d83a6a..cd00df1 100644 --- a/test/parser_test.exs +++ b/test/parser_test.exs @@ -305,6 +305,6 @@ defmodule Linkify.ParserTest do ] def valid_emails, do: ["rms@ai.mit.edu", "vc@cock.li", "guardian@33y6fjyhs3phzfjj.onion"] - def invalid_emails, do: ["rms[at]ai.mit.edu", "vc@cock", "xmpp:lain@trashserver.net"] + def invalid_emails, do: ["rms[at]ai.mit.edu", "vc@cock"] def valid_custom_tld_emails, do: ["hi@company.null"] end From c14ac019af18a7a3c96206b9d7f752de2e8ae783 Mon Sep 17 00:00:00 2001 From: Mark Felder Date: Thu, 19 Nov 2020 18:13:55 +0000 Subject: [PATCH 3/9] Add IDN and punycode domain test --- test/linkify_test.exs | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/test/linkify_test.exs b/test/linkify_test.exs index 910ba17..f3b129b 100644 --- a/test/linkify_test.exs +++ b/test/linkify_test.exs @@ -668,5 +668,19 @@ defmodule LinkifyTest do assert Linkify.link(text) == expected end + + test "IDN and punycode domain" do + text = "FrauBücher.com says Neiiighhh!" + + expected = "FrauBücher.com says Neiiighhh!" + + assert Linkify.link(text) == expected + + text = "xn--fraubcher-u9a.com says Neiiighhh!" + + expected = "xn--fraubcher-u9a.com says Neiiighhh!" + + assert Linkify.link(text) == expected + end end end From 31d41920ed4d3621faca5e74f9a2ef91577d2ab4 Mon Sep 17 00:00:00 2001 From: Mark Felder Date: Thu, 19 Nov 2020 18:25:54 +0000 Subject: [PATCH 4/9] Fix test group name --- test/linkify_test.exs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/linkify_test.exs b/test/linkify_test.exs index f3b129b..bdaffc1 100644 --- a/test/linkify_test.exs +++ b/test/linkify_test.exs @@ -494,7 +494,7 @@ defmodule LinkifyTest do assert Linkify.link(text, rel: false) == expected end - test "skip prefix" do + test "strip prefix" do assert Linkify.link("http://google.com", strip_prefix: true) == "google.com" From 17126aa662de56f93f661e7767f7148e5b753234 Mon Sep 17 00:00:00 2001 From: Mark Felder Date: Thu, 19 Nov 2020 18:29:41 +0000 Subject: [PATCH 5/9] Move tests under correct group, add test for URLs with IPv4 for domain --- test/linkify_test.exs | 69 ++++++++++++++++++++++++++----------------- 1 file changed, 42 insertions(+), 27 deletions(-) diff --git a/test/linkify_test.exs b/test/linkify_test.exs index bdaffc1..164a34e 100644 --- a/test/linkify_test.exs +++ b/test/linkify_test.exs @@ -489,9 +489,49 @@ defmodule LinkifyTest do test "turn urls with schema into urls" do text = "📌https://google.com" + expected = "📌https://google.com" assert Linkify.link(text, rel: false) == expected + + text = "http://www.cs.vu.nl/~ast/intel/" + + expected = "http://www.cs.vu.nl/~ast/intel/" + + assert Linkify.link(text) == expected + + text = "https://forum.zdoom.org/viewtopic.php?f=44&t=57087" + + expected = + "https://forum.zdoom.org/viewtopic.php?f=44&t=57087" + + assert Linkify.link(text) == expected + + text = "https://en.wikipedia.org/wiki/Sophia_(Gnosticism)#Mythos_of_the_soul" + + expected = + "https://en.wikipedia.org/wiki/Sophia_(Gnosticism)#Mythos_of_the_soul" + + assert Linkify.link(text) == expected + + text = "https://en.wikipedia.org/wiki/Duff's_device" + + expected = + "https://en.wikipedia.org/wiki/Duff's_device" + + assert Linkify.link(text) == expected + + text = "https://1.1.1.1/" + + expected = "https://1.1.1.1/" + + assert Linkify.link(text) == expected + + text = "https://1.1.1.1:8080/" + + expected = "https://1.1.1.1:8080/" + + assert Linkify.link(text) == expected end test "strip prefix" do @@ -525,35 +565,10 @@ defmodule LinkifyTest do assert Linkify.link(text, new_window: true) == expected text = "@username" + expected = "@username" + assert Linkify.link(text, new_window: true) == expected - - text = "http://www.cs.vu.nl/~ast/intel/" - - expected = "http://www.cs.vu.nl/~ast/intel/" - - assert Linkify.link(text) == expected - - text = "https://forum.zdoom.org/viewtopic.php?f=44&t=57087" - - expected = - "https://forum.zdoom.org/viewtopic.php?f=44&t=57087" - - assert Linkify.link(text) == expected - - text = "https://en.wikipedia.org/wiki/Sophia_(Gnosticism)#Mythos_of_the_soul" - - expected = - "https://en.wikipedia.org/wiki/Sophia_(Gnosticism)#Mythos_of_the_soul" - - assert Linkify.link(text) == expected - - text = "https://en.wikipedia.org/wiki/Duff's_device" - - expected = - "https://en.wikipedia.org/wiki/Duff's_device" - - assert Linkify.link(text) == expected end end From 3ce2c34709c650a0315bda0f0683545626badfa3 Mon Sep 17 00:00:00 2001 From: Mark Felder Date: Thu, 19 Nov 2020 18:55:17 +0000 Subject: [PATCH 6/9] Fix linking URLs/domains with trailing punctuation --- lib/linkify/parser.ex | 4 ++-- test/linkify_test.exs | 25 +++++++++++++++++++++++-- 2 files changed, 25 insertions(+), 4 deletions(-) diff --git a/lib/linkify/parser.ex b/lib/linkify/parser.ex index 73e7c3d..15c341e 100644 --- a/lib/linkify/parser.ex +++ b/lib/linkify/parser.ex @@ -19,7 +19,7 @@ defmodule Linkify.Parser do @match_skipped_tag ~r/^(?(a|code|pre)).*>*/ - @delimiters ~r/[,.;:>]*$/ + @delimiters ~r/[,.;:>?!]*$/ @prefix_extra [ "magnet:?", @@ -249,7 +249,7 @@ defmodule Linkify.Parser do true true -> - tld = host |> String.trim_trailing(".") |> String.split(".") |> List.last() + tld = host |> strip_punctuation() |> String.split(".") |> List.last() MapSet.member?(@tlds, tld) end end diff --git a/test/linkify_test.exs b/test/linkify_test.exs index 164a34e..32df145 100644 --- a/test/linkify_test.exs +++ b/test/linkify_test.exs @@ -675,13 +675,33 @@ defmodule LinkifyTest do assert Linkify.link(text) == expected end - test "Does not link trailing punctuation" do + test "Do not link trailing punctuation" do text = "You can find more info at https://pleroma.social." expected = "You can find more info at https://pleroma.social." assert Linkify.link(text) == expected + + text = "Of course it was google.com!!" + + expected = "Of course it was google.com!!" + + assert Linkify.link(text) == expected + + text = + "First I had to login to hotmail.com, then I had to delete emails because my 15MB quota was full." + + expected = + "First I had to login to hotmail.com, then I had to delete emails because my 15MB quota was full." + + assert Linkify.link(text) == expected + + text = "I looked at theonion.com; it was no longer funny." + + expected = "I looked at theonion.com; it was no longer funny." + + assert Linkify.link(text) == expected end test "IDN and punycode domain" do @@ -693,7 +713,8 @@ defmodule LinkifyTest do text = "xn--fraubcher-u9a.com says Neiiighhh!" - expected = "xn--fraubcher-u9a.com says Neiiighhh!" + expected = + "xn--fraubcher-u9a.com says Neiiighhh!" assert Linkify.link(text) == expected end From 5573fe6d979735219d03d71beb5a26dddad69a5e Mon Sep 17 00:00:00 2001 From: Mark Felder Date: Thu, 19 Nov 2020 19:30:20 +0000 Subject: [PATCH 7/9] Test IDN and punycode domains in mentions --- test/linkify_test.exs | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/test/linkify_test.exs b/test/linkify_test.exs index 32df145..3d9bf55 100644 --- a/test/linkify_test.exs +++ b/test/linkify_test.exs @@ -392,6 +392,20 @@ defmodule LinkifyTest do assert Linkify.link(text, mention: true, mention_prefix: "/users/") == text end + + test "IDN domain" do + text = "hello @lain@我爱你.com" + + expected = "hello @lain@我爱你.com" + + assert Linkify.link(text, mention: true, mention_prefix: "/users/") == expected + + text = "hello @lain@xn--6qq986b3xl.com" + + expected = "hello @lain@xn--6qq986b3xl.com" + + assert Linkify.link(text, mention: true, mention_prefix: "/users/") == expected + end end describe "hashtag links" do From 9925f4514ef8a2421f76c86ba465d9163e7b67ca Mon Sep 17 00:00:00 2001 From: Mark Felder Date: Thu, 19 Nov 2020 19:49:21 +0000 Subject: [PATCH 8/9] Add .onion domain tests --- test/linkify_test.exs | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/test/linkify_test.exs b/test/linkify_test.exs index 3d9bf55..9e94437 100644 --- a/test/linkify_test.exs +++ b/test/linkify_test.exs @@ -406,6 +406,15 @@ defmodule LinkifyTest do assert Linkify.link(text, mention: true, mention_prefix: "/users/") == expected end + + test ".onion domain" do + text = "Hey @admin@vww6ybal4bd7szmgncyruucpgfkqahzddi37ktceo3ah7ngmcopnpyyd.onion" + + expected = + "Hey @admin@vww6ybal4bd7szmgncyruucpgfkqahzddi37ktceo3ah7ngmcopnpyyd.onion" + + assert Linkify.link(text, mention: true, mention_prefix: "/users/") == expected + end end describe "hashtag links" do @@ -713,7 +722,8 @@ defmodule LinkifyTest do text = "I looked at theonion.com; it was no longer funny." - expected = "I looked at theonion.com; it was no longer funny." + expected = + "I looked at theonion.com; it was no longer funny." assert Linkify.link(text) == expected end @@ -732,5 +742,15 @@ defmodule LinkifyTest do assert Linkify.link(text) == expected end + + test ".onion domain" do + text = + "The riseup.net hidden service is at vww6ybal4bd7szmgncyruucpgfkqahzddi37ktceo3ah7ngmcopnpyyd.onion" + + expected = + "The riseup.net hidden service is at vww6ybal4bd7szmgncyruucpgfkqahzddi37ktceo3ah7ngmcopnpyyd.onion" + + assert Linkify.link(text) == expected + end end end From 62b385674d7a5801625979465a763250352c071a Mon Sep 17 00:00:00 2001 From: Haelwenn Date: Thu, 19 Nov 2020 20:35:56 +0000 Subject: [PATCH 9/9] Apply 1 suggestion(s) to 1 file(s) --- lib/linkify/parser.ex | 4 ---- 1 file changed, 4 deletions(-) diff --git a/lib/linkify/parser.ex b/lib/linkify/parser.ex index 15c341e..680ddac 100644 --- a/lib/linkify/parser.ex +++ b/lib/linkify/parser.ex @@ -11,10 +11,6 @@ defmodule Linkify.Parser do @get_scheme_host ~r{^\W*(?https?:\/\/)?(?:[^@\n]+\\w@)?(?[^:#~\/\n?]+)}u - # @user - # @user@example.com - # credo:disable-for-next-line - @match_hashtag ~r/^(?\#[[:word:]_]*[[:alpha:]_·][[:word:]_·\p{M}]*)/u @match_skipped_tag ~r/^(?(a|code|pre)).*>*/