Merge branch 'features/hostname_validation' into 'master'
Add .onion in the default TLDs, Validate IPv6, IDN compatibility in email and mentions See merge request pleroma/elixir-libraries/linkify!29
This commit is contained in:
commit
9b86e5e5e4
3 changed files with 187 additions and 68 deletions
|
@ -9,23 +9,13 @@ defmodule Linkify.Parser do
|
|||
|
||||
@match_url ~r{^(?:\W*)?(?<url>(?:https?:\/\/)?[\w.-]+(?:\.[\w\.-]+)+[\w\-\._~%:\/?#[\]@!\$&'\(\)\*\+,;=.]+$)}u
|
||||
|
||||
@match_hostname ~r{^\W*(?<scheme>https?:\/\/)?(?:[^@\n]+\\w@)?(?<host>[^:#~\/\n?]+)}u
|
||||
|
||||
@match_ip ~r"^(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])$"
|
||||
|
||||
# @user
|
||||
# @user@example.com
|
||||
# credo:disable-for-next-line
|
||||
@match_mention ~r/^(?:\W*)?(?<long>@[a-zA-Z\d_-]+@[a-zA-Z0-9_-](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*)|^(?:\W*)?(?<short>@[a-zA-Z\d_-]+)/u
|
||||
|
||||
# https://www.w3.org/TR/html5/forms.html#valid-e-mail-address
|
||||
@match_email ~r"^[a-zA-Z0-9.!#$%&'*+\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*$"u
|
||||
@get_scheme_host ~r{^\W*(?<scheme>https?:\/\/)?(?:[^@\n]+\\w@)?(?<host>[^:#~\/\n?]+)}u
|
||||
|
||||
@match_hashtag ~r/^(?<tag>\#[[:word:]_]*[[:alpha:]_·][[:word:]_·\p{M}]*)/u
|
||||
|
||||
@match_skipped_tag ~r/^(?<tag>(a|code|pre)).*>*/
|
||||
|
||||
@delimiters ~r/[,.;:>]*$/
|
||||
@delimiters ~r/[,.;:>?!]*$/
|
||||
|
||||
@prefix_extra [
|
||||
"magnet:?",
|
||||
|
@ -41,7 +31,11 @@ defmodule Linkify.Parser do
|
|||
"ssb://"
|
||||
]
|
||||
|
||||
@tlds "./priv/tlds.txt" |> File.read!() |> String.split("\n", trim: true) |> MapSet.new()
|
||||
@tlds "./priv/tlds.txt"
|
||||
|> File.read!()
|
||||
|> String.split("\n", trim: true)
|
||||
|> Enum.concat(["onion"])
|
||||
|> MapSet.new()
|
||||
|
||||
@default_opts %{
|
||||
url: true,
|
||||
|
@ -59,7 +53,7 @@ defmodule Linkify.Parser do
|
|||
~s{Check out <a href="http://google.com">google.com</a>}
|
||||
"""
|
||||
|
||||
@types [:url, :email, :hashtag, :extra, :mention]
|
||||
@types [:url, :hashtag, :extra, :mention, :email]
|
||||
|
||||
def parse(input, opts \\ %{})
|
||||
def parse(input, opts) when is_binary(input), do: {input, %{}} |> parse(opts) |> elem(0)
|
||||
|
@ -220,7 +214,11 @@ defmodule Linkify.Parser do
|
|||
end
|
||||
|
||||
def email?(buffer, opts) do
|
||||
valid_url?(buffer) && Regex.match?(@match_email, buffer) && valid_tld?(buffer, opts)
|
||||
# Note: In reality the local part can only be checked by the remote server
|
||||
case Regex.run(~r/^(?<user>.*)@(?<host>[^@]+)$/, buffer, capture: [:user, :host]) do
|
||||
[_user, hostname] -> valid_hostname?(hostname) && valid_tld?(hostname, opts)
|
||||
_ -> false
|
||||
end
|
||||
end
|
||||
|
||||
defp valid_url?(url), do: !Regex.match?(@invalid_url, url)
|
||||
|
@ -233,7 +231,7 @@ defmodule Linkify.Parser do
|
|||
Will skip validation and return `true` if `:validate_tld` set to `:no_scheme` and the url has a scheme.
|
||||
"""
|
||||
def valid_tld?(url, opts) do
|
||||
[scheme, host] = Regex.run(@match_hostname, url, capture: [:scheme, :host])
|
||||
[scheme, host] = Regex.run(@get_scheme_host, url, capture: [:scheme, :host])
|
||||
|
||||
cond do
|
||||
opts[:validate_tld] == false ->
|
||||
|
@ -247,18 +245,63 @@ defmodule Linkify.Parser do
|
|||
true
|
||||
|
||||
true ->
|
||||
tld = host |> String.trim_trailing(".") |> String.split(".") |> List.last()
|
||||
tld = host |> strip_punctuation() |> String.split(".") |> List.last()
|
||||
MapSet.member?(@tlds, tld)
|
||||
end
|
||||
end
|
||||
|
||||
def ip?(buffer), do: Regex.match?(@match_ip, buffer)
|
||||
@doc """
Converts `string` to an integer in the given `base`, returning `nil` instead of raising.

`String.to_integer/2` raises `ArgumentError` when `string` is not a valid integer
representation in `base`; only that error is translated to `nil`, so unrelated
failures are not silently hidden.

## Examples

    iex> Linkify.Parser.safe_to_integer("255")
    255

    iex> Linkify.Parser.safe_to_integer("ff", 16)
    255

    iex> Linkify.Parser.safe_to_integer("example")
    nil
"""
@spec safe_to_integer(String.t(), 2..36) :: integer() | nil
def safe_to_integer(string, base \\ 10) do
  String.to_integer(string, base)
rescue
  ArgumentError -> nil
end
|
||||
|
||||
@doc """
Returns `true` when `buffer` is a literal IPv4 or IPv6 address.

IPv6 literals may be wrapped in square brackets, as they are in URLs (`[::1]`);
brackets around anything that is not an IPv6 address are rejected.

Parsing is delegated to `:inet.parse_strict_address/1`, so input that is not an
address (a hostname, a lone bracket, the empty string) yields `false` instead
of raising.
"""
@spec ip?(String.t()) :: boolean()
def ip?("[" <> _ = buffer) do
  with true <- String.ends_with?(buffer, "]"),
       {:ok, address} <- parse_ip_literal(binary_part(buffer, 1, byte_size(buffer) - 2)) do
    # Only IPv6 (an 8-element tuple) may be written in bracket notation.
    tuple_size(address) == 8
  else
    _ -> false
  end
end

def ip?(buffer), do: match?({:ok, _}, parse_ip_literal(buffer))

# Hands the raw bytes to the Erlang parser; non-ASCII bytes simply fail to parse.
defp parse_ip_literal(string) do
  string
  |> :binary.bin_to_list()
  |> :inet.parse_strict_address()
end
|
||||
|
||||
# IDN-compatible, ported from musl-libc's is_valid_hostname()
@doc """
Returns `true` when `hostname` is a syntactically plausible host name.

A host name is accepted when it is 1 to 254 bytes of valid UTF-8 and every byte
is an ASCII letter, an ASCII digit, `.`, `-`, or part of a non-ASCII (IDN)
character. The empty string, over-long names and invalid UTF-8 return `false`.
"""
@spec valid_hostname?(String.t()) :: boolean()
def valid_hostname?(hostname) when is_binary(hostname) do
  byte_size(hostname) in 1..254 and String.valid?(hostname) and
    hostname |> :binary.bin_to_list() |> Enum.all?(&hostname_byte?/1)
end

# Every byte of a multi-byte UTF-8 sequence is >= 0x80, which lets IDN labels through.
defp hostname_byte?(byte) when byte >= 0x80, do: true
defp hostname_byte?(byte) when byte in ?0..?9 or byte in ?A..?Z or byte in ?a..?z, do: true
defp hostname_byte?(byte) when byte in [?., ?-], do: true
defp hostname_byte?(_byte), do: false
|
||||
|
||||
def match_mention(buffer) do
|
||||
case Regex.run(@match_mention, buffer, capture: [:long, :short]) do
|
||||
[mention, ""] -> mention
|
||||
["", mention] -> mention
|
||||
_ -> nil
|
||||
case Regex.run(~r/^@(?<user>[a-zA-Z\d_-]+)(@(?<host>[^@]+))?$/, buffer,
|
||||
capture: [:user, :host]
|
||||
) do
|
||||
[user, ""] ->
|
||||
"@" <> user
|
||||
|
||||
[user, hostname] ->
|
||||
if valid_hostname?(hostname) && valid_tld?(hostname, []),
|
||||
do: "@" <> user <> "@" <> hostname,
|
||||
else: nil
|
||||
|
||||
_ ->
|
||||
nil
|
||||
end
|
||||
end
|
||||
|
||||
|
|
|
@ -244,7 +244,7 @@ defmodule LinkifyTest do
|
|||
end
|
||||
|
||||
expected =
|
||||
~s(Hello again, <span class="h-card"><a href="#/user/user">@<span>@user</span></a></span>.<script></script>\nThis is on another :moominmamma: line. <a href="/tag/2hu" target="_blank">#2hu</a> <a href="/tag/epic" target="_blank">#epic</a> <a href="/tag/phantasmagoric" target="_blank">#phantasmagoric</a>)
|
||||
~s(Hello again, @user.<script></script>\nThis is on another :moominmamma: line. <a href="/tag/2hu" target="_blank">#2hu</a> <a href="/tag/epic" target="_blank">#epic</a> <a href="/tag/phantasmagoric" target="_blank">#phantasmagoric</a>)
|
||||
|
||||
assert Linkify.link(text,
|
||||
mention: true,
|
||||
|
@ -385,22 +385,14 @@ defmodule LinkifyTest do
|
|||
|
||||
text = "That's @user@example.com's server"
|
||||
|
||||
expected =
|
||||
"That's <a href=\"https://example.com/user/user@example.com\">@user@example.com</a>'s server"
|
||||
|
||||
assert Linkify.link(text,
|
||||
mention: true,
|
||||
mention_prefix: "https://example.com/user/"
|
||||
) == expected
|
||||
assert Linkify.link(text, mention: true, mention_prefix: "https://example.com/user/") ==
|
||||
text
|
||||
end
|
||||
|
||||
test "mentions with symbols before them" do
|
||||
text = "@@example hey! >@@test@example.com"
|
||||
test "mentions with no word-separation before them" do
|
||||
text = "@@example hey! >@@test@example.com idolm@ster"
|
||||
|
||||
expected =
|
||||
"@<a href=\"/users/example\">@example</a> hey! >@<a href=\"/users/test@example.com\">@test@example.com</a>"
|
||||
|
||||
assert Linkify.link(text, mention: true, mention_prefix: "/users/") == expected
|
||||
assert Linkify.link(text, mention: true, mention_prefix: "/users/") == text
|
||||
end
|
||||
|
||||
test "invalid mentions" do
|
||||
|
@ -408,6 +400,29 @@ defmodule LinkifyTest do
|
|||
|
||||
assert Linkify.link(text, mention: true, mention_prefix: "/users/") == text
|
||||
end
|
||||
|
||||
test "IDN domain" do
  # The same mention is checked against the Unicode host and its punycode form.
  for host <- ["我爱你.com", "xn--6qq986b3xl.com"] do
    linked = Linkify.link("hello @lain@" <> host, mention: true, mention_prefix: "/users/")

    assert linked == ~s(hello <a href="/users/lain@#{host}">@lain@#{host}</a>)
  end
end
|
||||
|
||||
test ".onion domain" do
  # A mention whose host ends in .onion must be linked like any other remote mention.
  handle = "admin@vww6ybal4bd7szmgncyruucpgfkqahzddi37ktceo3ah7ngmcopnpyyd.onion"

  linked = Linkify.link("Hey @" <> handle, mention: true, mention_prefix: "/users/")

  assert linked == ~s(Hey <a href="/users/#{handle}">@#{handle}</a>)
end
|
||||
end
|
||||
|
||||
describe "hashtag links" do
|
||||
|
@ -505,12 +520,52 @@ defmodule LinkifyTest do
|
|||
|
||||
test "turn urls with schema into urls" do
  # A non-word prefix directly in front of the scheme stays outside the link.
  assert Linkify.link("📌https://google.com", rel: false) ==
           "📌<a href=\"https://google.com\">https://google.com</a>"

  # Each of these URLs must be linked verbatim, used both as href and as link text.
  urls = [
    "http://www.cs.vu.nl/~ast/intel/",
    "https://forum.zdoom.org/viewtopic.php?f=44&t=57087",
    "https://en.wikipedia.org/wiki/Sophia_(Gnosticism)#Mythos_of_the_soul",
    "https://en.wikipedia.org/wiki/Duff's_device",
    "https://1.1.1.1/",
    "https://1.1.1.1:8080/"
  ]

  for url <- urls do
    assert Linkify.link(url) == "<a href=\"" <> url <> "\">" <> url <> "</a>"
  end
end
|
||||
|
||||
test "skip prefix" do
|
||||
test "strip prefix" do
|
||||
assert Linkify.link("http://google.com", strip_prefix: true) ==
|
||||
"<a href=\"http://google.com\">google.com</a>"
|
||||
|
||||
|
@ -541,35 +596,10 @@ defmodule LinkifyTest do
|
|||
assert Linkify.link(text, new_window: true) == expected
|
||||
|
||||
text = "@username"
|
||||
|
||||
expected = "@username"
|
||||
|
||||
assert Linkify.link(text, new_window: true) == expected
|
||||
|
||||
text = "http://www.cs.vu.nl/~ast/intel/"
|
||||
|
||||
expected = "<a href=\"http://www.cs.vu.nl/~ast/intel/\">http://www.cs.vu.nl/~ast/intel/</a>"
|
||||
|
||||
assert Linkify.link(text) == expected
|
||||
|
||||
text = "https://forum.zdoom.org/viewtopic.php?f=44&t=57087"
|
||||
|
||||
expected =
|
||||
"<a href=\"https://forum.zdoom.org/viewtopic.php?f=44&t=57087\">https://forum.zdoom.org/viewtopic.php?f=44&t=57087</a>"
|
||||
|
||||
assert Linkify.link(text) == expected
|
||||
|
||||
text = "https://en.wikipedia.org/wiki/Sophia_(Gnosticism)#Mythos_of_the_soul"
|
||||
|
||||
expected =
|
||||
"<a href=\"https://en.wikipedia.org/wiki/Sophia_(Gnosticism)#Mythos_of_the_soul\">https://en.wikipedia.org/wiki/Sophia_(Gnosticism)#Mythos_of_the_soul</a>"
|
||||
|
||||
assert Linkify.link(text) == expected
|
||||
|
||||
text = "https://en.wikipedia.org/wiki/Duff's_device"
|
||||
|
||||
expected =
|
||||
"<a href=\"https://en.wikipedia.org/wiki/Duff's_device\">https://en.wikipedia.org/wiki/Duff's_device</a>"
|
||||
|
||||
assert Linkify.link(text) == expected
|
||||
end
|
||||
end
|
||||
|
||||
|
@ -676,13 +706,59 @@ defmodule LinkifyTest do
|
|||
assert Linkify.link(text) == expected
|
||||
end
|
||||
|
||||
test "Does not link trailing punctuation" do
|
||||
test "Do not link trailing punctuation" do
|
||||
text = "You can find more info at https://pleroma.social."
|
||||
|
||||
expected =
|
||||
"You can find more info at <a href=\"https://pleroma.social\">https://pleroma.social</a>."
|
||||
|
||||
assert Linkify.link(text) == expected
|
||||
|
||||
text = "Of course it was google.com!!"
|
||||
|
||||
expected = "Of course it was <a href=\"http://google.com\">google.com</a>!!"
|
||||
|
||||
assert Linkify.link(text) == expected
|
||||
|
||||
text =
|
||||
"First I had to login to hotmail.com, then I had to delete emails because my 15MB quota was full."
|
||||
|
||||
expected =
|
||||
"First I had to login to <a href=\"http://hotmail.com\">hotmail.com</a>, then I had to delete emails because my 15MB quota was full."
|
||||
|
||||
assert Linkify.link(text) == expected
|
||||
|
||||
text = "I looked at theonion.com; it was no longer funny."
|
||||
|
||||
expected =
|
||||
"I looked at <a href=\"http://theonion.com\">theonion.com</a>; it was no longer funny."
|
||||
|
||||
assert Linkify.link(text) == expected
|
||||
end
|
||||
|
||||
test "IDN and punycode domain" do
  # Scheme-less hosts get an http:// href, for the Unicode and the punycode spelling.
  for host <- ["FrauBücher.com", "xn--fraubcher-u9a.com"] do
    assert Linkify.link(host <> " says Neiiighhh!") ==
             ~s(<a href="http://#{host}">#{host}</a> says Neiiighhh!)
  end
end
|
||||
|
||||
test ".onion domain" do
  # Both the regular domain and the scheme-less .onion host in the sentence are linked.
  onion = "vww6ybal4bd7szmgncyruucpgfkqahzddi37ktceo3ah7ngmcopnpyyd.onion"

  linked = Linkify.link("The riseup.net hidden service is at " <> onion)

  assert linked ==
           ~s(The <a href="http://riseup.net">riseup.net</a> hidden service is at ) <>
             ~s(<a href="http://#{onion}">#{onion}</a>)
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -304,7 +304,7 @@ defmodule Linkify.ParserTest do
|
|||
"misskey.loki"
|
||||
]
|
||||
|
||||
def valid_emails, do: ["rms@ai.mit.edu", "vc@cock.li"]
|
||||
def invalid_emails, do: ["rms[at]ai.mit.edu", "vc@cock", "xmpp:lain@trashserver.net"]
|
||||
def valid_custom_tld_emails, do: ["guardian@33y6fjyhs3phzfjj.onion", "hi@company.null"]
|
||||
def valid_emails, do: ["rms@ai.mit.edu", "vc@cock.li", "guardian@33y6fjyhs3phzfjj.onion"]
|
||||
def invalid_emails, do: ["rms[at]ai.mit.edu", "vc@cock"]
|
||||
def valid_custom_tld_emails, do: ["hi@company.null"]
|
||||
end
|
||||
|
|
Loading…
Reference in a new issue