parser: Validate IPv6, IDN compatibility in email and mentions

This commit is contained in:
Haelwenn (lanodan) Monnier 2020-11-17 16:27:24 +01:00 committed by Mark Felder
parent 26f5310379
commit 649fc9125d
3 changed files with 65 additions and 30 deletions

View file

@ -9,17 +9,11 @@ defmodule Linkify.Parser do
# NOTE(review): this hunk is rendered without +/- markers, so attributes from
# before and after the change appear side by side — confirm against the commit.
# Candidate URL: optional leading non-word characters, optional http(s) scheme,
# a dotted host, then path/query characters up to the end of the buffer.
@match_url ~r{^(?:\W*)?(?<url>(?:https?:\/\/)?[\w.-]+(?:\.[\w\.-]+)+[\w\-\._~%:\/?#[\]@!\$&'\(\)\*\+,;=.]+$)}u
# Captures the optional http(s) scheme (`scheme`) and the host part (`host`).
# NOTE(review): `\\w@` looks like it matches a literal backslash followed by
# `w@` rather than a word character — confirm whether `\w@` was intended.
@match_hostname ~r{^\W*(?<scheme>https?:\/\/)?(?:[^@\n]+\\w@)?(?<host>[^:#~\/\n?]+)}u
# Dotted-quad IPv4 address, each octet limited to 0-255, anchored at both ends.
@match_ip ~r"^(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])$"
# Same pattern text as @match_hostname above.
@get_scheme_host ~r{^\W*(?<scheme>https?:\/\/)?(?:[^@\n]+\\w@)?(?<host>[^:#~\/\n?]+)}u
# @user
# @user@example.com
# Named capture `long` holds an @user@host mention, `short` a bare @user one.
# credo:disable-for-next-line
@match_mention ~r/^(?:\W*)?(?<long>@[a-zA-Z\d_-]+@[a-zA-Z0-9_-](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*)|^(?:\W*)?(?<short>@[a-zA-Z\d_-]+)/u
# https://www.w3.org/TR/html5/forms.html#valid-e-mail-address
@match_email ~r"^[a-zA-Z0-9.!#$%&'*+\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*$"u
# Hashtag: `#` followed by word characters, with at least one alphabetic
# character, underscore or middle dot somewhere after the `#`.
@match_hashtag ~r/^(?<tag>\#[[:word:]_]*[[:alpha:]_·][[:word:]_·\p{M}]*)/u
@ -63,7 +57,7 @@ defmodule Linkify.Parser do
~s{Check out <a href="http://google.com">google.com</a>}
"""
@types [:url, :email, :hashtag, :extra, :mention]
@types [:url, :hashtag, :extra, :mention, :email]
def parse(input, opts \\ %{})
def parse(input, opts) when is_binary(input), do: {input, %{}} |> parse(opts) |> elem(0)
@ -224,7 +218,11 @@ defmodule Linkify.Parser do
end
# Reports whether `buffer` is acceptable as an e-mail address.
# NOTE(review): this hunk is rendered without +/- markers; the first body line
# looks like the pre-change check and the `case` below like its replacement —
# confirm against the commit before relying on either.
def email?(buffer, opts) do
valid_url?(buffer) && Regex.match?(@match_email, buffer) && valid_tld?(buffer, opts)
# Note: In reality the local part can only be checked by the remote server
# Split on the last "@": everything before it is the user part, and the host
# part may not contain another "@".
case Regex.run(~r/^(?<user>.*)@(?<host>[^@]+)$/, buffer, capture: [:user, :host]) do
# Only the host part is validated (allowed characters, then TLD).
[_user, hostname] -> valid_hostname?(hostname) && valid_tld?(hostname, opts)
# No "@" or an empty host part: not an e-mail address.
_ -> false
end
end
# A URL is accepted unless it matches the @invalid_url pattern.
defp valid_url?(url) do
  not Regex.match?(@invalid_url, url)
end
@ -237,7 +235,7 @@ defmodule Linkify.Parser do
Will skip validation and return `true` if `:validate_tld` set to `:no_scheme` and the url has a scheme.
"""
def valid_tld?(url, opts) do
[scheme, host] = Regex.run(@match_hostname, url, capture: [:scheme, :host])
[scheme, host] = Regex.run(@get_scheme_host, url, capture: [:scheme, :host])
cond do
opts[:validate_tld] == false ->
@ -256,13 +254,58 @@ defmodule Linkify.Parser do
end
end
# True when `buffer` matches the @match_ip pattern.
# NOTE(review): another `ip?/1` definition appears a few lines further down in
# this rendered diff; this one-liner looks like the pre-change version — confirm.
def ip?(buffer), do: Regex.match?(@match_ip, buffer)
# Converts `string` to an integer in the given `base` (default 10).
# Returns nil instead of raising when the whole string is not a valid integer
# in that base, or when the arguments are not a binary and a base in 2..36.
def safe_to_integer(string, base \\ 10)

def safe_to_integer(string, base) when is_binary(string) and base in 2..36 do
  case Integer.parse(string, base) do
    # Only a parse that consumes the entire string counts as a success.
    {integer, ""} -> integer
    _ -> nil
  end
end

def safe_to_integer(_string, _base), do: nil
@doc """
Returns `true` when `buffer` is a literal IPv4 or IPv6 address, `false` otherwise.

Leading `[` and trailing `]` characters (as used around IPv6 literals in URLs)
are stripped before parsing. IPv4 addresses must be in full dotted-quad form
with every octet in `0..255`.

Parsing is delegated to `:inet.parse_strict_address/1`, so input that is not an
address (including the empty string, plain hostnames and over-long IPv6 forms)
yields `false` rather than raising.
"""
def ip?(buffer) do
  address =
    buffer
    |> String.trim_leading("[")
    |> String.trim_trailing("]")
    # A byte list never raises on non-UTF-8 input; such bytes simply fail to parse.
    |> :binary.bin_to_list()

  match?({:ok, _}, :inet.parse_strict_address(address))
end
# IDN-compatible, ported from musl-libc's is_valid_hostname()
# A hostname passes when every code point is either non-ASCII (>= 0x80),
# an ASCII letter or digit, a dot, or a hyphen.
def valid_hostname?(hostname) do
  codepoints = String.to_charlist(hostname)

  Enum.all?(codepoints, fn
    cp when cp >= 0x80 -> true
    cp when cp in ?0..?9 or cp in ?A..?Z or cp in ?a..?z -> true
    cp when cp in [?., ?-] -> true
    _ -> false
  end)
end
# Returns the mention text found in `buffer`, or nil when there is none.
# NOTE(review): this hunk is rendered without +/- markers; the first `case`
# (using @match_mention) looks like the pre-change body and the second `case`
# like its replacement — confirm against the commit.
def match_mention(buffer) do
case Regex.run(@match_mention, buffer, capture: [:long, :short]) do
[mention, ""] -> mention
["", mention] -> mention
_ -> nil
# Accepts `@user` optionally followed by `@host`; the whole buffer must match.
case Regex.run(~r/^@(?<user>[a-zA-Z\d_-]+)(@(?<host>[^@]+))?$/, buffer,
capture: [:user, :host]
) do
# No host part captured: plain local mention.
[user, ""] ->
"@" <> user
# A host part is kept only if it passes hostname and TLD validation.
[user, hostname] ->
if valid_hostname?(hostname) && valid_tld?(hostname, []),
do: "@" <> user <> "@" <> hostname,
else: nil
_ ->
nil
end
end

View file

@ -244,7 +244,7 @@ defmodule LinkifyTest do
end
expected =
~s(Hello again, <span class="h-card"><a href="#/user/user">@<span>@user</span></a></span>.&lt;script&gt;&lt;/script&gt;\nThis is on another :moominmamma: line. <a href="/tag/2hu" target="_blank">#2hu</a> <a href="/tag/epic" target="_blank">#epic</a> <a href="/tag/phantasmagoric" target="_blank">#phantasmagoric</a>)
~s(Hello again, @user.&lt;script&gt;&lt;/script&gt;\nThis is on another :moominmamma: line. <a href="/tag/2hu" target="_blank">#2hu</a> <a href="/tag/epic" target="_blank">#epic</a> <a href="/tag/phantasmagoric" target="_blank">#phantasmagoric</a>)
assert Linkify.link(text,
mention: true,
@ -377,22 +377,14 @@ defmodule LinkifyTest do
text = "That's @user@example.com's server"
expected =
"That's <a href=\"https://example.com/user/user@example.com\">@user@example.com</a>'s server"
assert Linkify.link(text,
mention: true,
mention_prefix: "https://example.com/user/"
) == expected
assert Linkify.link(text, mention: true, mention_prefix: "https://example.com/user/") ==
text
end
test "mentions with symbols before them" do
text = "@@example hey! >@@test@example.com"
test "mentions with no word-separation before them" do
text = "@@example hey! >@@test@example.com idolm@ster"
expected =
"@<a href=\"/users/example\">@example</a> hey! >@<a href=\"/users/test@example.com\">@test@example.com</a>"
assert Linkify.link(text, mention: true, mention_prefix: "/users/") == expected
assert Linkify.link(text, mention: true, mention_prefix: "/users/") == text
end
test "invalid mentions" do

View file

@ -305,6 +305,6 @@ defmodule Linkify.ParserTest do
]
# Sample addresses used as valid e-mail fixtures.
def valid_emails do
  [
    "rms@ai.mit.edu",
    "vc@cock.li",
    "guardian@33y6fjyhs3phzfjj.onion"
  ]
end
# Sample strings used as invalid e-mail fixtures.
# NOTE(review): two definitions appear because this diff is rendered without
# +/- markers; the first (with the "xmpp:" entry) looks like the pre-change
# line and the second like its replacement — confirm against the commit.
def invalid_emails, do: ["rms[at]ai.mit.edu", "vc@cock", "xmpp:lain@trashserver.net"]
def invalid_emails, do: ["rms[at]ai.mit.edu", "vc@cock"]
# Sample addresses whose TLD ("null") is used as a custom-TLD fixture.
def valid_custom_tld_emails do
  ["hi@company.null"]
end
end