372 lines
11 KiB
Elixir
372 lines
11 KiB
Elixir
defmodule AutoLinker.Parser do
|
|
@moduledoc """
|
|
Module to handle parsing the input string.
|
|
"""
|
|
|
|
alias AutoLinker.Builder
|
|
|
|
# Buffers containing two or more consecutive dots, or consisting solely of two
# or three dot-separated numbers (e.g. "1.5", "1.2.3"), are never URLs.
@invalid_url ~r/(\.\.+)|(^(\d+\.){1,2}\d+$)/

# Optional leading non-word characters, then the `url` capture: optional
# http(s) scheme, a dotted host and any trailing path/query characters up to
# the end of the buffer.
@match_url ~r{^(?:\W*)?(?<url>(?:https?:\/\/)?[\w.-]+(?:\.[\w\.-]+)+[\w\-\._~%:\/?#[\]@!\$&'\(\)\*\+,;=.]+$)}u

# Captures the optional `scheme` and the `host` of a buffer.
# NOTE(review): `\\w` inside a `~r` sigil is a literal backslash followed by
# `w`, so the optional user-info group looks like it can only match input
# containing the text `\w@` - confirm whether `\w` was intended before changing.
@match_hostname ~r{^\W*(?<scheme>https?:\/\/)?(?:[^@\n]+\\w@)?(?<host>[^:#~\/\n?]+)}u

# Dotted-quad IPv4 address, each octet in 0..255.
@match_ip ~r"^(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])$"

# @user
# @user@example.com
@match_mention ~r"^@[a-zA-Z\d_-]+@[a-zA-Z0-9_-](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*|@[a-zA-Z\d_-]+"u

# https://www.w3.org/TR/html5/forms.html#valid-e-mail-address
@match_email ~r"^[a-zA-Z0-9.!#$%&'*+\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*$"u

# A `#` followed by word characters, at least one of which is a letter,
# underscore or middle dot; captured as `tag`.
@match_hashtag ~r/^(?<tag>\#[[:word:]_]*[[:alpha:]_·][[:word:]_·\p{M}]*)/u

# Non-http prefixes that `check_and_link_extra/3` turns into links.
@prefix_extra [
  "magnet:?",
  "dweb://",
  "dat://",
  "gopher://",
  "ipfs://",
  "ipns://",
  "irc://",
  "ircs://",
  "irc6://",
  "mumble://",
  "ssb://"
]

# The TLD list is baked in at compile time. Declaring the file as an external
# resource makes the compiler rebuild this module when the list changes, and
# splitting on both line-ending styles keeps a stray "\r" out of the entries.
@tlds_file "./priv/tlds.txt"
@external_resource @tlds_file
@tlds @tlds_file |> File.read!() |> String.split(["\r\n", "\n"], trim: true) |> MapSet.new()

# Options applied by `parse/2` unless the caller overrides them.
@default_opts %{
  url: true,
  validate_tld: true
}
|
|
|
|
@doc """
Parse the given string, identifying items to link.

Replaces every match of an enabled kind with an html link. URLs are linked by
default; hashtags, mentions, emails and extra-scheme links are linked when the
`:hashtag`, `:mention`, `:email` or `:extra` option is `true`.

`input` is either a binary, in which case the linked binary is returned, or a
`{text, user_acc}` tuple, in which case `{linked_text, user_acc}` is returned.
`opts` may be a map or a keyword list and is merged over the defaults
(`url: true`, `validate_tld: true`).

## Examples

    iex> AutoLinker.Parser.parse("Check out google.com")
    ~s{Check out <a href="http://google.com" class="auto-linker" target="_blank" rel="noopener noreferrer">google.com</a>}
"""

def parse(input, opts \\ %{})

def parse(input, opts) when is_binary(input), do: {input, %{}} |> parse(opts) |> elem(0)

def parse(input, list) when is_list(list), do: parse(input, Enum.into(list, %{}))

def parse(input, opts) do
  # The previous body merged the options into an undefined `config` variable,
  # which does not compile. The caller's options merged over the module
  # defaults are the complete option set visible in this module.
  # NOTE(review): if application-level configuration was meant to be merged
  # in here as well, reintroduce it explicitly.
  do_parse(input, Map.merge(@default_opts, opts))
end
|
|
|
|
# Option dispatcher: each clause runs one scanning pass for the option it
# matches, removes that option, and recurses until no linkable option is left.
# The clause order fixes the pass order: hashtag, extra, email, url, mention.

# URL linking explicitly disabled: drop the key so the url pass never runs.
defp do_parse(input, %{url: false} = opts), do: do_parse(input, Map.delete(opts, :url))

defp do_parse(input, %{hashtag: true} = opts) do
  linked = do_parse(input, opts, {"", "", :parsing}, &check_and_link_hashtag/3)
  do_parse(linked, Map.delete(opts, :hashtag))
end

defp do_parse(input, %{extra: true} = opts) do
  linked = do_parse(input, opts, {"", "", :parsing}, &check_and_link_extra/3)
  do_parse(linked, Map.delete(opts, :extra))
end

defp do_parse(input, %{email: true} = opts) do
  linked = do_parse(input, opts, {"", "", :parsing}, &check_and_link_email/3)
  do_parse(linked, Map.delete(opts, :email))
end

defp do_parse({text, user_acc}, %{url: _} = opts) do
  exclude = Map.get(opts, :exclude_patterns)

  # The url pass is skipped entirely when `:exclude_patterns` is a list and
  # the text starts with one of its entries.
  excluded? = is_list(exclude) and String.starts_with?(text, exclude)

  linked =
    if excluded? do
      {text, user_acc}
    else
      do_parse({text, user_acc}, opts, {"", "", :parsing}, &check_and_link/3)
    end

  do_parse(linked, Map.delete(opts, :url))
end

defp do_parse(input, %{mention: true} = opts) do
  linked = do_parse(input, opts, {"", "", :parsing}, &check_and_link_mention/3)
  do_parse(linked, Map.delete(opts, :mention))
end

# Nothing left to link.
defp do_parse(input, _), do: input
|
|
|
|
# Scanner. Arguments: `{remaining_text, user_acc}`, the options, the scanner
# state `{pending, out, state}` and the handler applied to each token.
#   pending - characters of the token currently being collected
#   out     - output produced so far
#   state   - :parsing | :skip | {:open, n} | {:attrs, n} | {:html, n} | {:close, n}
# Clause order is significant; do not reorder.

# Input and pending token both exhausted: the pass is finished.
defp do_parse({"", uacc}, _opts, {"", out, _state}, _handler), do: {out, uacc}

# "<a", "<pre" and "<code" flush the pending token without running the
# handler and switch to :skip.
defp do_parse({"<a" <> rest, uacc}, opts, {pending, out, :parsing}, handler) do
  do_parse({rest, uacc}, opts, {"", out <> pending <> "<a", :skip}, handler)
end

defp do_parse({"<pre" <> rest, uacc}, opts, {pending, out, :parsing}, handler) do
  do_parse({rest, uacc}, opts, {"", out <> pending <> "<pre", :skip}, handler)
end

defp do_parse({"<code" <> rest, uacc}, opts, {pending, out, :parsing}, handler) do
  do_parse({rest, uacc}, opts, {"", out <> pending <> "<code", :skip}, handler)
end

# The matching closing tags leave :skip and resume normal parsing.
defp do_parse({"</a>" <> rest, uacc}, opts, {pending, out, :skip}, handler) do
  do_parse({rest, uacc}, opts, {"", out <> pending <> "</a>", :parsing}, handler)
end

defp do_parse({"</pre>" <> rest, uacc}, opts, {pending, out, :skip}, handler) do
  do_parse({rest, uacc}, opts, {"", out <> pending <> "</pre>", :parsing}, handler)
end

defp do_parse({"</code>" <> rest, uacc}, opts, {pending, out, :skip}, handler) do
  do_parse({rest, uacc}, opts, {"", out <> pending <> "</code>", :parsing}, handler)
end

# Any other "<" seen with an empty pending token opens a tag; the level counts
# how deeply tags are nested.
defp do_parse({"<" <> rest, uacc}, opts, {"", out, :parsing}, handler) do
  do_parse({rest, uacc}, opts, {"<", out, {:open, 1}}, handler)
end

defp do_parse({"<" <> rest, uacc}, opts, {"", out, {:html, depth}}, handler) do
  do_parse({rest, uacc}, opts, {"<", out, {:open, depth + 1}}, handler)
end

# ">" ends the attribute section; the tag's content follows.
defp do_parse({">" <> rest, uacc}, opts, {pending, out, {:attrs, depth}}, handler) do
  do_parse({rest, uacc}, opts, {"", out <> pending <> ">", {:html, depth}}, handler)
end

# Inside the attribute section every byte is copied straight to the output.
defp do_parse({<<ch::8, rest::binary>>, uacc}, opts, {"", out, {:attrs, depth}}, handler) do
  do_parse({rest, uacc}, opts, {"", out <> <<ch>>, {:attrs, depth}}, handler)
end

# "</" inside tag content: the pending token goes through the handler before
# the closing tag is emitted.
defp do_parse({"</" <> rest, uacc}, opts, {pending, out, {:html, depth}}, handler) do
  {linked, uacc} = run_handler(handler, pending, opts, uacc)
  do_parse({rest, uacc}, opts, {"", out <> linked <> "</", {:close, depth}}, handler)
end

# ">" of a closing tag: the outermost level returns to :parsing, deeper levels
# step one level out.
defp do_parse({">" <> rest, uacc}, opts, {pending, out, {:close, 1}}, handler) do
  do_parse({rest, uacc}, opts, {"", out <> pending <> ">", :parsing}, handler)
end

defp do_parse({">" <> rest, uacc}, opts, {pending, out, {:close, depth}}, handler) do
  do_parse({rest, uacc}, opts, {"", out <> pending <> ">", {:html, depth - 1}}, handler)
end

# Just after "<": emit what was buffered and move on to the attribute section
# without consuming any input.
defp do_parse({rest, uacc}, opts, {pending, out, {:open, depth}}, handler) do
  do_parse({rest, uacc}, opts, {"", out <> pending, {:attrs, depth}}, handler)
end

# Whitespace ends a token: run the handler on it, then emit the whitespace.
defp do_parse({<<ws::8, rest::binary>>, uacc}, opts, {pending, out, state}, handler)
     when ws in [?\s, ?\r, ?\n] do
  {linked, uacc} = run_handler(handler, pending, opts, uacc)
  do_parse({rest, uacc}, opts, {"", out <> linked <> <<ws>>, state}, handler)
end

# Last byte of the input: it completes the pending token, which is then
# handled and flushed.
defp do_parse({<<ch::8>>, uacc}, opts, {pending, out, state}, handler) do
  {linked, uacc} = run_handler(handler, pending <> <<ch>>, opts, uacc)
  do_parse({"", uacc}, opts, {"", out <> linked, state}, handler)
end

# Any other byte extends the pending token.
defp do_parse({<<ch::8, rest::binary>>, uacc}, opts, {pending, out, state}, handler) do
  do_parse({rest, uacc}, opts, {pending <> <<ch>>, out, state}, handler)
end
|
|
|
|
# URL handler for the scanner. Returns `buffer` unchanged unless its
# paren-stripped form passes `url?/2`; otherwise the matched URL part of
# `buffer` is replaced by its link.
def check_and_link(buffer, opts, _user_acc) do
  candidate = strip_parens(buffer)

  if url?(candidate, opts) do
    matched = @match_url |> Regex.run(candidate, capture: [:url]) |> hd()

    if matched == buffer do
      link_url(buffer, opts)
    else
      # Only part of the buffer is the URL (e.g. surrounding parentheses or
      # leading punctuation), so the rest of the buffer is kept around the link.
      String.replace(buffer, matched, link_url(matched, opts))
    end
  else
    buffer
  end
end
|
|
|
|
# For a buffer that starts with "(", returns what lies between that "(" and
# the first ")" (or the end of the buffer). Any other buffer is returned as is.
defp strip_parens("(" <> buffer) do
  [inside | _rest] = String.split(buffer, ")", parts: 2)
  inside
end

defp strip_parens(buffer), do: buffer
|
|
|
|
# Email handler for the scanner: links the buffer when `email?/2` accepts it,
# otherwise returns it untouched.
def check_and_link_email(buffer, opts, _user_acc) do
  if email?(buffer, opts) do
    link_email(buffer, opts)
  else
    buffer
  end
end
|
|
|
|
# Mention handler for the scanner: extracts a mention from the buffer (or
# `nil`) and hands it to `link_mention/4`.
def check_and_link_mention(buffer, opts, user_acc) do
  mention = match_mention(buffer)
  link_mention(mention, buffer, opts, user_acc)
end
|
|
|
|
# Hashtag handler for the scanner: extracts a hashtag from the buffer (or
# `nil`) and hands it to `link_hashtag/4`.
def check_and_link_hashtag(buffer, opts, user_acc) do
  hashtag = match_hashtag(buffer)
  link_hashtag(hashtag, buffer, opts, user_acc)
end
|
|
|
|
# Handler for non-http links.
#
# An "xmpp:" buffer is linked only when the part after the prefix passes
# `email?/2`. Any other buffer is linked when it starts with one of the
# `@prefix_extra` prefixes. In every non-matching case the buffer is returned
# exactly as received. The xmpp clause previously returned only the part after
# "xmpp:", which silently removed the prefix from the surrounding text.
def check_and_link_extra("xmpp:" <> handle = buffer, opts, _user_acc) do
  if email?(handle, opts), do: link_extra(buffer, opts), else: buffer
end

def check_and_link_extra(buffer, opts, _user_acc) do
  if String.starts_with?(buffer, @prefix_extra), do: link_extra(buffer, opts), else: buffer
end
|
|
|
|
# @doc false

# A buffer is a URL when it is not rejected by `valid_url?/1`, matches
# `@match_url`, and its TLD is accepted by `valid_tld?/2`.
def url?(buffer, opts) do
  cond do
    not valid_url?(buffer) -> false
    not Regex.match?(@match_url, buffer) -> false
    true -> valid_tld?(buffer, opts)
  end
end
|
|
|
|
# A buffer is an email address when it is not rejected by `valid_url?/1`,
# matches `@match_email`, and its TLD is accepted by `valid_tld?/2`.
def email?(buffer, opts) do
  valid_url?(buffer) and Regex.match?(@match_email, buffer) and valid_tld?(buffer, opts)
end
|
|
|
|
# True unless the string matches the `@invalid_url` rejection pattern.
defp valid_url?(url), do: not String.match?(url, @invalid_url)
|
|
|
|
@doc """
Validates a URL's TLD. Returns a boolean.

Will return `true` if `:validate_tld` option set to `false`.

Will skip validation and return `true` if `:validate_tld` set to `:no_scheme` and the url has a scheme.

A host that is an IP address is always accepted. Returns `false` when no host
can be extracted from `url`.
"""
def valid_tld?(url, opts) do
  if opts[:validate_tld] == false do
    # Validation disabled: no need to parse the url at all.
    true
  else
    case Regex.run(@match_hostname, url, capture: [:scheme, :host]) do
      # `Regex.run/3` returns nil when the pattern does not match; there is
      # then no host and therefore no TLD to accept.
      nil ->
        false

      [scheme, host] ->
        cond do
          ip?(host) ->
            true

          # don't validate if scheme is present
          opts[:validate_tld] == :no_scheme and scheme != "" ->
            true

          true ->
            tld = host |> String.split(".") |> List.last()
            MapSet.member?(@tlds, tld)
        end
    end
  end
end
|
|
|
|
# True when the whole buffer matches the `@match_ip` pattern.
def ip?(buffer), do: String.match?(buffer, @match_ip)
|
|
|
|
# Returns the text matched by `@match_mention` in the buffer, or `nil` when
# there is no match.
def match_mention(buffer) do
  with [mention] <- Regex.run(@match_mention, buffer) do
    mention
  else
    _ -> nil
  end
end
|
|
|
|
# Returns the `tag` capture of `@match_hashtag` for the buffer, or `nil` when
# there is no match.
def match_hashtag(buffer) do
  with [hashtag] <- Regex.run(@match_hashtag, buffer, capture: [:tag]) do
    hashtag
  else
    _ -> nil
  end
end
|
|
|
|
# Replaces `hashtag` inside `buffer` with its link.
#
# With no hashtag (`nil`) the buffer is returned unchanged. Otherwise the link
# comes from `opts.hashtag_handler` when that key is present, called as
# `handler.(hashtag, buffer, opts, user_acc)`, or from
# `Builder.create_hashtag_link/3`. The result is `{new_buffer, user_acc}`.
#
# When the link source returns a bare binary, the caller's `user_acc` is
# passed through. Previously such a result was paired with `nil`, which wiped
# the accumulator for every later token.
def link_hashtag(nil, buffer, _, _user_acc), do: buffer

def link_hashtag(hashtag, buffer, opts, user_acc) do
  linked =
    case opts do
      %{hashtag_handler: hashtag_handler} -> hashtag_handler.(hashtag, buffer, opts, user_acc)
      _ -> Builder.create_hashtag_link(hashtag, buffer, opts)
    end

  case maybe_update_buffer(linked, hashtag, buffer) do
    # A binary result carries no accumulator of its own.
    {out, nil} when is_binary(linked) -> {out, user_acc}
    result -> result
  end
end
|
|
|
|
# Replaces `mention` inside `buffer` with its link.
#
# With no mention (`nil`) the result is `{buffer, user_acc}`. Otherwise the
# link comes from `opts.mention_handler` when that key is present, called as
# `handler.(mention, buffer, opts, user_acc)`, or from
# `Builder.create_mention_link/3`. The result is `{new_buffer, user_acc}`.
#
# When the link source returns a bare binary, the caller's `user_acc` is
# passed through. Previously such a result was paired with `nil`, which wiped
# the accumulator for every later token.
def link_mention(nil, buffer, _, user_acc), do: {buffer, user_acc}

def link_mention(mention, buffer, opts, user_acc) do
  linked =
    case opts do
      %{mention_handler: mention_handler} -> mention_handler.(mention, buffer, opts, user_acc)
      _ -> Builder.create_mention_link(mention, buffer, opts)
    end

  case maybe_update_buffer(linked, mention, buffer) do
    # A binary result carries no accumulator of its own.
    {out, nil} when is_binary(linked) -> {out, user_acc}
    result -> result
  end
end
|
|
|
|
# Splices a link back into the buffer it came from.
#
# A binary result is first paired with a `nil` accumulator. For a
# `{linked, acc}` pair, when the match is only part of the buffer and the
# linked text differs from the buffer, every occurrence of `match` in `buffer`
# is replaced by `linked`. In all other cases the result is returned as given.
defp maybe_update_buffer(result, match, buffer) when is_binary(result),
  do: maybe_update_buffer({result, nil}, match, buffer)

defp maybe_update_buffer({linked, acc} = result, match, buffer) do
  if match == buffer or linked == buffer do
    result
  else
    {String.replace(buffer, match, linked), acc}
  end
end

defp maybe_update_buffer(result, _match, _buffer), do: result
|
|
|
|
@doc false
# Delegates URL link construction to the builder.
def link_url(buffer, opts), do: Builder.create_link(buffer, opts)
|
|
|
|
@doc false
# Delegates email link construction to the builder.
def link_email(buffer, opts), do: Builder.create_email_link(buffer, opts)
|
|
|
|
# Delegates extra-scheme link construction to the builder.
def link_extra(buffer, opts), do: Builder.create_extra_link(buffer, opts)
|
|
|
|
# Calls the handler with `(buffer, opts, user_acc)` and normalises its result
# to `{buffer, user_acc}`. A two-element tuple is returned as is; any other
# value is treated as the new buffer and paired with the accumulator that was
# passed in.
defp run_handler(handler, buffer, opts, user_acc) do
  case handler.(buffer, opts, user_acc) do
    {_new_buffer, _new_acc} = pair -> pair
    new_buffer -> {new_buffer, user_acc}
  end
end
|
|
end
|