defmodule AutoLinker.Parser do
  @moduledoc """
  Module to handle parsing the input string.

  The input is scanned one byte at a time. Text is collected into a buffer
  and, at word boundaries, handed to a handler function that decides whether
  the buffered text should be replaced by a link (see `check_and_link/3` and
  `check_and_link_phone/3`). Markup that is already present in the input is
  tracked with a small state machine so that tag names and attributes are
  copied through without being handed to the handler.
  """

  alias AutoLinker.Builder

  # @invalid_url ~r/\.\.+/
  # Rejects buffers containing consecutive dots, and buffers that consist only
  # of two or three dot-separated groups of digits.
  @invalid_url ~r/(\.\.+)|(^(\d+\.){1,2}\d+$)/

  # Both URL patterns expose the named group `tld`, which `is_url?/2` captures
  # and checks against `@tlds`.
  # NOTE(review): the trailing character class requires at least one character,
  # so for a bare host backtracking may hand the last character of the TLD to
  # it (e.g. the `tld` group capturing `.co` for `google.com`) - confirm
  # against the TLD list and the test suite.
  @match_url ~r{^[\w\.-]+(?<tld>\.[\w\.-]+)+[\w\-\._~:/?#[\]@!\$&'\(\)\*\+,;=.]+$}

  @match_scheme ~r{^(?:http(s)?:\/\/)?[\w.-]+(?<tld>\.[\w\.-]+)+[\w\-\._~:/?#[\]@!\$&'\(\)\*\+,;=.]+$}

  @match_phone ~r"((?:x\d{2,7})|(?:(?:\+?1\s?(?:[.-]\s?)?)?(?:\(\s?(?:[2-9]1[02-9]|[2-9][02-8]1|[2-9][02-8][02-9])\s?\)|(?:[2-9]1[02-9]|[2-9][02-8]1|[2-9][02-8][02-9]))\s?(?:[.-]\s?)?)(?:[2-9]1[02-9]|[2-9][02-9]1|[2-9][02-9]{2})\s?(?:[.-]\s?)?(?:[0-9]{4}))"

  # Options that are switched on when neither the caller nor the application
  # config mentions them.
  @default_opts ~w(url)a

  # The TLD list is read once, at compile time; marking the file as an external
  # resource lets the compiler rebuild this module when the file changes.
  @external_resource "./priv/tlds.txt"
  @tlds "./priv/tlds.txt" |> File.read!() |> String.trim() |> String.split("\n")

  @doc """
  Parse the given string, identifying items to link.

  Parses the string, replacing the matching urls and phone numbers with an
  html link built by `AutoLinker.Builder`.

  `opts` may be a map or a keyword list. It is merged over the `:opts` entry of
  the `:auto_linker` application environment; the `:attributes` entry of that
  environment is added under the `:attributes` key. `:url` is switched on when
  neither `opts` nor the configuration sets it.

  Options read by this module:

    * `:url` - link URLs; `false` disables it
    * `:phone` - link phone numbers; `false` disables it
    * `:markdown` - when `true`, the text is first passed through
      `AutoLinker.Builder.create_markdown_links/2`
    * `:scheme` - when `true`, URLs are matched with the pattern that also
      accepts a leading `http://` or `https://`
    * `:exclude_pattern` - when the text starts with this value, URL linking is
      skipped

  The whole option map is also passed on to `AutoLinker.Builder`.

  ## Examples

  The link markup is produced by `AutoLinker.Builder` and is not reproduced
  here; each call is expected to wrap the indicated part of the input in a
  link.

      AutoLinker.Parser.parse("Check out google.com")
      # links "google.com"

      AutoLinker.Parser.parse("call me at x9999", phone: true)
      # links "x9999"

      AutoLinker.Parser.parse("or at home on 555.555.5555", phone: true)
      # links "555.555.5555"

      AutoLinker.Parser.parse(", work (555) 555-5555", phone: true)
      # links "(555) 555-5555"
  """
  def parse(text, opts \\ %{})

  def parse(text, list) when is_list(list), do: parse(text, Enum.into(list, %{}))

  def parse(text, opts) do
    config =
      :auto_linker
      |> Application.get_env(:opts, [])
      |> Enum.into(%{})
      |> Map.put(:attributes, Application.get_env(:auto_linker, :attributes, []))

    # Enable every default option that is set neither by the caller nor by the
    # application config.
    opts =
      Enum.reduce(@default_opts, opts, fn opt, acc ->
        if is_nil(opts[opt]) and is_nil(config[opt]) do
          Map.put(acc, opt, true)
        else
          acc
        end
      end)

    do_parse(text, Map.merge(config, opts))
  end

  # do_parse/2 applies one linking pass per enabled option. Each clause removes
  # the option it handled and recurses until no linking option is left.
  # Explicitly disabled options are dropped first.
  defp do_parse(text, %{phone: false} = opts), do: do_parse(text, Map.delete(opts, :phone))
  defp do_parse(text, %{url: false} = opts), do: do_parse(text, Map.delete(opts, :url))

  defp do_parse(text, %{phone: _} = opts) do
    # The scan receives the options with :phone still present: do_parse/5 keys
    # on it to keep spaces inside the buffer.
    text
    |> do_parse(false, opts, {"", "", :parsing}, &check_and_link_phone/3)
    |> do_parse(Map.delete(opts, :phone))
  end

  defp do_parse(text, %{markdown: true} = opts) do
    text
    |> Builder.create_markdown_links(opts)
    |> do_parse(Map.delete(opts, :markdown))
  end

  defp do_parse(text, %{url: _} = opts) do
    exclude = Map.get(opts, :exclude_pattern, false)

    linked =
      if exclude && String.starts_with?(text, exclude) do
        text
      else
        do_parse(text, Map.get(opts, :scheme, false), opts, {"", "", :parsing}, &check_and_link/3)
      end

    do_parse(linked, Map.delete(opts, :url))
  end

  defp do_parse(text, _), do: text

  # do_parse/5 is the byte-wise scanner.
  #
  # Arguments: remaining input, the scheme flag passed through to the handler,
  # the options, `{buffer, acc, state}` and the handler. `buffer` holds the
  # text collected since the last flush, `acc` the output produced so far.
  #
  # States used below:
  #   :parsing         - plain text
  #   :skip            - inside an existing anchor; copied through untouched
  #   {:open, level}   - reading the name of an opening tag
  #   {:attrs, level}  - reading the attributes of an opening tag
  #   {:html, level}   - reading the content of a tag
  #   {:close, level}  - reading a closing tag
  #
  # The order of the clauses is significant.

  # End of input: emit what is left, running the handler on a non-empty buffer.
  defp do_parse("", _scheme, _opts, {"", acc, _}, _handler),
    do: acc

  defp do_parse("", scheme, opts, {buffer, acc, _}, handler),
    do: acc <> handler.(buffer, scheme, opts)

  # Existing anchors are copied verbatim until their closing tag.
  defp do_parse("<a" <> text, scheme, opts, {buffer, acc, :parsing}, handler),
    do: do_parse(text, scheme, opts, {"", acc <> buffer <> "<a", :skip}, handler)

  defp do_parse("</a>" <> text, scheme, opts, {buffer, acc, :skip}, handler),
    do: do_parse(text, scheme, opts, {"", acc <> buffer <> "</a>", :parsing}, handler)

  # Any other tag that starts while the buffer is empty.
  defp do_parse("<" <> text, scheme, opts, {"", acc, :parsing}, handler),
    do: do_parse(text, scheme, opts, {"<", acc, {:open, 1}}, handler)

  defp do_parse(">" <> text, scheme, opts, {buffer, acc, {:attrs, level}}, handler),
    do: do_parse(text, scheme, opts, {"", acc <> buffer <> ">", {:html, level}}, handler)

  # Attribute bytes go straight to the output, bypassing the buffer.
  defp do_parse(<<ch::8>> <> text, scheme, opts, {"", acc, {:attrs, level}}, handler),
    do: do_parse(text, scheme, opts, {"", acc <> <<ch::8>>, {:attrs, level}}, handler)

  # A closing tag ends the tag content: flush the buffer through the handler.
  defp do_parse("</" <> text, scheme, opts, {buffer, acc, {:html, level}}, handler),
    do:
      do_parse(
        text,
        scheme,
        opts,
        {"", acc <> handler.(buffer, scheme, opts) <> "</", {:close, level}},
        handler
      )

  defp do_parse(">" <> text, scheme, opts, {buffer, acc, {:close, 1}}, handler),
    do: do_parse(text, scheme, opts, {"", acc <> buffer <> ">", :parsing}, handler)

  defp do_parse(">" <> text, scheme, opts, {buffer, acc, {:close, level}}, handler),
    do: do_parse(text, scheme, opts, {"", acc <> buffer <> ">", {:html, level - 1}}, handler)

  # Whitespace after the tag name switches to attribute reading.
  defp do_parse(" " <> text, scheme, opts, {buffer, acc, {:open, level}}, handler),
    do: do_parse(text, scheme, opts, {"", acc <> buffer <> " ", {:attrs, level}}, handler)

  defp do_parse("\n" <> text, scheme, opts, {buffer, acc, {:open, level}}, handler),
    do: do_parse(text, scheme, opts, {"", acc <> buffer <> "\n", {:attrs, level}}, handler)

  # default cases where state is not important

  # While :phone is present in the options, spaces stay in the buffer (the
  # phone pattern allows whitespace inside a number).
  defp do_parse(" " <> text, scheme, %{phone: _} = opts, {buffer, acc, state}, handler),
    do: do_parse(text, scheme, opts, {buffer <> " ", acc, state}, handler)

  # Otherwise a space or newline ends the current word: flush it through the
  # handler and emit the separator.
  defp do_parse(" " <> text, scheme, opts, {buffer, acc, state}, handler),
    do:
      do_parse(
        text,
        scheme,
        opts,
        {"", acc <> handler.(buffer, scheme, opts) <> " ", state},
        handler
      )

  defp do_parse("\n" <> text, scheme, opts, {buffer, acc, state}, handler),
    do:
      do_parse(
        text,
        scheme,
        opts,
        {"", acc <> handler.(buffer, scheme, opts) <> "\n", state},
        handler
      )

  # Last byte of the input: add it to the buffer and flush.
  defp do_parse(<<ch::8>>, scheme, opts, {buffer, acc, state}, handler),
    do:
      do_parse(
        "",
        scheme,
        opts,
        {"", acc <> handler.(buffer <> <<ch::8>>, scheme, opts), state},
        handler
      )

  # Any other byte is appended to the buffer.
  defp do_parse(<<ch::8>> <> text, scheme, opts, {buffer, acc, state}, handler),
    do: do_parse(text, scheme, opts, {buffer <> <<ch::8>>, acc, state}, handler)

  # Handler for the URL pass: links `buffer` when `is_url?/2` accepts it,
  # otherwise returns it unchanged.
  def check_and_link(buffer, scheme, opts) do
    buffer
    |> is_url?(scheme)
    |> link_url(buffer, opts)
  end

  # Handler for the phone pass: links the phone numbers found in `buffer`,
  # otherwise returns it unchanged. The scheme argument is ignored.
  def check_and_link_phone(buffer, _, opts) do
    buffer
    |> match_phone()
    |> link_phone(buffer, opts)
  end

  @doc false
  # `true` as second argument selects the pattern that also accepts a leading
  # http(s) scheme; any other value selects the scheme-less pattern.
  def is_url?(buffer, true), do: url_with_valid_tld?(buffer, @match_scheme)
  def is_url?(buffer, _), do: url_with_valid_tld?(buffer, @match_url)

  # True when `buffer` is not rejected by @invalid_url, matches `regex`, and
  # the captured `tld` group names a known TLD.
  defp url_with_valid_tld?(buffer, regex) do
    if Regex.match?(@invalid_url, buffer) do
      false
    else
      regex
      |> Regex.run(buffer, capture: [:tld])
      |> is_valid_tld?()
    end
  end

  # Expects the capture list returned by Regex.run/3; anything else, including
  # `nil` for "no match", is not a valid TLD.
  def is_valid_tld?(["." <> tld]), do: tld in @tlds
  def is_valid_tld?(_), do: false

  @doc false
  # Returns the list of phone matches found in `buffer`, or `nil` when there
  # are none.
  def match_phone(buffer) do
    case Regex.scan(@match_phone, buffer) do
      [] -> nil
      other -> other
    end
  end

  # `nil` means no phone number was found: leave the buffer as it is.
  def link_phone(nil, buffer, _), do: buffer

  def link_phone(list, buffer, opts) do
    Builder.create_phone_link(list, buffer, opts)
  end

  @doc false
  def link_url(true, buffer, opts) do
    Builder.create_link(buffer, opts)
  end

  def link_url(_, buffer, _opts), do: buffer
end