# Copyright © 2017-2018 E-MetroTel
# Copyright © 2019-2022 Pleroma Authors
# SPDX-License-Identifier: MIT
defmodule Linkify.ParserTest do
use ExUnit.Case, async: true
doctest Linkify.Parser
import Linkify.Parser
# Exercises url?/2 over the combinations of the `scheme` and `validate_tld`
# options. Every test walks one of the fixture lists defined further down in
# this module and asserts or refutes url?/2 for each entry.
describe "url?/2" do
  test "valid scheme true" do
    for url <- valid_scheme_urls() do
      assert url?(url, scheme: true, validate_tld: true)
    end
  end

  test "invalid scheme true" do
    for url <- invalid_scheme_urls() do
      refute url?(url, scheme: true, validate_tld: true)
    end
  end

  test "valid scheme false" do
    for url <- valid_non_scheme_urls() do
      assert url?(url, scheme: false, validate_tld: true)
    end
  end

  test "invalid scheme false" do
    for url <- invalid_non_scheme_urls() do
      refute url?(url, scheme: false, validate_tld: true)
    end
  end

  # The next three tests share the same scheme-prefixed fixtures and differ
  # only in the `validate_tld` value.
  test "checks the tld for url with a scheme when validate_tld: true" do
    for url <- custom_tld_scheme_urls() do
      refute url?(url, scheme: true, validate_tld: true)
    end
  end

  test "does not check the tld for url with a scheme when validate_tld: false" do
    for url <- custom_tld_scheme_urls() do
      assert url?(url, scheme: true, validate_tld: false)
    end
  end

  test "does not check the tld for url with a scheme when validate_tld: :no_scheme" do
    for url <- custom_tld_scheme_urls() do
      assert url?(url, scheme: true, validate_tld: :no_scheme)
    end
  end

  # Same idea for the scheme-less fixtures: only `validate_tld: false` is
  # expected to let the custom TLDs through.
  test "checks the tld for url without a scheme when validate_tld: true" do
    for url <- custom_tld_non_scheme_urls() do
      refute url?(url, scheme: false, validate_tld: true)
    end
  end

  test "checks the tld for url without a scheme when validate_tld: :no_scheme" do
    for url <- custom_tld_non_scheme_urls() do
      refute url?(url, scheme: false, validate_tld: :no_scheme)
    end
  end

  test "does not check the tld for url without a scheme when validate_tld: false" do
    for url <- custom_tld_non_scheme_urls() do
      assert url?(url, scheme: false, validate_tld: false)
    end
  end
end
# Exercises email?/2 with and without TLD validation. The describe label
# carries the arity so it follows the same "fun/arity" convention as the
# "url?/2" block in this module; every call below passes two arguments.
describe "email?/2" do
  test "identifies valid emails" do
    for email <- valid_emails() do
      assert email?(email, [])
    end
  end

  test "identifies invalid emails" do
    for email <- invalid_emails() do
      refute email?(email, [])
    end
  end

  # The two tests below use the same custom-TLD fixtures; only the
  # `validate_tld` option decides whether the address is accepted.
  test "does not validate tlds when validate_tld: false" do
    for email <- valid_custom_tld_emails() do
      assert email?(email, validate_tld: false)
    end
  end

  test "validates tlds when validate_tld: true" do
    for email <- valid_custom_tld_emails() do
      refute email?(email, validate_tld: true)
    end
  end
end
# Tests for parse/1 and parse/2: each case feeds a text fixture to the parser
# and compares the result with an expected literal.
# NOTE(review): in many cases below the expected literal is identical to the
# input (or is an empty string), which looks as if HTML markup was stripped
# from these fixtures at some point. Confirm against the upstream test file
# before trusting these assertions.
describe "parse" do
# NOTE(review): "breakes" in the test name looks like a typo for "breaks".
test "handle line breakes" do
text = "google.com\r\nssss"
expected = "google.com\r\nssss"
assert parse(text) == expected
end
# Several input/expected pairs with a line break before or after the domain,
# followed by two pairs ending in a literal angle bracket.
test "handle angle bracket in the end" do
text = "google.com
"
assert parse(text) == "google.com
"
text = "google.com
hey"
assert parse(text) == "google.com
hey"
text = "hey
google.com"
assert parse(text) == "hey
google.com"
text = "
google.com"
assert parse(text) == "
google.com"
text = "google.com<"
assert parse(text) == "google.com<"
text = "google.com>"
assert parse(text) == "google.com>"
end
# Each fixture is expected to come back from parse/1 unchanged.
# NOTE(review): the second and third fixtures are byte-identical as written
# and none of them contains an attribute; presumably markup is missing here.
test "does not link attributes" do
text = "Check out google"
assert parse(text) == text
text = "Check out "
assert parse(text) == text
text = "Check out "
assert parse(text) == text
end
# NOTE(review): the test name spans two lines and the three fixtures are
# byte-identical; presumably the tag names that distinguished them are missing.
test "does not link inside `
` and ``" do
text = "google.com
"
assert parse(text) == text
text = "google.com
"
assert parse(text) == text
text = "google.com
"
assert parse(text) == text
end
# Calls parse/2 with `class: false, rel: false`.
# NOTE(review): the first expected value is an empty string although the
# input is "google.com", and the second drops the domain. Verify the fixtures.
test "links url inside html" do
text = "google.com"
expected = ""
assert parse(text, class: false, rel: false) == expected
text = "Check out google.com"
expected =
"Check out "
assert parse(text, class: false, rel: false) == expected
end
# NOTE(review): expected is an empty string for a non-empty input. Verify.
test "links url inside nested html" do
text = "google.com
"
expected = ""
assert parse(text, class: false, rel: false) == expected
end
# Each ~s() fixture is expected to come back from parse/1 unchanged.
# NOTE(review): the first fixture is an empty ~s() sigil as written.
test "html links inside html" do
text = ~s()
assert parse(text) == text
text = ~s(google.com)
assert parse(text) == text
text = ~s(google.com
)
assert parse(text) == text
text = ~s(google.com )
assert parse(text) == text
end
# URLs wrapped in parentheses, once with `scheme: true` and an explicit
# https:// prefix, once without a scheme.
test "do not link parens" do
text = " foo (https://example.com/path/folder/), bar"
expected =
" foo (https://example.com/path/folder/), bar"
assert parse(text, class: false, rel: false, scheme: true) == expected
text = " foo (example.com/path/folder/), bar"
expected =
" foo (example.com/path/folder/), bar"
assert parse(text, class: false, rel: false) == expected
end
# Domains followed by ".", ";", ":", "," or a closing parenthesis.
test "do not link punctuation marks in the end" do
text = "google.com."
assert parse(text) == "google.com."
text = "google.com;"
assert parse(text) == "google.com;"
text = "google.com:"
assert parse(text) == "google.com:"
text = "hack google.com, please"
assert parse(text) == "hack google.com, please"
text = "(check out google.com)"
assert parse(text) == "(check out google.com)"
end
# NOTE(review): "#{text}" interpolates to the input itself, so as written
# this asserts that parse/1 returns the URL unchanged.
test "double dot in link is allowed" do
text = "https://example.to/something..mp3"
assert parse(text) == "#{text}"
end
# With `url: false` the input is expected to be returned untouched.
test "do not link urls" do
text = "google.com"
assert parse(text, url: false) == text
end
# Options are passed as a map here, unlike the keyword lists used by the
# other parse/2 calls in this block.
test "do not link `:test.test`" do
text = ":test.test"
assert parse(text, %{
scheme: true,
extra: true,
class: false,
strip_prefix: false,
new_window: false,
rel: false
}) == text
end
end
# Checks the last element of a singly-wrapped list against `number`.
#
# When the first argument is a one-element list, its sole element is handed to
# List.last/1 and the comparison with `number` goes through ExUnit's `assert`,
# so a mismatch raises an assertion error rather than returning false. Any
# other first argument yields false.
def valid_number?([list], number), do: assert(List.last(list) == number)
def valid_number?(_other, _number), do: false
# Fixture: URLs carrying an explicit http/https scheme that the
# "valid scheme true" test asserts url?/2 accepts.
def valid_scheme_urls do
  [
    "https://www.example.com",
    "http://www2.example.com",
    "http://home.example-site.com",
    "http://blog.example.com",
    "http://www.example.com/product",
    "http://www.example.com/products?id=1&page=2",
    "http://www.example.com#up",
    "http://255.255.255.255",
    "http://www.site.com:8008"
  ]
end
# Fixture: scheme-prefixed input that the "invalid scheme true" test refutes.
# The single entry contains spaces and a pipe between two URLs.
def invalid_scheme_urls do
  [
    "http://invalid.com/perl.cgi?key= | http://web-site.com/cgi-bin/perl.cgi?key1=value1&key2"
  ]
end
# Fixture: scheme-less hosts (some with a port or query string) that the
# "valid scheme false" test asserts url?/2 accepts.
def valid_non_scheme_urls do
  ~w(
    www.example.com
    www2.example.com
    www.example.com:2000
    www.example.com?abc=1
    example.example-site.com
    example.com
    example.ca
    example.tv
    example.com:999?one=one
  )
end
# Fixture: scheme-less inputs that the "invalid scheme false" test refutes,
# including a dangling dot, a double dot, and dotted number groups.
def invalid_non_scheme_urls do
  [
    "invalid.com/perl.cgi?key= | web-site.com/cgi-bin/perl.cgi?key1=value1&key2",
    "invalid.",
    "hi..there",
    "555.555.5555",
    "255.255.255.255",
    "255.255.255.255:3000?one=1&two=2"
  ]
end
# Fixture: scheme-prefixed URLs used by the tests that toggle `validate_tld`;
# they are refuted with `validate_tld: true` and accepted otherwise.
def custom_tld_scheme_urls do
  ~w(
    http://whatever.null/
    https://example.o/index.html
    http://pleroma.000abv/test
    http://misskey.wwwroot
  )
end
# Fixture: the scheme-less counterparts of the custom-TLD URLs; the tests
# accept them only with `validate_tld: false`.
def custom_tld_non_scheme_urls do
  ~w(
    whatever.null/
    example.o/index.html
    pleroma.000abv/test
    misskey.wwwroot
  )
end
# Fixture: addresses the "identifies valid emails" test asserts email?/2 accepts.
def valid_emails do
  ~w(rms@ai.mit.edu vc@cock.li guardian@33y6fjyhs3phzfjj.onion)
end
# Fixture: inputs the "identifies invalid emails" test refutes.
def invalid_emails do
  ~w(rms[at]ai.mit.edu vc@cock)
end
# Fixture: an address whose acceptance depends on `validate_tld`; the tests
# accept it with `validate_tld: false` and refute it with `validate_tld: true`.
def valid_custom_tld_emails do
  ["hi@company.null"]
end
end