# Copyright © 2017-2018 E-MetroTel
# Copyright © 2019-2022 Pleroma Authors
# SPDX-License-Identifier: MIT

defmodule Linkify.ParserTest do
  use ExUnit.Case, async: true
  doctest Linkify.Parser

  import Linkify.Parser

  # Each test below walks one of the fixture lists defined at the bottom of
  # this module and asserts/refutes the predicate for every entry.
  describe "url?/2" do
    test "valid scheme true" do
      for candidate <- valid_scheme_urls() do
        assert url?(candidate, scheme: true, validate_tld: true)
      end
    end

    test "invalid scheme true" do
      for candidate <- invalid_scheme_urls() do
        refute url?(candidate, scheme: true, validate_tld: true)
      end
    end

    test "valid scheme false" do
      for candidate <- valid_non_scheme_urls() do
        assert url?(candidate, scheme: false, validate_tld: true)
      end
    end

    test "invalid scheme false" do
      for candidate <- invalid_non_scheme_urls() do
        refute url?(candidate, scheme: false, validate_tld: true)
      end
    end

    test "checks the tld for url with a scheme when validate_tld: true" do
      for candidate <- custom_tld_scheme_urls() do
        refute url?(candidate, scheme: true, validate_tld: true)
      end
    end

    test "does not check the tld for url with a scheme when validate_tld: false" do
      for candidate <- custom_tld_scheme_urls() do
        assert url?(candidate, scheme: true, validate_tld: false)
      end
    end

    test "does not check the tld for url with a scheme when validate_tld: :no_scheme" do
      for candidate <- custom_tld_scheme_urls() do
        assert url?(candidate, scheme: true, validate_tld: :no_scheme)
      end
    end

    test "checks the tld for url without a scheme when validate_tld: true" do
      for candidate <- custom_tld_non_scheme_urls() do
        refute url?(candidate, scheme: false, validate_tld: true)
      end
    end

    test "checks the tld for url without a scheme when validate_tld: :no_scheme" do
      for candidate <- custom_tld_non_scheme_urls() do
        refute url?(candidate, scheme: false, validate_tld: :no_scheme)
      end
    end

    test "does not check the tld for url without a scheme when validate_tld: false" do
      for candidate <- custom_tld_non_scheme_urls() do
        assert url?(candidate, scheme: false, validate_tld: false)
      end
    end
  end

  describe "email?" do
    test "identifies valid emails" do
      for address <- valid_emails() do
        assert email?(address, [])
      end
    end

    test "identifies invalid emails" do
      for address <- invalid_emails() do
        refute email?(address, [])
      end
    end

    test "does not validate tlds when validate_tld: false" do
      for address <- valid_custom_tld_emails() do
        assert email?(address, validate_tld: false)
      end
    end

    test "validates tlds when validate_tld: true" do
      for address <- valid_custom_tld_emails() do
        refute email?(address, validate_tld: true)
      end
    end
  end

  # NOTE(review): several expectations in this group are identical to their
  # input, and one is shorter than its input ("Check out "). That looks like
  # markup was lost from these literals at some point - confirm the fixtures
  # against the upstream test suite before relying on them.
  #
  # Line breaks embedded in the fixtures are written as `\n` escapes.
  describe "parse" do
    test "handle line breakes" do
      text = "google.com\r\nssss"
      expected = "google.com\r\nssss"

      assert parse(text) == expected
    end

    test "handle angle bracket in the end" do
      text = "google.com\n"
      assert parse(text) == "google.com\n"

      text = "google.com\nhey"
      assert parse(text) == "google.com\nhey"

      text = "hey\ngoogle.com"
      assert parse(text) == "hey\ngoogle.com"

      text = "\ngoogle.com"
      assert parse(text) == "\ngoogle.com"

      text = "google.com<"
      assert parse(text) == "google.com<"

      text = "google.com>"
      assert parse(text) == "google.com>"
    end

    test "does not link attributes" do
      # Every input is expected to come back from parse/1 unchanged.
      inputs = [
        "Check out google",
        "Check out google.com",
        "Check out google.com"
      ]

      for text <- inputs do
        assert parse(text) == text
      end
    end

    test "does not link inside `\n` and ``" do
      # Every input is expected to come back from parse/1 unchanged.
      inputs = [
        "\ngoogle.com\n",
        "google.com",
        "\ngoogle.com\n"
      ]

      for text <- inputs do
        assert parse(text) == text
      end
    end

    test "links url inside html" do
      text = "\ngoogle.com\n"
      expected = "\ngoogle.com\n"

      assert parse(text, class: false, rel: false) == expected

      text = "Check out\ngoogle.com\n"
      expected = "Check out "

      assert parse(text, class: false, rel: false) == expected
    end

    test "links url inside nested html" do
      text = "\n\ngoogle.com\n\n"
      expected = "\n\ngoogle.com\n\n"

      assert parse(text, class: false, rel: false) == expected
    end

    test "html links inside html" do
      # Every input is expected to come back from parse/1 unchanged.
      inputs = [
        ~s(\n\ngoogle.com\n\n),
        ~s(google.com),
        ~s(\n\ngoogle.com\n\n),
        ~s(\n  • google.com\n  • )
      ]

      for text <- inputs do
        assert parse(text) == text
      end
    end

    test "do not link parens" do
      text = " foo (https://example.com/path/folder/), bar"
      expected = " foo (https://example.com/path/folder/), bar"

      assert parse(text, class: false, rel: false, scheme: true) == expected

      text = " foo (example.com/path/folder/), bar"
      expected = " foo (example.com/path/folder/), bar"

      assert parse(text, class: false, rel: false) == expected
    end

    test "do not link punctuation marks in the end" do
      text = "google.com."
      assert parse(text) == "google.com."

      text = "google.com;"
      assert parse(text) == "google.com;"

      text = "google.com:"
      assert parse(text) == "google.com:"

      text = "hack google.com, please"
      assert parse(text) == "hack google.com, please"

      text = "(check out google.com)"
      assert parse(text) == "(check out google.com)"
    end

    test "double dot in link is allowed" do
      text = "https://example.to/something..mp3"

      assert parse(text) == "#{text}"
    end

    test "do not link urls" do
      text = "google.com"

      assert parse(text, url: false) == text
    end

    test "do not link `:test.test`" do
      text = ":test.test"

      # Options are passed as a map here, unlike the keyword lists used above.
      opts = %{
        scheme: true,
        extra: true,
        class: false,
        strip_prefix: false,
        new_window: false,
        rel: false
      }

      assert parse(text, opts) == text
    end
  end

  # Asserts that the last element of the single nested list equals `number`;
  # any other first-argument shape returns false.
  # NOTE(review): not referenced anywhere in this file - confirm it is still needed.
  def valid_number?([list], number), do: assert(List.last(list) == number)
  def valid_number?(_, _), do: false

  # --- Fixture lists consumed by the tests above ---

  # URLs with a scheme that url?/2 must accept.
  def valid_scheme_urls do
    [
      "https://www.example.com",
      "http://www2.example.com",
      "http://home.example-site.com",
      "http://blog.example.com",
      "http://www.example.com/product",
      "http://www.example.com/products?id=1&page=2",
      "http://www.example.com#up",
      "http://255.255.255.255",
      "http://www.site.com:8008"
    ]
  end

  # Strings with a scheme that url?/2 must reject.
  def invalid_scheme_urls do
    [
      "http://invalid.com/perl.cgi?key= | http://web-site.com/cgi-bin/perl.cgi?key1=value1&key2"
    ]
  end

  # Scheme-less URLs that url?/2 must accept.
  def valid_non_scheme_urls do
    [
      "www.example.com",
      "www2.example.com",
      "www.example.com:2000",
      "www.example.com?abc=1",
      "example.example-site.com",
      "example.com",
      "example.ca",
      "example.tv",
      "example.com:999?one=one"
    ]
  end

  # Scheme-less strings that url?/2 must reject.
  def invalid_non_scheme_urls do
    [
      "invalid.com/perl.cgi?key= | web-site.com/cgi-bin/perl.cgi?key1=value1&key2",
      "invalid.",
      "hi..there",
      "555.555.5555",
      "255.255.255.255",
      "255.255.255.255:3000?one=1&two=2"
    ]
  end

  # URLs with a scheme, used by the validate_tld tests.
  def custom_tld_scheme_urls do
    [
      "http://whatever.null/",
      "https://example.o/index.html",
      "http://pleroma.000abv/test",
      "http://misskey.wwwroot"
    ]
  end

  # Scheme-less counterparts of custom_tld_scheme_urls/0.
  def custom_tld_non_scheme_urls do
    [
      "whatever.null/",
      "example.o/index.html",
      "pleroma.000abv/test",
      "misskey.wwwroot"
    ]
  end

  # Addresses that email?/2 must accept with default options.
  def valid_emails do
    ["rms@ai.mit.edu", "vc@cock.li", "guardian@33y6fjyhs3phzfjj.onion"]
  end

  # Strings that email?/2 must reject with default options.
  def invalid_emails do
    ["rms[at]ai.mit.edu", "vc@cock"]
  end

  # Addresses accepted only when validate_tld is false.
  def valid_custom_tld_emails do
    ["hi@company.null"]
  end
end