Fix parsing of small links (e.g. http://GOV.UK)

This commit is contained in:
Nicolas Constant 2021-02-10 00:19:12 -05:00
parent c02b4804f5
commit 674fde74bd
No known key found for this signature in database
GPG Key ID: 1E9F677FB01A5688
3 changed files with 62 additions and 8 deletions

View File

@ -4,6 +4,6 @@ namespace BirdsiteLive.Common.Regexes
{
/// <summary>
/// Holds the regular expression used to locate http, https and ftp URLs in text.
/// </summary>
public class UrlRegexes
{
// Pattern without a leading capture: group 1 is the whole URL and group 2 is the
// scheme name (http, ftp or https).
public static readonly Regex Url = new Regex(@"((http|ftp|https):\/\/[\w\-_]+(\.[\w\-_]+)+([\w\-\.,@?^=%&:/~\+#]*[\w\-\@?^=%&/~\+#])?)");
// Pattern with a leading optional character: group 1 is at most one character
// immediately before the URL, group 2 is the whole URL, group 3 is the scheme
// including "://", and group 4 is the bare scheme name.
// NOTE(review): two declarations with the same name are shown because this is a diff
// view; presumably only this second one remains after the commit — confirm.
public static readonly Regex Url = new Regex(@"(.?)(((http|ftp|https):\/\/)[\w\-_]+(\.[\w\-_]+)+([\w\-\.,@?^=%&:/~\+#]*[\w\-\@?^=%&/~\+#])?)");
}
}

View File

@ -44,11 +44,8 @@ namespace BirdsiteLive.Domain.Tools
var urlMatch = UrlRegexes.Url.Matches(messageContent);
foreach (Match m in urlMatch)
{
var url = m.ToString().Replace("\n", string.Empty).Trim();
var protocol = "https://";
if (url.StartsWith("http://")) protocol = "http://";
else if (url.StartsWith("ftp://")) protocol = "ftp://";
var url = m.Groups[2].ToString();
var protocol = m.Groups[3].ToString();
var truncatedUrl = url.Replace(protocol, string.Empty);
@ -68,7 +65,7 @@ namespace BirdsiteLive.Domain.Tools
}
messageContent = Regex.Replace(messageContent, m.ToString(),
$@" <a href=""{url}"" rel=""nofollow noopener noreferrer"" target=""_blank""><span class=""invisible"">{protocol}</span><span class=""ellipsis"">{firstPart}</span><span class=""invisible"">{secondPart}</span></a>");
$@"{m.Groups[1]}<a href=""{url}"" rel=""nofollow noopener noreferrer"" target=""_blank""><span class=""invisible"">{protocol}</span><span class=""ellipsis"">{firstPart}</span><span class=""invisible"">{secondPart}</span></a>");
}
// Extract Hashtags

View File

@ -136,7 +136,7 @@ namespace BirdsiteLive.Domain.Tests.Tools
}
[TestMethod]
public void Extract_MultiUrls__Test()
public void Extract_MultiUrls_Test()
{
#region Stubs
var message = $"https://t.co/L8BpyHgg25 Bla!{Environment.NewLine}https://www.eff.org/deeplinks/2020/07/pact-act-not-solution-problem-harmful-online-content";
@ -160,6 +160,63 @@ namespace BirdsiteLive.Domain.Tests.Tools
#endregion
}
[TestMethod]
public void Extract_SmallUrl_Test()
{
    // Arrange: the same short link appears twice, each time surrounded by spaces, text and emoji.
    string input = @"🚀 test http://GOV.UK date 🎉 data http://GOV.UK woopsi.";
    string expectedContent = @"🚀 test <a href=""http://GOV.UK"" rel=""nofollow noopener noreferrer"" target=""_blank""><span class=""invisible"">http://</span><span class=""ellipsis"">GOV.UK</span><span class=""invisible""></span></a> date 🎉 data <a href=""http://GOV.UK"" rel=""nofollow noopener noreferrer"" target=""_blank""><span class=""invisible"">http://</span><span class=""ellipsis"">GOV.UK</span><span class=""invisible""></span></a> woopsi.";
    var loggerMock = new Mock<ILogger<StatusExtractor>>();
    var extractor = new StatusExtractor(_settings, loggerMock.Object);

    // Act
    var extraction = extractor.Extract(input);

    // Assert: both occurrences are wrapped in anchors and the surrounding text is untouched.
    Assert.AreEqual(expectedContent, extraction.content);
}
[TestMethod]
public void Extract_SmallUrl_2_Test()
{
    // Arrange: a short link glued directly to a preceding emoji, with nothing after it.
    string input = @"🚀http://GOV.UK";
    string expectedContent = @"🚀<a href=""http://GOV.UK"" rel=""nofollow noopener noreferrer"" target=""_blank""><span class=""invisible"">http://</span><span class=""ellipsis"">GOV.UK</span><span class=""invisible""></span></a>";
    var loggerMock = new Mock<ILogger<StatusExtractor>>();
    var extractor = new StatusExtractor(_settings, loggerMock.Object);

    // Act
    var extraction = extractor.Extract(input);

    // Assert: the emoji is kept as-is and the link alone is wrapped in the anchor.
    Assert.AreEqual(expectedContent, extraction.content);
}
[TestMethod]
public void Extract_SmallUrl_3_Test()
{
    // Arrange: a short link glued to a preceding emoji and followed by a sentence-ending dot.
    string input = @"🚀http://GOV.UK.";
    string expectedContent = @"🚀<a href=""http://GOV.UK"" rel=""nofollow noopener noreferrer"" target=""_blank""><span class=""invisible"">http://</span><span class=""ellipsis"">GOV.UK</span><span class=""invisible""></span></a>.";
    var loggerMock = new Mock<ILogger<StatusExtractor>>();
    var extractor = new StatusExtractor(_settings, loggerMock.Object);

    // Act
    var extraction = extractor.Extract(input);

    // Assert: the trailing dot stays outside the generated anchor.
    Assert.AreEqual(expectedContent, extraction.content);
}
[TestMethod]
public void Extract_SingleHashTag_Test()
{