extract urls
This commit is contained in:
parent
16d310a37e
commit
d33cbbfb1c
2 changed files with 108 additions and 0 deletions
|
@ -18,6 +18,7 @@ namespace BirdsiteLive.Domain.Tools
|
|||
// Matches an @handle made of ASCII letters, digits, '_' or 'ー' that is preceded by a
// non-word character, ends on a word boundary and is not immediately followed by ';'.
// Capture group 1 holds the handle including its leading '@'.
private readonly Regex _mentionRegex = new Regex(@"\W(\@[a-zA-Z0-9_ー]+\b)(?!;)");
|
||||
//private readonly Regex _mentionRegex = new Regex(@"(?<=[\s>]|^)@(\w*[a-zA-Z0-9_ー]+\w*)\b(?!;)");
|
||||
//private readonly Regex _mentionRegex = new Regex(@"(?<=[\s>]|^)@(\w*[a-zA-Z0-9_ー]+)\b(?!;)");
|
||||
// Matches absolute URLs with a lowercase http, https or ftp scheme: a host of at least two
// dot-separated labels, then an optional path/query/fragment. The final character class
// omits '.', ',' and ':', so a match never ends on one of those characters.
private readonly Regex _urlRegex = new Regex(@"((http|ftp|https):\/\/[\w\-_]+(\.[\w\-_]+)+([\w\-\.,@?^=%&:/~\+#]*[\w\-\@?^=%&/~\+#])?)");
|
||||
private readonly InstanceSettings _instanceSettings;
|
||||
|
||||
#region Ctor
|
||||
|
@ -32,6 +33,38 @@ namespace BirdsiteLive.Domain.Tools
|
|||
var tags = new List<Tag>();
|
||||
messageContent = $" {messageContent} ";
|
||||
|
||||
// Extract Urls
// Every URL is rewritten in a single pass through a MatchEvaluator, so each occurrence is
// replaced exactly where it was found. Searching the message again with the URL text as a
// regex pattern would treat '?', '+', '^' and '.' as metacharacters (a URL with a query
// string would never match itself) and would also rewrite a repeated or prefix-sharing URL
// inside the href of an anchor that was already generated.
messageContent = _urlRegex.Replace(messageContent, urlHit =>
{
    // Number of characters (after the scheme and optional "www.") that stay visible.
    const int maxVisibleLength = 30;

    // _urlRegex has no whitespace in any of its character classes, so the match value
    // is already the clean URL.
    var url = urlHit.Value;

    // The regex only admits these three schemes, so "https://" is the remaining case.
    var protocol = "https://";
    if (url.StartsWith("http://", System.StringComparison.Ordinal)) protocol = "http://";
    else if (url.StartsWith("ftp://", System.StringComparison.Ordinal)) protocol = "ftp://";

    // Strip only the leading scheme: String.Replace would also delete a second scheme
    // embedded later in the URL (e.g. an archive or redirect link).
    var truncatedUrl = url.Substring(protocol.Length);

    // A leading "www." is hidden together with the scheme; only the prefix is removed.
    if (truncatedUrl.StartsWith("www.", System.StringComparison.Ordinal))
    {
        protocol += "www.";
        truncatedUrl = truncatedUrl.Substring("www.".Length);
    }

    // Split the remainder into the visible head and the hidden tail.
    var firstPart = truncatedUrl;
    var secondPart = string.Empty;

    if (truncatedUrl.Length > maxVisibleLength)
    {
        firstPart = truncatedUrl.Substring(0, maxVisibleLength);
        secondPart = truncatedUrl.Substring(maxVisibleLength);
    }

    // The evaluator's return value is inserted literally (no '$' substitution handling).
    return $@" <a href=""{url}"" rel=""nofollow noopener noreferrer"" target=""_blank""><span class=""invisible"">{protocol}</span><span class=""ellipsis"">{firstPart}</span><span class=""invisible"">{secondPart}</span></a>";
});
|
||||
|
||||
// Extract Hashtags
|
||||
var hashtagMatch = _hastagRegex.Matches(messageContent);
|
||||
foreach (var m in hashtagMatch)
|
||||
{
|
||||
|
@ -49,6 +82,7 @@ namespace BirdsiteLive.Domain.Tools
|
|||
$@" <a href=""{url}"" class=""mention hashtag"" rel=""tag"">#<span>{tag}</span></a>");
|
||||
}
|
||||
|
||||
// Extract Mentions
|
||||
var mentionMatch = _mentionRegex.Matches(messageContent);
|
||||
foreach (var m in mentionMatch)
|
||||
{
|
||||
|
|
|
@ -22,6 +22,80 @@ namespace BirdsiteLive.Domain.Tests.Tools
|
|||
}
|
||||
#endregion
|
||||
|
||||
/// <summary>
/// A short URL on its own line is turned into an anchor whose whole remainder after the
/// scheme is visible (the trailing invisible span is empty), and no tag is produced.
/// </summary>
[TestMethod]
public void Extract_FormatUrl_Test()
{
    #region Stubs
    var message = $"Bla!{Environment.NewLine}https://t.co/L8BpyHgg25";
    const string expectedAnchor = @"<a href=""https://t.co/L8BpyHgg25"" rel=""nofollow noopener noreferrer"" target=""_blank""><span class=""invisible"">https://</span><span class=""ellipsis"">t.co/L8BpyHgg25</span><span class=""invisible""></span></a>";
    #endregion

    var extractor = new StatusExtractor(_settings);
    var extraction = extractor.ExtractTags(message);

    #region Validations
    // URLs must not be reported as tags.
    Assert.AreEqual(0, extraction.tags.Length);

    // The surrounding text survives and the URL is rendered as the expected anchor.
    Assert.IsTrue(extraction.content.Contains("Bla!"));
    Assert.IsTrue(extraction.content.Contains(expectedAnchor));
    #endregion
}
|
||||
|
||||
/// <summary>
/// A long "www." URL is rendered with the scheme and "www." hidden, the first 30 characters
/// of the remainder visible, and the rest placed in a trailing invisible span.
/// </summary>
[TestMethod]
public void Extract_FormatUrl_Long_Test()
{
    #region Stubs
    var message = $"Bla!{Environment.NewLine}https://www.eff.org/deeplinks/2020/07/pact-act-not-solution-problem-harmful-online-content";
    const string expectedAnchor = @"<a href=""https://www.eff.org/deeplinks/2020/07/pact-act-not-solution-problem-harmful-online-content"" rel=""nofollow noopener noreferrer"" target=""_blank""><span class=""invisible"">https://www.</span><span class=""ellipsis"">eff.org/deeplinks/2020/07/pact</span><span class=""invisible"">-act-not-solution-problem-harmful-online-content</span></a>";
    #endregion

    var extractor = new StatusExtractor(_settings);
    var extraction = extractor.ExtractTags(message);

    #region Validations
    // URLs must not be reported as tags.
    Assert.AreEqual(0, extraction.tags.Length);

    // The surrounding text survives and the URL is split into visible and hidden parts.
    Assert.IsTrue(extraction.content.Contains("Bla!"));
    Assert.IsTrue(extraction.content.Contains(expectedAnchor));
    #endregion
}
|
||||
|
||||
/// <summary>
/// Boundary case: after the scheme and "www." the URL is exactly 30 characters long, so it
/// is shown in full and the trailing invisible span stays empty.
/// </summary>
[TestMethod]
public void Extract_FormatUrl_Exact_Test()
{
    #region Stubs
    var message = $"Bla!{Environment.NewLine}https://www.eff.org/deeplinks/2020/07/pact";
    const string expectedAnchor = @"<a href=""https://www.eff.org/deeplinks/2020/07/pact"" rel=""nofollow noopener noreferrer"" target=""_blank""><span class=""invisible"">https://www.</span><span class=""ellipsis"">eff.org/deeplinks/2020/07/pact</span><span class=""invisible""></span></a>";
    #endregion

    var extractor = new StatusExtractor(_settings);
    var extraction = extractor.ExtractTags(message);

    #region Validations
    // URLs must not be reported as tags.
    Assert.AreEqual(0, extraction.tags.Length);

    // The surrounding text survives and nothing is moved into the hidden tail.
    Assert.IsTrue(extraction.content.Contains("Bla!"));
    Assert.IsTrue(extraction.content.Contains(expectedAnchor));
    #endregion
}
|
||||
|
||||
/// <summary>
/// Two different URLs in one message (one short, one long) are each rendered as their own
/// anchor, and neither is reported as a tag.
/// </summary>
[TestMethod]
public void Extract_MultiUrls__Test()
{
    #region Stubs
    var message = $"https://t.co/L8BpyHgg25 Bla!{Environment.NewLine}https://www.eff.org/deeplinks/2020/07/pact-act-not-solution-problem-harmful-online-content";
    const string expectedShortAnchor = @"<a href=""https://t.co/L8BpyHgg25"" rel=""nofollow noopener noreferrer"" target=""_blank""><span class=""invisible"">https://</span><span class=""ellipsis"">t.co/L8BpyHgg25</span><span class=""invisible""></span></a>";
    const string expectedLongAnchor = @"<a href=""https://www.eff.org/deeplinks/2020/07/pact-act-not-solution-problem-harmful-online-content"" rel=""nofollow noopener noreferrer"" target=""_blank""><span class=""invisible"">https://www.</span><span class=""ellipsis"">eff.org/deeplinks/2020/07/pact</span><span class=""invisible"">-act-not-solution-problem-harmful-online-content</span></a>";
    #endregion

    var extractor = new StatusExtractor(_settings);
    var extraction = extractor.ExtractTags(message);

    #region Validations
    // URLs must not be reported as tags.
    Assert.AreEqual(0, extraction.tags.Length);

    // The text between the URLs survives.
    Assert.IsTrue(extraction.content.Contains("Bla!"));

    // Each URL gets its own anchor with the expected visible/hidden split.
    Assert.IsTrue(extraction.content.Contains(expectedShortAnchor));
    Assert.IsTrue(extraction.content.Contains(expectedLongAnchor));
    #endregion
}
|
||||
|
||||
[TestMethod]
|
||||
public void Extract_SingleHashTag_Test()
|
||||
{
|
||||
|
|
Reference in a new issue