diff --git a/src/BirdsiteLive.Domain/Tools/StatusExtractor.cs b/src/BirdsiteLive.Domain/Tools/StatusExtractor.cs index a1c0245..d78bf6e 100644 --- a/src/BirdsiteLive.Domain/Tools/StatusExtractor.cs +++ b/src/BirdsiteLive.Domain/Tools/StatusExtractor.cs @@ -1,4 +1,5 @@ using System.Collections.Generic; +using System.Linq; using System.Text.RegularExpressions; using BirdsiteLive.ActivityPub.Models; using BirdsiteLive.Common.Settings; @@ -13,11 +14,15 @@ namespace BirdsiteLive.Domain.Tools public class StatusExtractor : IStatusExtractor { private readonly Regex _hastagRegex = new Regex(@"\W(\#[a-zA-Z0-9_ー]+\b)(?!;)"); + //private readonly Regex _hastagRegex = new Regex(@"#\w+"); //private readonly Regex _hastagRegex = new Regex(@"(?<=[\s>]|^)#(\w*[a-zA-Z0-9_ー]+\w*)\b(?!;)"); //private readonly Regex _hastagRegex = new Regex(@"(?<=[\s>]|^)#(\w*[a-zA-Z0-9_ー]+)\b(?!;)"); + private readonly Regex _mentionRegex = new Regex(@"\W(\@[a-zA-Z0-9_ー]+\b)(?!;)"); + //private readonly Regex _mentionRegex = new Regex(@"@\w+"); //private readonly Regex _mentionRegex = new Regex(@"(?<=[\s>]|^)@(\w*[a-zA-Z0-9_ー]+\w*)\b(?!;)"); //private readonly Regex _mentionRegex = new Regex(@"(?<=[\s>]|^)@(\w*[a-zA-Z0-9_ー]+)\b(?!;)"); + private readonly Regex _urlRegex = new Regex(@"((http|ftp|https):\/\/[\w\-_]+(\.[\w\-_]+)+([\w\-\.,@?^=%&:/~\+#]*[\w\-\@?^=%&/~\+#])?)"); private readonly InstanceSettings _instanceSettings; @@ -34,12 +39,12 @@ namespace BirdsiteLive.Domain.Tools messageContent = $" {messageContent} "; // Replace return lines - messageContent = Regex.Replace(messageContent, @"\r\n\r\n?|\n\n", "

"); - messageContent = Regex.Replace(messageContent, @"\r\n?|\n", "
"); + messageContent = Regex.Replace(messageContent, @"\r\n\r\n?|\n\n", "

"); + messageContent = Regex.Replace(messageContent, @"\r\n?|\n", "
"); // Extract Urls var urlMatch = _urlRegex.Matches(messageContent); - foreach (var m in urlMatch) + foreach (Match m in urlMatch) { var url = m.ToString().Replace("\n", string.Empty).Trim(); @@ -69,8 +74,8 @@ namespace BirdsiteLive.Domain.Tools } // Extract Hashtags - var hashtagMatch = _hastagRegex.Matches(messageContent); - foreach (var m in hashtagMatch) + var hashtagMatch = OrderByLength(_hastagRegex.Matches(messageContent)); + foreach (Match m in hashtagMatch) { var tag = m.ToString().Replace("#", string.Empty).Replace("\n", string.Empty).Trim(); var url = $"https://{_instanceSettings.Domain}/tags/{tag}"; @@ -87,8 +92,8 @@ namespace BirdsiteLive.Domain.Tools } // Extract Mentions - var mentionMatch = _mentionRegex.Matches(messageContent); - foreach (var m in mentionMatch) + var mentionMatch = OrderByLength(_mentionRegex.Matches(messageContent)); + foreach (Match m in mentionMatch) { var mention = m.ToString().Replace("@", string.Empty).Replace("\n", string.Empty).Trim(); var url = $"https://{_instanceSettings.Domain}/users/{mention}"; @@ -105,7 +110,21 @@ namespace BirdsiteLive.Domain.Tools $@" @{mention}"); } + // Clean up return lines + messageContent = Regex.Replace(messageContent, @"

", "

"); + messageContent = Regex.Replace(messageContent, @"
", "
"); + return (messageContent.Trim(), tags.ToArray()); } + + private IEnumerable OrderByLength(MatchCollection matches) + { + var result = new List(); + + foreach (Match m in matches) result.Add(m); + result = result.OrderByDescending(x => x.Length).ToList(); + + return result; + } } } \ No newline at end of file diff --git a/src/Tests/BirdsiteLive.Domain.Tests/Tools/StatusExtractorTests.cs b/src/Tests/BirdsiteLive.Domain.Tests/Tools/StatusExtractorTests.cs index 790ba62..5728cc5 100644 --- a/src/Tests/BirdsiteLive.Domain.Tests/Tools/StatusExtractorTests.cs +++ b/src/Tests/BirdsiteLive.Domain.Tests/Tools/StatusExtractorTests.cs @@ -22,6 +22,38 @@ namespace BirdsiteLive.Domain.Tests.Tools } #endregion + [TestMethod] + public void Extract_ReturnLines_Test() + { + #region Stubs + var message = "Bla.\n\n@Mention blo. https://t.co/pgtrJi9600"; + #endregion + + var service = new StatusExtractor(_settings); + var result = service.ExtractTags(message); + + #region Validations + Assert.IsTrue(result.content.Contains("Bla.")); + Assert.IsTrue(result.content.Contains("

")); + #endregion + } + + [TestMethod] + public void Extract_ReturnSingleLines_Test() + { + #region Stubs + var message = "Bla.\n@Mention blo. https://t.co/pgtrJi9600"; + #endregion + + var service = new StatusExtractor(_settings); + var result = service.ExtractTags(message); + + #region Validations + Assert.IsTrue(result.content.Contains("Bla.")); + Assert.IsTrue(result.content.Contains("
")); + #endregion + } + [TestMethod] public void Extract_FormatUrl_Test() {