diff --git a/src/BirdsiteLive.Domain/Tools/StatusExtractor.cs b/src/BirdsiteLive.Domain/Tools/StatusExtractor.cs index a1c0245..d78bf6e 100644 --- a/src/BirdsiteLive.Domain/Tools/StatusExtractor.cs +++ b/src/BirdsiteLive.Domain/Tools/StatusExtractor.cs @@ -1,4 +1,5 @@ using System.Collections.Generic; +using System.Linq; using System.Text.RegularExpressions; using BirdsiteLive.ActivityPub.Models; using BirdsiteLive.Common.Settings; @@ -13,11 +14,15 @@ namespace BirdsiteLive.Domain.Tools public class StatusExtractor : IStatusExtractor { private readonly Regex _hastagRegex = new Regex(@"\W(\#[a-zA-Z0-9_ー]+\b)(?!;)"); + //private readonly Regex _hastagRegex = new Regex(@"#\w+"); //private readonly Regex _hastagRegex = new Regex(@"(?<=[\s>]|^)#(\w*[a-zA-Z0-9_ー]+\w*)\b(?!;)"); //private readonly Regex _hastagRegex = new Regex(@"(?<=[\s>]|^)#(\w*[a-zA-Z0-9_ー]+)\b(?!;)"); + private readonly Regex _mentionRegex = new Regex(@"\W(\@[a-zA-Z0-9_ー]+\b)(?!;)"); + //private readonly Regex _mentionRegex = new Regex(@"@\w+"); //private readonly Regex _mentionRegex = new Regex(@"(?<=[\s>]|^)@(\w*[a-zA-Z0-9_ー]+\w*)\b(?!;)"); //private readonly Regex _mentionRegex = new Regex(@"(?<=[\s>]|^)@(\w*[a-zA-Z0-9_ー]+)\b(?!;)"); + private readonly Regex _urlRegex = new Regex(@"((http|ftp|https):\/\/[\w\-_]+(\.[\w\-_]+)+([\w\-\.,@?^=%&:/~\+#]*[\w\-\@?^=%&/~\+#])?)"); private readonly InstanceSettings _instanceSettings; @@ -34,12 +39,12 @@ namespace BirdsiteLive.Domain.Tools messageContent = $" {messageContent} "; // Replace return lines - messageContent = Regex.Replace(messageContent, @"\r\n\r\n?|\n\n", "
");
- messageContent = Regex.Replace(messageContent, @"\r\n?|\n", "
");
+ messageContent = Regex.Replace(messageContent, @"\r\n\r\n?|\n\n", "
");
+ messageContent = Regex.Replace(messageContent, @"\r\n?|\n", "
");
// Extract Urls
var urlMatch = _urlRegex.Matches(messageContent);
- foreach (var m in urlMatch)
+ foreach (Match m in urlMatch)
{
var url = m.ToString().Replace("\n", string.Empty).Trim();
@@ -69,8 +74,8 @@ namespace BirdsiteLive.Domain.Tools
}
// Extract Hashtags
- var hashtagMatch = _hastagRegex.Matches(messageContent);
- foreach (var m in hashtagMatch)
+ var hashtagMatch = OrderByLength(_hastagRegex.Matches(messageContent));
+ foreach (Match m in hashtagMatch)
{
var tag = m.ToString().Replace("#", string.Empty).Replace("\n", string.Empty).Trim();
var url = $"https://{_instanceSettings.Domain}/tags/{tag}";
@@ -87,8 +92,8 @@ namespace BirdsiteLive.Domain.Tools
}
// Extract Mentions
- var mentionMatch = _mentionRegex.Matches(messageContent);
- foreach (var m in mentionMatch)
+ var mentionMatch = OrderByLength(_mentionRegex.Matches(messageContent));
+ foreach (Match m in mentionMatch)
{
var mention = m.ToString().Replace("@", string.Empty).Replace("\n", string.Empty).Trim();
var url = $"https://{_instanceSettings.Domain}/users/{mention}";
@@ -105,7 +110,21 @@ namespace BirdsiteLive.Domain.Tools
$@" @{mention}");
}
+ // Clean up return lines
+ messageContent = Regex.Replace(messageContent, @"
", "
");
+ messageContent = Regex.Replace(messageContent, @"
", "
");
+
return (messageContent.Trim(), tags.ToArray());
}
+
+ private IEnumerable
"));
+ #endregion
+ }
+
+ [TestMethod] // Exercises ExtractTags on a status whose text contains exactly one line break.
+ public void Extract_ReturnSingleLines_Test()
+ {
+ #region Stubs
+ var message = "Bla.\n@Mention blo. https://t.co/pgtrJi9600"; // a single "\n" separates "Bla." from the mention; a URL ends the message
+ #endregion
+
+ var service = new StatusExtractor(_settings); // _settings is declared outside the lines visible here
+ var result = service.ExtractTags(message); // the assertions below only inspect result.content
+
+ #region Validations
+ Assert.IsTrue(result.content.Contains("Bla.")); // the text before the line break must still be present in the output
+ Assert.IsTrue(result.content.Contains("
")); // NOTE(review): this literal spans a raw line break in this view - presumably the expected line-break markup was stripped during extraction; confirm against the repository
+ #endregion
+ }
+
[TestMethod]
public void Extract_FormatUrl_Test()
{