From 83507614a4a508f93677d8966c69c6dba349f409 Mon Sep 17 00:00:00 2001 From: Nicolas Constant Date: Fri, 31 Jul 2020 22:49:00 -0400 Subject: [PATCH] testing hashtag extraction --- src/BirdsiteLive.Domain/StatusService.cs | 33 ++--------- .../Tools/StatusExtractor.cs | 52 +++++++++++++++- .../StatusServiceTests.cs | 42 ++++++------- .../Tools/StatusExtractorTests.cs | 59 +++++++++++++++++++ 4 files changed, 134 insertions(+), 52 deletions(-) create mode 100644 src/Tests/BirdsiteLive.Domain.Tests/Tools/StatusExtractorTests.cs diff --git a/src/BirdsiteLive.Domain/StatusService.cs b/src/BirdsiteLive.Domain/StatusService.cs index 32e7f30..172a366 100644 --- a/src/BirdsiteLive.Domain/StatusService.cs +++ b/src/BirdsiteLive.Domain/StatusService.cs @@ -6,6 +6,7 @@ using System.Text.RegularExpressions; using BirdsiteLive.ActivityPub; using BirdsiteLive.ActivityPub.Models; using BirdsiteLive.Common.Settings; +using BirdsiteLive.Domain.Tools; using BirdsiteLive.Twitter.Models; using Tweetinvi.Models; using Tweetinvi.Models.Entities; @@ -20,11 +21,13 @@ namespace BirdsiteLive.Domain public class StatusService : IStatusService { private readonly InstanceSettings _instanceSettings; + private readonly IStatusExtractor _statusExtractor; #region Ctor - public StatusService(InstanceSettings instanceSettings) + public StatusService(InstanceSettings instanceSettings, IStatusExtractor statusExtractor) { _instanceSettings = instanceSettings; + _statusExtractor = statusExtractor; } #endregion @@ -37,7 +40,7 @@ namespace BirdsiteLive.Domain var to = $"{actorUrl}/followers"; var apPublic = "https://www.w3.org/ns/activitystreams#Public"; - var extractedTags = ExtractTags(tweet.MessageContent); + var extractedTags = _statusExtractor.ExtractTags(tweet.MessageContent); var note = new Note { @@ -64,32 +67,6 @@ namespace BirdsiteLive.Domain return note; } - private (string content, Tag[] tags) ExtractTags(string messageContent) - { - var regex = new Regex(@"\W(\#[a-zA-Z0-9]+\b)(?!;)"); - var match = regex.Matches(messageContent); - - var tags = new List(); - foreach (var m in match) - { - var tag = m.ToString().Replace("#", string.Empty).Replace("\n", string.Empty).Trim(); - var url = $"https://{_instanceSettings.Domain}/tags/{tag}"; - - tags.Add(new Tag - { - name = $"#{tag}", - href = url, - type = "Hashtag" - }); - - messageContent = messageContent.Replace( - $"#{tag}", - $@"#{tag}"); - } - - return (messageContent, new Tag[0]); - } - private Attachment[] Convert(ExtractedMedia[] media) { if(media == null) return new Attachment[0]; diff --git a/src/BirdsiteLive.Domain/Tools/StatusExtractor.cs b/src/BirdsiteLive.Domain/Tools/StatusExtractor.cs index ed8951a..c19f97e 100644 --- a/src/BirdsiteLive.Domain/Tools/StatusExtractor.cs +++ b/src/BirdsiteLive.Domain/Tools/StatusExtractor.cs @@ -1,7 +1,53 @@ -namespace BirdsiteLive.Domain.Tools +using System.Collections.Generic; +using System.Text.RegularExpressions; +using BirdsiteLive.ActivityPub.Models; +using BirdsiteLive.Common.Settings; + +namespace BirdsiteLive.Domain.Tools { - public class StatusExtractor + public interface IStatusExtractor { - + (string content, Tag[] tags) ExtractTags(string messageContent); + } + + public class StatusExtractor : IStatusExtractor + { + private readonly InstanceSettings _instanceSettings; + + #region Ctor + public StatusExtractor(InstanceSettings instanceSettings) + { + _instanceSettings = instanceSettings; + } + #endregion + + public (string content, Tag[] tags) ExtractTags(string messageContent) + { + var regex = new Regex(@"\W(\#[a-zA-Z0-9]+\b)(?!;)"); + var match = regex.Matches(messageContent); + + var tags = new List(); + foreach (var m in match) + { + var tag = m.ToString().Replace("#", string.Empty).Replace("\n", string.Empty).Trim(); + var url = $"https://{_instanceSettings.Domain}/tags/{tag}"; + + tags.Add(new Tag + { + name = $"#{tag}", + href = url, + type = "Hashtag" + }); + + messageContent = Regex.Replace(messageContent, m.ToString(), + $@"#{tag}"); + + //messageContent = messageContent.Replace( + // $"#{tag}", + // $@"#{tag}"); + } + + return (messageContent, new Tag[0]); + } } } \ No newline at end of file diff --git a/src/Tests/BirdsiteLive.Domain.Tests/StatusServiceTests.cs b/src/Tests/BirdsiteLive.Domain.Tests/StatusServiceTests.cs index b1f3f6f..e6178c3 100644 --- a/src/Tests/BirdsiteLive.Domain.Tests/StatusServiceTests.cs +++ b/src/Tests/BirdsiteLive.Domain.Tests/StatusServiceTests.cs @@ -20,29 +20,29 @@ namespace BirdsiteLive.Domain.Tests } #endregion - [TestMethod] - public void ExtractMentionsTest() - { - #region Stubs - var username = "MyUserName"; - var extractedTweet = new ExtractedTweet - { - Id = 124L, - CreatedAt = DateTime.UtcNow, - MessageContent = @"Getting ready for the weekend...have a great one everyone! -⁠ -Photo by Tim Tronckoe | @timtronckoe -⁠ -#archenemy #michaelamott #alissawhitegluz #jeffloomis #danielerlandsson #sharleedangelo⁠" - }; - #endregion +// [TestMethod] +// public void ExtractMentionsTest() +// { +// #region Stubs +// var username = "MyUserName"; +// var extractedTweet = new ExtractedTweet +// { +// Id = 124L, +// CreatedAt = DateTime.UtcNow, +// MessageContent = @"Getting ready for the weekend...have a great one everyone! +//⁠ +//Photo by Tim Tronckoe | @timtronckoe +//⁠ +//#archenemy #michaelamott #alissawhitegluz #jeffloomis #danielerlandsson #sharleedangelo⁠" +// }; +// #endregion - var service = new StatusService(_settings); - var result = service.GetStatus(username, extractedTweet); +// var service = new StatusService(_settings); +// var result = service.GetStatus(username, extractedTweet); - #region Validations +// #region Validations - #endregion - } +// #endregion +// } } } diff --git a/src/Tests/BirdsiteLive.Domain.Tests/Tools/StatusExtractorTests.cs b/src/Tests/BirdsiteLive.Domain.Tests/Tools/StatusExtractorTests.cs new file mode 100644 index 0000000..d2b63bf --- /dev/null +++ b/src/Tests/BirdsiteLive.Domain.Tests/Tools/StatusExtractorTests.cs @@ -0,0 +1,59 @@ +using System; +using BirdsiteLive.Common.Settings; +using BirdsiteLive.Domain.Tools; +using BirdsiteLive.Twitter.Models; +using Microsoft.VisualStudio.TestTools.UnitTesting; + +namespace BirdsiteLive.Domain.Tests.Tools +{ + [TestClass] + public class StatusExtractorTests + { + private readonly InstanceSettings _settings; + + #region Ctor + public StatusExtractorTests() + { + _settings = new InstanceSettings + { + Domain = "domain.name" + }; + } + #endregion + + [TestMethod] + public void Extract_SingleTag_Test() + { + #region Stubs + var message = $"Bla!{Environment.NewLine}#mytag⁠"; + #endregion + + var service = new StatusExtractor(_settings); + var result = service.ExtractTags(message); + + #region Validations + Assert.IsTrue(result.content.Contains("Bla!")); + Assert.IsTrue(result.content.Contains(@"#mytag")); + #endregion + } + + [TestMethod] + public void Extract_MultiTags_Test() + { + #region Stubs + var message = $"Bla!{Environment.NewLine}#mytag #mytag2 #mytag3⁠{Environment.NewLine}Test #bal Test"; + #endregion + + var service = new StatusExtractor(_settings); + var result = service.ExtractTags(message); + + #region Validations + Assert.IsTrue(result.content.Contains("Bla!")); + Assert.IsTrue(result.content.Contains(@"#mytag")); + Assert.IsTrue(result.content.Contains(@"#mytag2")); + Assert.IsTrue(result.content.Contains(@"#mytag3")); + Assert.IsTrue(result.content.Contains(@"#bal")); + #endregion + } + } +} \ No newline at end of file