testing hashtag extraction

This commit is contained in:
Nicolas Constant 2020-07-31 22:49:00 -04:00
parent bfc4dcb4fd
commit 83507614a4
No known key found for this signature in database
GPG key ID: 1E9F677FB01A5688
4 changed files with 134 additions and 52 deletions

View file

@ -6,6 +6,7 @@ using System.Text.RegularExpressions;
using BirdsiteLive.ActivityPub;
using BirdsiteLive.ActivityPub.Models;
using BirdsiteLive.Common.Settings;
using BirdsiteLive.Domain.Tools;
using BirdsiteLive.Twitter.Models;
using Tweetinvi.Models;
using Tweetinvi.Models.Entities;
@ -20,11 +21,13 @@ namespace BirdsiteLive.Domain
public class StatusService : IStatusService
{
private readonly InstanceSettings _instanceSettings;
private readonly IStatusExtractor _statusExtractor;
#region Ctor
public StatusService(InstanceSettings instanceSettings)
public StatusService(InstanceSettings instanceSettings, IStatusExtractor statusExtractor)
{
_instanceSettings = instanceSettings;
_statusExtractor = statusExtractor;
}
#endregion
@ -37,7 +40,7 @@ namespace BirdsiteLive.Domain
var to = $"{actorUrl}/followers";
var apPublic = "https://www.w3.org/ns/activitystreams#Public";
var extractedTags = ExtractTags(tweet.MessageContent);
var extractedTags = _statusExtractor.ExtractTags(tweet.MessageContent);
var note = new Note
{
@ -64,32 +67,6 @@ namespace BirdsiteLive.Domain
return note;
}
/// <summary>
/// Rewrites every hashtag of a status text as an HTML link to the instance's tag page
/// and collects the matching tag entries.
/// </summary>
/// <param name="messageContent">Raw status text; may be null or empty.</param>
/// <returns>
/// <c>content</c>: the text with each hashtag replaced by an anchor element;
/// <c>tags</c>: one <see cref="Tag"/> per distinct hashtag, in order of first appearance.
/// </returns>
private (string content, Tag[] tags) ExtractTags(string messageContent)
{
    // Nothing to extract; hand the input back untouched instead of letting the regex throw on null.
    if (string.IsNullOrEmpty(messageContent))
    {
        return (messageContent, new Tag[0]);
    }

    // "#tag" that is not glued to a preceding word character and is not followed by ';'
    // (the latter would be a numeric HTML entity such as "&#39;").
    // The lookbehind, unlike a consumed \W, also accepts a hashtag at the very start of the text.
    const string hashtagPattern = @"(?<!\w)\#([a-zA-Z0-9]+)\b(?!;)";

    var tags = new List<Tag>();
    var seenTags = new HashSet<string>();

    // A single pass with a match evaluator replaces exactly the matched occurrence, so
    // "#mytag" can no longer corrupt "#mytag2" and surrounding whitespace is preserved.
    var content = Regex.Replace(messageContent, hashtagPattern, match =>
    {
        var tag = match.Groups[1].Value;
        var url = $"https://{_instanceSettings.Domain}/tags/{tag}";

        // Report each hashtag once even when it is used several times in the text.
        if (seenTags.Add(tag))
        {
            tags.Add(new Tag
            {
                name = $"#{tag}",
                href = url,
                type = "Hashtag"
            });
        }

        return $@"<a href=""{url}"" class=""mention hashtag"" rel=""tag"">#<span>{tag}</span></a>";
    });

    // The collected tags were previously dropped in favour of an empty array.
    return (content, tags.ToArray());
}
private Attachment[] Convert(ExtractedMedia[] media)
{
if(media == null) return new Attachment[0];

View file

@ -1,7 +1,53 @@
namespace BirdsiteLive.Domain.Tools
using System.Collections.Generic;
using System.Text.RegularExpressions;
using BirdsiteLive.ActivityPub.Models;
using BirdsiteLive.Common.Settings;
namespace BirdsiteLive.Domain.Tools
{
public class StatusExtractor
/// <summary>
/// Extracts hashtag information from the text of a status.
/// </summary>
public interface IStatusExtractor
{
/// <summary>
/// Processes the hashtags found in <paramref name="messageContent"/>.
/// </summary>
/// <param name="messageContent">Text of the status to process.</param>
/// <returns>
/// A tuple made of the processed content (the implementation in this file rewrites
/// hashtags as HTML anchor elements) and an array of <see cref="Tag"/> entries
/// accompanying that content.
/// </returns>
(string content, Tag[] tags) ExtractTags(string messageContent);
}
/// <summary>
/// Turns the hashtags found in a status text into HTML links pointing at this instance's
/// tag pages, and exposes them as <see cref="Tag"/> entries.
/// </summary>
public class StatusExtractor : IStatusExtractor
{
    // "#tag" that is not glued to a preceding word character and is not followed by ';'
    // (the latter would be a numeric HTML entity such as "&#39;").
    // Group 1 is the tag name without '#'. The lookbehind, unlike a consumed \W,
    // also accepts a hashtag at the very start of the text.
    private static readonly Regex HashtagRegex =
        new Regex(@"(?<!\w)\#([a-zA-Z0-9]+)\b(?!;)", RegexOptions.Compiled);

    private readonly InstanceSettings _instanceSettings;

    #region Ctor
    /// <param name="instanceSettings">Settings providing the domain used to build tag URLs.</param>
    public StatusExtractor(InstanceSettings instanceSettings)
    {
        _instanceSettings = instanceSettings;
    }
    #endregion

    /// <summary>
    /// Rewrites every hashtag of <paramref name="messageContent"/> as an HTML link and
    /// collects the matching tag entries.
    /// </summary>
    /// <param name="messageContent">Raw status text; may be null or empty.</param>
    /// <returns>
    /// <c>content</c>: the text with each hashtag replaced by an anchor element;
    /// <c>tags</c>: one <see cref="Tag"/> per distinct hashtag, in order of first appearance.
    /// </returns>
    public (string content, Tag[] tags) ExtractTags(string messageContent)
    {
        // Nothing to extract; hand the input back untouched instead of letting the regex throw on null.
        if (string.IsNullOrEmpty(messageContent))
        {
            return (messageContent, new Tag[0]);
        }

        var tags = new List<Tag>();
        var seenTags = new HashSet<string>();

        // A single pass with a match evaluator replaces exactly the matched occurrence.
        // The previous approach fed the matched text back in as a regex pattern, which
        // threw on characters such as '(' and swallowed the character before each tag.
        var content = HashtagRegex.Replace(messageContent, match =>
        {
            var tag = match.Groups[1].Value;
            var url = $"https://{_instanceSettings.Domain}/tags/{tag}";

            // Report each hashtag once even when it is used several times in the text.
            if (seenTags.Add(tag))
            {
                tags.Add(new Tag
                {
                    name = $"#{tag}",
                    href = url,
                    type = "Hashtag"
                });
            }

            return $@"<a href=""{url}"" class=""mention hashtag"" rel=""tag"">#<span>{tag}</span></a>";
        });

        // The collected tags were previously dropped in favour of an empty array.
        return (content, tags.ToArray());
    }
}
}

View file

@ -20,29 +20,29 @@ namespace BirdsiteLive.Domain.Tests
}
#endregion
[TestMethod]
public void ExtractMentionsTest()
{
#region Stubs
var username = "MyUserName";
var extractedTweet = new ExtractedTweet
{
Id = 124L,
CreatedAt = DateTime.UtcNow,
MessageContent = @"Getting ready for the weekend...have a great one everyone!
Photo by Tim Tronckoe | @timtronckoe
#archenemy #michaelamott #alissawhitegluz #jeffloomis #danielerlandsson #sharleedangelo"
};
#endregion
// [TestMethod]
// public void ExtractMentionsTest()
// {
// #region Stubs
// var username = "MyUserName";
// var extractedTweet = new ExtractedTweet
// {
// Id = 124L,
// CreatedAt = DateTime.UtcNow,
// MessageContent = @"Getting ready for the weekend...have a great one everyone!
//
//Photo by Tim Tronckoe | @timtronckoe
//
//#archenemy #michaelamott #alissawhitegluz #jeffloomis #danielerlandsson #sharleedangelo"
// };
// #endregion
var service = new StatusService(_settings);
var result = service.GetStatus(username, extractedTweet);
// var service = new StatusService(_settings);
// var result = service.GetStatus(username, extractedTweet);
#region Validations
// #region Validations
#endregion
}
// #endregion
// }
}
}

View file

@ -0,0 +1,59 @@
using System;
using BirdsiteLive.Common.Settings;
using BirdsiteLive.Domain.Tools;
using BirdsiteLive.Twitter.Models;
using Microsoft.VisualStudio.TestTools.UnitTesting;
namespace BirdsiteLive.Domain.Tests.Tools
{
/// <summary>
/// Checks that <see cref="StatusExtractor.ExtractTags"/> rewrites hashtags as HTML links.
/// </summary>
[TestClass]
public class StatusExtractorTests
{
    private readonly InstanceSettings _settings;

    #region Ctor
    public StatusExtractorTests()
    {
        _settings = new InstanceSettings { Domain = "domain.name" };
    }
    #endregion

    /// <summary>A lone hashtag on its own line becomes a link; the rest of the text is kept.</summary>
    [TestMethod]
    public void Extract_SingleTag_Test()
    {
        // Arrange
        var input = $"Bla!{Environment.NewLine}#mytag";
        var extractor = new StatusExtractor(_settings);

        // Act
        var html = extractor.ExtractTags(input).content;

        // Assert
        StringAssert.Contains(html, "Bla!");
        StringAssert.Contains(html, @"<a href=""https://domain.name/tags/mytag"" class=""mention hashtag"" rel=""tag"">#<span>mytag</span></a>");
    }

    /// <summary>Several hashtags, including ones sharing a prefix, each become their own link.</summary>
    [TestMethod]
    public void Extract_MultiTags_Test()
    {
        // Arrange
        var input = $"Bla!{Environment.NewLine}#mytag #mytag2 #mytag3{Environment.NewLine}Test #bal Test";
        var expectedFragments = new[]
        {
            "Bla!",
            @"<a href=""https://domain.name/tags/mytag"" class=""mention hashtag"" rel=""tag"">#<span>mytag</span></a>",
            @"<a href=""https://domain.name/tags/mytag2"" class=""mention hashtag"" rel=""tag"">#<span>mytag2</span></a>",
            @"<a href=""https://domain.name/tags/mytag3"" class=""mention hashtag"" rel=""tag"">#<span>mytag3</span></a>",
            @"<a href=""https://domain.name/tags/bal"" class=""mention hashtag"" rel=""tag"">#<span>bal</span></a>"
        };
        var extractor = new StatusExtractor(_settings);

        // Act
        var html = extractor.ExtractTags(input).content;

        // Assert
        foreach (var fragment in expectedFragments)
        {
            StringAssert.Contains(html, fragment);
        }
    }
}
}