testing hashtag extraction
This commit is contained in:
parent
bfc4dcb4fd
commit
83507614a4
4 changed files with 134 additions and 52 deletions
|
@ -6,6 +6,7 @@ using System.Text.RegularExpressions;
|
|||
using BirdsiteLive.ActivityPub;
|
||||
using BirdsiteLive.ActivityPub.Models;
|
||||
using BirdsiteLive.Common.Settings;
|
||||
using BirdsiteLive.Domain.Tools;
|
||||
using BirdsiteLive.Twitter.Models;
|
||||
using Tweetinvi.Models;
|
||||
using Tweetinvi.Models.Entities;
|
||||
|
@ -20,11 +21,13 @@ namespace BirdsiteLive.Domain
|
|||
public class StatusService : IStatusService
|
||||
{
|
||||
private readonly InstanceSettings _instanceSettings;
|
||||
private readonly IStatusExtractor _statusExtractor;
|
||||
|
||||
#region Ctor
|
||||
public StatusService(InstanceSettings instanceSettings)
|
||||
public StatusService(InstanceSettings instanceSettings, IStatusExtractor statusExtractor)
|
||||
{
|
||||
_instanceSettings = instanceSettings;
|
||||
_statusExtractor = statusExtractor;
|
||||
}
|
||||
#endregion
|
||||
|
||||
|
@ -37,7 +40,7 @@ namespace BirdsiteLive.Domain
|
|||
var to = $"{actorUrl}/followers";
|
||||
var apPublic = "https://www.w3.org/ns/activitystreams#Public";
|
||||
|
||||
var extractedTags = ExtractTags(tweet.MessageContent);
|
||||
var extractedTags = _statusExtractor.ExtractTags(tweet.MessageContent);
|
||||
|
||||
var note = new Note
|
||||
{
|
||||
|
@ -64,32 +67,6 @@ namespace BirdsiteLive.Domain
|
|||
return note;
|
||||
}
|
||||
|
||||
private (string content, Tag[] tags) ExtractTags(string messageContent)
|
||||
{
|
||||
var regex = new Regex(@"\W(\#[a-zA-Z0-9]+\b)(?!;)");
|
||||
var match = regex.Matches(messageContent);
|
||||
|
||||
var tags = new List<Tag>();
|
||||
foreach (var m in match)
|
||||
{
|
||||
var tag = m.ToString().Replace("#", string.Empty).Replace("\n", string.Empty).Trim();
|
||||
var url = $"https://{_instanceSettings.Domain}/tags/{tag}";
|
||||
|
||||
tags.Add(new Tag
|
||||
{
|
||||
name = $"#{tag}",
|
||||
href = url,
|
||||
type = "Hashtag"
|
||||
});
|
||||
|
||||
messageContent = messageContent.Replace(
|
||||
$"#{tag}",
|
||||
$@"<a href=""{url}"" class=""mention hashtag"" rel=""tag"">#<span>{tag}</span></a>");
|
||||
}
|
||||
|
||||
return (messageContent, new Tag[0]);
|
||||
}
|
||||
|
||||
private Attachment[] Convert(ExtractedMedia[] media)
|
||||
{
|
||||
if(media == null) return new Attachment[0];
|
||||
|
|
|
@ -1,7 +1,53 @@
|
|||
namespace BirdsiteLive.Domain.Tools
|
||||
using System.Collections.Generic;
|
||||
using System.Text.RegularExpressions;
|
||||
using BirdsiteLive.ActivityPub.Models;
|
||||
using BirdsiteLive.Common.Settings;
|
||||
|
||||
namespace BirdsiteLive.Domain.Tools
|
||||
{
|
||||
public class StatusExtractor
|
||||
/// <summary>
/// Contract for components that pull tags out of a status message.
/// </summary>
public interface IStatusExtractor
{
    /// <summary>
    /// Extracts the tags contained in a message.
    /// </summary>
    /// <param name="messageContent">Raw text of the message to analyse.</param>
    /// <returns>
    /// A tuple holding the message content as produced by the implementation
    /// and the tags associated with that message.
    /// </returns>
    (string content, Tag[] tags) ExtractTags(string messageContent);
}
|
||||
|
||||
/// <summary>
/// Finds hashtags in a status message, rewrites each one as an HTML link to the
/// matching tag page of this instance, and reports the tags that were found.
/// </summary>
public class StatusExtractor : IStatusExtractor
{
    // A hashtag is '#' followed by ASCII letters/digits.
    //  - (?<!\w) rejects a '#' glued to a preceding word character ("abc#def") without
    //    consuming the separator, so the separator stays in the content and a hashtag
    //    at the very start of the message is still recognised.
    //  - (?!;) keeps sequences such as "&#39;" (numeric HTML entities) from being
    //    treated as hashtags.
    private static readonly Regex HashtagRegex = new Regex(@"(?<!\w)\#[a-zA-Z0-9]+\b(?!;)");

    private readonly InstanceSettings _instanceSettings;

    #region Ctor
    /// <summary>
    /// Creates an extractor that builds tag URLs from the given instance settings.
    /// </summary>
    /// <param name="instanceSettings">Settings providing the instance domain used in tag links.</param>
    public StatusExtractor(InstanceSettings instanceSettings)
    {
        _instanceSettings = instanceSettings;
    }
    #endregion

    /// <summary>
    /// Replaces every hashtag in <paramref name="messageContent"/> with an HTML anchor
    /// pointing to <c>https://{Domain}/tags/{tag}</c> and collects the corresponding tags.
    /// </summary>
    /// <param name="messageContent">Raw message text; may be null or empty.</param>
    /// <returns>
    /// The rewritten content and one <see cref="Tag"/> per distinct hashtag, in order of
    /// first appearance. Null or empty input is returned unchanged with no tags.
    /// </returns>
    public (string content, Tag[] tags) ExtractTags(string messageContent)
    {
        if (string.IsNullOrEmpty(messageContent)) return (messageContent, new Tag[0]);

        var tags = new List<Tag>();
        var seenTags = new HashSet<string>();

        // A single pass with a match evaluator replaces each hashtag at its own position.
        // Re-running a replace per match with the match text as the pattern would also hit
        // longer tags sharing the same prefix ("#mytag" inside "#mytag2"), and would
        // interpret the text as a regex / the replacement as a substitution pattern.
        var content = HashtagRegex.Replace(messageContent, m =>
        {
            var tag = m.Value.Substring(1); // drop the leading '#'
            var url = $"https://{_instanceSettings.Domain}/tags/{tag}";

            // Report each distinct hashtag once even if it is repeated in the message.
            if (seenTags.Add(tag))
            {
                tags.Add(new Tag
                {
                    name = $"#{tag}",
                    href = url,
                    type = "Hashtag"
                });
            }

            return $@"<a href=""{url}"" class=""mention hashtag"" rel=""tag"">#<span>{tag}</span></a>";
        });

        return (content, tags.ToArray());
    }
}
|
||||
}
|
|
@ -20,29 +20,29 @@ namespace BirdsiteLive.Domain.Tests
|
|||
}
|
||||
#endregion
|
||||
|
||||
[TestMethod]
|
||||
public void ExtractMentionsTest()
|
||||
{
|
||||
#region Stubs
|
||||
var username = "MyUserName";
|
||||
var extractedTweet = new ExtractedTweet
|
||||
{
|
||||
Id = 124L,
|
||||
CreatedAt = DateTime.UtcNow,
|
||||
MessageContent = @"Getting ready for the weekend...have a great one everyone!
|
||||
|
||||
Photo by Tim Tronckoe | @timtronckoe
|
||||
|
||||
#archenemy #michaelamott #alissawhitegluz #jeffloomis #danielerlandsson #sharleedangelo"
|
||||
};
|
||||
#endregion
|
||||
// [TestMethod]
|
||||
// public void ExtractMentionsTest()
|
||||
// {
|
||||
// #region Stubs
|
||||
// var username = "MyUserName";
|
||||
// var extractedTweet = new ExtractedTweet
|
||||
// {
|
||||
// Id = 124L,
|
||||
// CreatedAt = DateTime.UtcNow,
|
||||
// MessageContent = @"Getting ready for the weekend...have a great one everyone!
|
||||
//
|
||||
//Photo by Tim Tronckoe | @timtronckoe
|
||||
//
|
||||
//#archenemy #michaelamott #alissawhitegluz #jeffloomis #danielerlandsson #sharleedangelo"
|
||||
// };
|
||||
// #endregion
|
||||
|
||||
var service = new StatusService(_settings);
|
||||
var result = service.GetStatus(username, extractedTweet);
|
||||
// var service = new StatusService(_settings);
|
||||
// var result = service.GetStatus(username, extractedTweet);
|
||||
|
||||
#region Validations
|
||||
// #region Validations
|
||||
|
||||
#endregion
|
||||
}
|
||||
// #endregion
|
||||
// }
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,59 @@
|
|||
using System;
|
||||
using BirdsiteLive.Common.Settings;
|
||||
using BirdsiteLive.Domain.Tools;
|
||||
using BirdsiteLive.Twitter.Models;
|
||||
using Microsoft.VisualStudio.TestTools.UnitTesting;
|
||||
|
||||
namespace BirdsiteLive.Domain.Tests.Tools
|
||||
{
|
||||
/// <summary>
/// Unit tests covering hashtag extraction performed by <see cref="StatusExtractor"/>.
/// </summary>
[TestClass]
public class StatusExtractorTests
{
    // Settings shared by every test; the domain is what the generated tag links point to.
    private readonly InstanceSettings _settings = new InstanceSettings
    {
        Domain = "domain.name"
    };

    /// <summary>
    /// A message with one hashtag keeps its text and gets the hashtag rendered as a link.
    /// </summary>
    [TestMethod]
    public void Extract_SingleTag_Test()
    {
        // Arrange
        var input = $"Bla!{Environment.NewLine}#mytag";
        var extractor = new StatusExtractor(_settings);

        // Act
        var (content, _) = extractor.ExtractTags(input);

        // Assert
        Assert.IsTrue(content.Contains("Bla!"));
        Assert.IsTrue(content.Contains(@"<a href=""https://domain.name/tags/mytag"" class=""mention hashtag"" rel=""tag"">#<span>mytag</span></a>"));
    }

    /// <summary>
    /// A message with several hashtags spread over multiple lines gets every one of them
    /// rendered as a link.
    /// </summary>
    [TestMethod]
    public void Extract_MultiTags_Test()
    {
        // Arrange
        var input = $"Bla!{Environment.NewLine}#mytag #mytag2 #mytag3{Environment.NewLine}Test #bal Test";
        var expectedAnchors = new[]
        {
            @"<a href=""https://domain.name/tags/mytag"" class=""mention hashtag"" rel=""tag"">#<span>mytag</span></a>",
            @"<a href=""https://domain.name/tags/mytag2"" class=""mention hashtag"" rel=""tag"">#<span>mytag2</span></a>",
            @"<a href=""https://domain.name/tags/mytag3"" class=""mention hashtag"" rel=""tag"">#<span>mytag3</span></a>",
            @"<a href=""https://domain.name/tags/bal"" class=""mention hashtag"" rel=""tag"">#<span>bal</span></a>"
        };
        var extractor = new StatusExtractor(_settings);

        // Act
        var (content, _) = extractor.ExtractTags(input);

        // Assert
        Assert.IsTrue(content.Contains("Bla!"));
        foreach (var anchor in expectedAnchors)
        {
            Assert.IsTrue(content.Contains(anchor));
        }
    }
}
|
||||
}
|
Reference in a new issue