From ec420346b6335d982b33810f71d30b444b582ce2 Mon Sep 17 00:00:00 2001
From: Nicolas Constant
Date: Mon, 1 Feb 2021 20:07:53 -0500
Subject: [PATCH 01/14] purge cache when TL retrieval fails, fix #79
---
.../Processors/RetrieveTweetsProcessor.cs | 31 +++++++++---
.../CachedTwitterService.cs | 5 ++
.../TwitterTweetsService.cs | 48 ++++++++-----------
3 files changed, 51 insertions(+), 33 deletions(-)
diff --git a/src/BirdsiteLive.Pipeline/Processors/RetrieveTweetsProcessor.cs b/src/BirdsiteLive.Pipeline/Processors/RetrieveTweetsProcessor.cs
index c381dcf..c0976c5 100644
--- a/src/BirdsiteLive.Pipeline/Processors/RetrieveTweetsProcessor.cs
+++ b/src/BirdsiteLive.Pipeline/Processors/RetrieveTweetsProcessor.cs
@@ -9,6 +9,7 @@ using BirdsiteLive.Pipeline.Contracts;
using BirdsiteLive.Pipeline.Models;
using BirdsiteLive.Twitter;
using BirdsiteLive.Twitter.Models;
+using Microsoft.Extensions.Logging;
using Tweetinvi.Models;
namespace BirdsiteLive.Pipeline.Processors
@@ -16,13 +17,17 @@ namespace BirdsiteLive.Pipeline.Processors
public class RetrieveTweetsProcessor : IRetrieveTweetsProcessor
{
private readonly ITwitterTweetsService _twitterTweetsService;
+ private readonly ITwitterUserService _twitterUserService;
private readonly ITwitterUserDal _twitterUserDal;
+ private readonly ILogger _logger;
#region Ctor
- public RetrieveTweetsProcessor(ITwitterTweetsService twitterTweetsService, ITwitterUserDal twitterUserDal)
+ public RetrieveTweetsProcessor(ITwitterTweetsService twitterTweetsService, ITwitterUserDal twitterUserDal, ITwitterUserService twitterUserService, ILogger logger)
{
_twitterTweetsService = twitterTweetsService;
_twitterUserDal = twitterUserDal;
+ _twitterUserService = twitterUserService;
+ _logger = logger;
}
#endregion
@@ -61,11 +66,25 @@ namespace BirdsiteLive.Pipeline.Processors
private ExtractedTweet[] RetrieveNewTweets(SyncTwitterUser user)
{
- ExtractedTweet[] tweets;
- if (user.LastTweetPostedId == -1)
- tweets = _twitterTweetsService.GetTimeline(user.Acct, 1);
- else
- tweets = _twitterTweetsService.GetTimeline(user.Acct, 200, user.LastTweetSynchronizedForAllFollowersId);
+ var tweets = new ExtractedTweet[0];
+
+ try
+ {
+ if (user.LastTweetPostedId == -1)
+ tweets = _twitterTweetsService.GetTimeline(user.Acct, 1);
+ else
+ tweets = _twitterTweetsService.GetTimeline(user.Acct, 200, user.LastTweetSynchronizedForAllFollowersId);
+ }
+ catch (Exception e)
+ {
+ _logger.LogError(e, "Error retrieving TL of {Username} from {LastTweetPostedId}", user.Acct, user.LastTweetPostedId);
+
+ if (_twitterUserService is CachedTwitterUserService service)
+ {
+ _logger.LogInformation("Purge {Username} from cache", user.Acct);
+ service.PurgeUser(user.Acct);
+ }
+ }
return tweets;
}
diff --git a/src/BirdsiteLive.Twitter/CachedTwitterService.cs b/src/BirdsiteLive.Twitter/CachedTwitterService.cs
index 2f0be46..d8ca1fb 100644
--- a/src/BirdsiteLive.Twitter/CachedTwitterService.cs
+++ b/src/BirdsiteLive.Twitter/CachedTwitterService.cs
@@ -38,5 +38,10 @@ namespace BirdsiteLive.Twitter
return user;
}
+
+ public void PurgeUser(string username)
+ {
+ _userCache.Remove(username);
+ }
}
}
\ No newline at end of file
diff --git a/src/BirdsiteLive.Twitter/TwitterTweetsService.cs b/src/BirdsiteLive.Twitter/TwitterTweetsService.cs
index 49684e8..41b2d60 100644
--- a/src/BirdsiteLive.Twitter/TwitterTweetsService.cs
+++ b/src/BirdsiteLive.Twitter/TwitterTweetsService.cs
@@ -61,36 +61,30 @@ namespace BirdsiteLive.Twitter
public ExtractedTweet[] GetTimeline(string username, int nberTweets, long fromTweetId = -1)
{
var tweets = new List();
- try
+
+ _twitterAuthenticationInitializer.EnsureAuthenticationIsInitialized();
+ ExceptionHandler.SwallowWebExceptions = false;
+ TweetinviConfig.CurrentThreadSettings.TweetMode = TweetMode.Extended;
+
+ var user = _twitterUserService.GetUser(username);
+ if (user == null || user.Protected) return new ExtractedTweet[0];
+
+ if (fromTweetId == -1)
{
- _twitterAuthenticationInitializer.EnsureAuthenticationIsInitialized();
- ExceptionHandler.SwallowWebExceptions = false;
- TweetinviConfig.CurrentThreadSettings.TweetMode = TweetMode.Extended;
-
- var user = _twitterUserService.GetUser(username);
- if (user == null || user.Protected) return new ExtractedTweet[0];
-
- if (fromTweetId == -1)
- {
- var timeline = Timeline.GetUserTimeline(user.Id, nberTweets);
- _statisticsHandler.CalledTimelineApi();
- if (timeline != null) tweets.AddRange(timeline);
- }
- else
- {
- var timelineRequestParameters = new UserTimelineParameters
- {
- SinceId = fromTweetId,
- MaximumNumberOfTweetsToRetrieve = nberTweets
- };
- var timeline = Timeline.GetUserTimeline(user.Id, timelineRequestParameters);
- _statisticsHandler.CalledTimelineApi();
- if (timeline != null) tweets.AddRange(timeline);
- }
+ var timeline = Timeline.GetUserTimeline(user.Id, nberTweets);
+ _statisticsHandler.CalledTimelineApi();
+ if (timeline != null) tweets.AddRange(timeline);
}
- catch (Exception e)
+ else
{
- _logger.LogError(e, "Error retrieving timeline from {Username}, from {TweetId}", username, fromTweetId);
+ var timelineRequestParameters = new UserTimelineParameters
+ {
+ SinceId = fromTweetId,
+ MaximumNumberOfTweetsToRetrieve = nberTweets
+ };
+ var timeline = Timeline.GetUserTimeline(user.Id, timelineRequestParameters);
+ _statisticsHandler.CalledTimelineApi();
+ if (timeline != null) tweets.AddRange(timeline);
}
return tweets.Select(_tweetExtractor.Extract).ToArray();
From 0bd8e38f28a30dc88b96f125ba1ce17247bfa66c Mon Sep 17 00:00:00 2001
From: Nicolas Constant
Date: Mon, 1 Feb 2021 20:13:10 -0500
Subject: [PATCH 02/14] refactoring user regex
---
src/BirdsiteLive.Common/Regexes/UserRegex.cs | 9 +++++++++
src/BirdsiteLive/Controllers/UsersController.cs | 4 ++--
src/BirdsiteLive/Controllers/WellKnownController.cs | 4 ++--
3 files changed, 13 insertions(+), 4 deletions(-)
create mode 100644 src/BirdsiteLive.Common/Regexes/UserRegex.cs
diff --git a/src/BirdsiteLive.Common/Regexes/UserRegex.cs b/src/BirdsiteLive.Common/Regexes/UserRegex.cs
new file mode 100644
index 0000000..b3da74a
--- /dev/null
+++ b/src/BirdsiteLive.Common/Regexes/UserRegex.cs
@@ -0,0 +1,9 @@
+using System.Text.RegularExpressions;
+
+namespace BirdsiteLive.Common.Regexes
+{
+ public class UserRegex
+ {
+ public static readonly Regex TwitterAccountRegex = new Regex(@"^[a-zA-Z0-9_]+$");
+ }
+}
\ No newline at end of file
diff --git a/src/BirdsiteLive/Controllers/UsersController.cs b/src/BirdsiteLive/Controllers/UsersController.cs
index a22ae73..486d9b6 100644
--- a/src/BirdsiteLive/Controllers/UsersController.cs
+++ b/src/BirdsiteLive/Controllers/UsersController.cs
@@ -9,6 +9,7 @@ using System.Threading;
using System.Threading.Tasks;
using BirdsiteLive.ActivityPub;
using BirdsiteLive.ActivityPub.Models;
+using BirdsiteLive.Common.Regexes;
using BirdsiteLive.Common.Settings;
using BirdsiteLive.Domain;
using BirdsiteLive.Models;
@@ -28,7 +29,6 @@ namespace BirdsiteLive.Controllers
private readonly IUserService _userService;
private readonly IStatusService _statusService;
private readonly InstanceSettings _instanceSettings;
- private readonly Regex _twitterAccountRegex = new Regex(@"^[a-zA-Z0-9_]+$");
#region Ctor
public UsersController(ITwitterUserService twitterUserService, IUserService userService, IStatusService statusService, InstanceSettings instanceSettings, ITwitterTweetsService twitterTweetService)
@@ -62,7 +62,7 @@ namespace BirdsiteLive.Controllers
// Ensure valid username
// https://help.twitter.com/en/managing-your-account/twitter-username-rules
TwitterUser user = null;
- if (!string.IsNullOrWhiteSpace(id) && _twitterAccountRegex.IsMatch(id) && id.Length <= 15)
+ if (!string.IsNullOrWhiteSpace(id) && UserRegex.TwitterAccountRegex.IsMatch(id) && id.Length <= 15)
user = _twitterUserService.GetUser(id);
var acceptHeaders = Request.Headers["Accept"];
diff --git a/src/BirdsiteLive/Controllers/WellKnownController.cs b/src/BirdsiteLive/Controllers/WellKnownController.cs
index 3f060a7..f974665 100644
--- a/src/BirdsiteLive/Controllers/WellKnownController.cs
+++ b/src/BirdsiteLive/Controllers/WellKnownController.cs
@@ -4,6 +4,7 @@ using System.Linq;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
using BirdsiteLive.ActivityPub.Converters;
+using BirdsiteLive.Common.Regexes;
using BirdsiteLive.Common.Settings;
using BirdsiteLive.DAL.Contracts;
using BirdsiteLive.Models;
@@ -20,7 +21,6 @@ namespace BirdsiteLive.Controllers
private readonly ITwitterUserService _twitterUserService;
private readonly ITwitterUserDal _twitterUserDal;
private readonly InstanceSettings _settings;
- private readonly Regex _twitterAccountRegex = new Regex(@"^[a-zA-Z0-9_]+$");
#region Ctor
public WellKnownController(InstanceSettings settings, ITwitterUserService twitterUserService, ITwitterUserDal twitterUserDal)
@@ -164,7 +164,7 @@ namespace BirdsiteLive.Controllers
// Ensure valid username
// https://help.twitter.com/en/managing-your-account/twitter-username-rules
- if (string.IsNullOrWhiteSpace(name) || !_twitterAccountRegex.IsMatch(name) || name.Length > 15 )
+ if (string.IsNullOrWhiteSpace(name) || !UserRegex.TwitterAccountRegex.IsMatch(name) || name.Length > 15 )
return NotFound();
if (!string.IsNullOrWhiteSpace(domain) && domain != _settings.Domain)
From b2be896e95c6a12b0a090881e08fb25e944a92ec Mon Sep 17 00:00:00 2001
From: Nicolas Constant
Date: Mon, 1 Feb 2021 20:19:14 -0500
Subject: [PATCH 03/14] added regexes
---
src/BirdsiteLive.Common/Regexes/HashtagRegexes.cs | 9 +++++++++
src/BirdsiteLive.Common/Regexes/UrlRegexes.cs | 9 +++++++++
src/BirdsiteLive.Common/Regexes/UserRegex.cs | 9 ---------
src/BirdsiteLive.Common/Regexes/UserRegexes.cs | 10 ++++++++++
src/BirdsiteLive/Controllers/UsersController.cs | 2 +-
src/BirdsiteLive/Controllers/WellKnownController.cs | 2 +-
6 files changed, 30 insertions(+), 11 deletions(-)
create mode 100644 src/BirdsiteLive.Common/Regexes/HashtagRegexes.cs
create mode 100644 src/BirdsiteLive.Common/Regexes/UrlRegexes.cs
delete mode 100644 src/BirdsiteLive.Common/Regexes/UserRegex.cs
create mode 100644 src/BirdsiteLive.Common/Regexes/UserRegexes.cs
diff --git a/src/BirdsiteLive.Common/Regexes/HashtagRegexes.cs b/src/BirdsiteLive.Common/Regexes/HashtagRegexes.cs
new file mode 100644
index 0000000..c5e8ed7
--- /dev/null
+++ b/src/BirdsiteLive.Common/Regexes/HashtagRegexes.cs
@@ -0,0 +1,9 @@
+using System.Text.RegularExpressions;
+
+namespace BirdsiteLive.Common.Regexes
+{
+ public class HashtagRegexes
+ {
+ public static readonly Regex Hashtag = new Regex(@"(.)(#[a-zA-Z0-9]+)(\s|$|[.,;:!?/|-])");
+ }
+}
\ No newline at end of file
diff --git a/src/BirdsiteLive.Common/Regexes/UrlRegexes.cs b/src/BirdsiteLive.Common/Regexes/UrlRegexes.cs
new file mode 100644
index 0000000..ea3e5c2
--- /dev/null
+++ b/src/BirdsiteLive.Common/Regexes/UrlRegexes.cs
@@ -0,0 +1,9 @@
+using System.Text.RegularExpressions;
+
+namespace BirdsiteLive.Common.Regexes
+{
+ public class UrlRegexes
+ {
+ public static readonly Regex Url = new Regex(@"((http|ftp|https):\/\/[\w\-_]+(\.[\w\-_]+)+([\w\-\.,@?^=%&:/~\+#]*[\w\-\@?^=%&/~\+#])?)");
+ }
+}
\ No newline at end of file
diff --git a/src/BirdsiteLive.Common/Regexes/UserRegex.cs b/src/BirdsiteLive.Common/Regexes/UserRegex.cs
deleted file mode 100644
index b3da74a..0000000
--- a/src/BirdsiteLive.Common/Regexes/UserRegex.cs
+++ /dev/null
@@ -1,9 +0,0 @@
-using System.Text.RegularExpressions;
-
-namespace BirdsiteLive.Common.Regexes
-{
- public class UserRegex
- {
- public static readonly Regex TwitterAccountRegex = new Regex(@"^[a-zA-Z0-9_]+$");
- }
-}
\ No newline at end of file
diff --git a/src/BirdsiteLive.Common/Regexes/UserRegexes.cs b/src/BirdsiteLive.Common/Regexes/UserRegexes.cs
new file mode 100644
index 0000000..74c8b2e
--- /dev/null
+++ b/src/BirdsiteLive.Common/Regexes/UserRegexes.cs
@@ -0,0 +1,10 @@
+using System.Text.RegularExpressions;
+
+namespace BirdsiteLive.Common.Regexes
+{
+ public class UserRegexes
+ {
+ public static readonly Regex TwitterAccount = new Regex(@"^[a-zA-Z0-9_]+$");
+ public static readonly Regex Mention = new Regex(@"(.)(@[a-zA-Z0-9_]+)(\s|$|[,;:!?/|-]|(. ))");
+ }
+}
\ No newline at end of file
diff --git a/src/BirdsiteLive/Controllers/UsersController.cs b/src/BirdsiteLive/Controllers/UsersController.cs
index 486d9b6..0fbefcd 100644
--- a/src/BirdsiteLive/Controllers/UsersController.cs
+++ b/src/BirdsiteLive/Controllers/UsersController.cs
@@ -62,7 +62,7 @@ namespace BirdsiteLive.Controllers
// Ensure valid username
// https://help.twitter.com/en/managing-your-account/twitter-username-rules
TwitterUser user = null;
- if (!string.IsNullOrWhiteSpace(id) && UserRegex.TwitterAccountRegex.IsMatch(id) && id.Length <= 15)
+ if (!string.IsNullOrWhiteSpace(id) && UserRegexes.TwitterAccount.IsMatch(id) && id.Length <= 15)
user = _twitterUserService.GetUser(id);
var acceptHeaders = Request.Headers["Accept"];
diff --git a/src/BirdsiteLive/Controllers/WellKnownController.cs b/src/BirdsiteLive/Controllers/WellKnownController.cs
index f974665..4151ab0 100644
--- a/src/BirdsiteLive/Controllers/WellKnownController.cs
+++ b/src/BirdsiteLive/Controllers/WellKnownController.cs
@@ -164,7 +164,7 @@ namespace BirdsiteLive.Controllers
// Ensure valid username
// https://help.twitter.com/en/managing-your-account/twitter-username-rules
- if (string.IsNullOrWhiteSpace(name) || !UserRegex.TwitterAccountRegex.IsMatch(name) || name.Length > 15 )
+ if (string.IsNullOrWhiteSpace(name) || !UserRegexes.TwitterAccount.IsMatch(name) || name.Length > 15 )
return NotFound();
if (!string.IsNullOrWhiteSpace(domain) && domain != _settings.Domain)
From 6fac0ceffaa5ffeb3321b1cc7a73b0203bd9a4e8 Mon Sep 17 00:00:00 2001
From: Nicolas Constant
Date: Mon, 1 Feb 2021 20:23:54 -0500
Subject: [PATCH 04/14] refactoring regexes
---
src/BirdsiteLive.Common/Regexes/EmojiRegexes.cs | 11 +++++++++++
src/BirdsiteLive.Domain/Tools/StatusExtractor.cs | 8 ++------
2 files changed, 13 insertions(+), 6 deletions(-)
create mode 100644 src/BirdsiteLive.Common/Regexes/EmojiRegexes.cs
diff --git a/src/BirdsiteLive.Common/Regexes/EmojiRegexes.cs b/src/BirdsiteLive.Common/Regexes/EmojiRegexes.cs
new file mode 100644
index 0000000..a6b8ae5
--- /dev/null
+++ b/src/BirdsiteLive.Common/Regexes/EmojiRegexes.cs
@@ -0,0 +1,11 @@
+using System.Text.RegularExpressions;
+
+namespace BirdsiteLive.Common.Regexes
+{
+ public class EmojiRegexes
+ {
+ public static readonly Regex Emoji = new Regex(EmojiPattern);
+
+ private const string EmojiPattern = @"(?:\uD83D(?:\uDD73\uFE0F?|\uDC41(?:(?:\uFE0F(?:\u200D\uD83D\uDDE8\uFE0F?)?|\u200D\uD83D\uDDE8\uFE0F?))?|[\uDDE8\uDDEF]\uFE0F?|\uDC4B(?:\uD83C[\uDFFB-\uDFFF])?|\uDD90(?:(?:\uD83C[\uDFFB-\uDFFF]|\uFE0F))?|[\uDD96\uDC4C\uDC48\uDC49\uDC46\uDD95\uDC47\uDC4D\uDC4E\uDC4A\uDC4F\uDE4C\uDC50\uDE4F\uDC85\uDCAA\uDC42\uDC43\uDC76\uDC66\uDC67](?:\uD83C[\uDFFB-\uDFFF])?|\uDC71(?:(?:\uD83C(?:[\uDFFB-\uDFFF](?:\u200D(?:[\u2640\u2642]\uFE0F?))?)|\u200D(?:[\u2640\u2642]\uFE0F?)))?|\uDC68(?:(?:\uD83C(?:\uDFFB(?:\u200D(?:\uD83E(?:\uDD1D\u200D\uD83D\uDC68\uD83C[\uDFFC-\uDFFF]|[\uDDB0\uDDB1\uDDB3\uDDB2\uDDAF\uDDBC\uDDBD])|\u2695\uFE0F?|\uD83C[\uDF93\uDFEB\uDF3E\uDF73\uDFED\uDFA4\uDFA8]|\u2696\uFE0F?|\uD83D[\uDD27\uDCBC\uDD2C\uDCBB\uDE80\uDE92]|\u2708\uFE0F?))?|\uDFFC(?:\u200D(?:\uD83E(?:\uDD1D\u200D\uD83D\uDC68\uD83C[\uDFFB\uDFFD-\uDFFF]|[\uDDB0\uDDB1\uDDB3\uDDB2\uDDAF\uDDBC\uDDBD])|\u2695\uFE0F?|\uD83C[\uDF93\uDFEB\uDF3E\uDF73\uDFED\uDFA4\uDFA8]|\u2696\uFE0F?|\uD83D[\uDD27\uDCBC\uDD2C\uDCBB\uDE80\uDE92]|\u2708\uFE0F?))?|\uDFFD(?:\u200D(?:\uD83E(?:\uDD1D\u200D\uD83D\uDC68\uD83C[\uDFFB\uDFFC\uDFFE\uDFFF]|[\uDDB0\uDDB1\uDDB3\uDDB2\uDDAF\uDDBC\uDDBD])|\u2695\uFE0F?|\uD83C[\uDF93\uDFEB\uDF3E\uDF73\uDFED\uDFA4\uDFA8]|\u2696\uFE0F?|\uD83D[\uDD27\uDCBC\uDD2C\uDCBB\uDE80\uDE92]|\u2708\uFE0F?))?|\uDFFE(?:\u200D(?:\uD83E(?:\uDD1D\u200D\uD83D\uDC68\uD83C[\uDFFB-\uDFFD\uDFFF]|[\uDDB0\uDDB1\uDDB3\uDDB2\uDDAF\uDDBC\uDDBD])|\u2695\uFE0F?|\uD83C[\uDF93\uDFEB\uDF3E\uDF73\uDFED\uDFA4\uDFA8]|\u2696\uFE0F?|\uD83D[\uDD27\uDCBC\uDD2C\uDCBB\uDE80\uDE92]|\u2708\uFE0F?))?|\uDFFF(?:\u200D(?:\uD83E(?:\uDD1D\u200D\uD83D\uDC68\uD83C[\uDFFB-\uDFFE]|[\uDDB0\uDDB1\uDDB3\uDDB2\uDDAF\uDDBC\uDDBD])|\u2695\uFE0F?|\uD83C[\uDF93\uDFEB\uDF3E\uDF73\uDFED\uDFA4\uDFA8]|\u2696\uFE0F?|\uD83D[\uDD27\uDCBC\uDD2C\uDCBB\uDE80\uDE92]|\u2708\uFE0F?))?)|\u200D(?:\uD83E[\uDDB0\uDDB1\uDDB3\uDDB2\uDDAF\uDDBC\uDDBD]|\u2695\uFE0F?|\uD83C[\uDF93\uDFEB\uDF3E\uDF73\uDFED\uDFA4\uDFA8]|\u2696\uFE0F?|\uD83D(?:\uDC69\u200D\uD83D(?:\uDC66(?:\u200D\uD83D\uDC66)?|\uDC67(?:\u200D\uD83D[\uDC66\uDC67])?)|\uDC68\u200D\uD83D(?:\uDC66(?:\u200D\uD83D\uDC66)?|\uDC67(?:\u200D\uD83D[\uDC66\uDC67])?)|\uDC66(?:\u200D\uD83D\uDC66)?|\uDC67(?:\u200D\uD83D[\uDC66\uDC67])?|[\uDD27\uDCBC\uDD2C\uDCBB\uDE80\uDE92])|\u2708\uFE0F?|\u2764(?:\uFE0F\u200D\uD83D(?:\uDC8B\u200D\uD83D\uDC68|\uDC68)|\u200D\uD83D(?:\uDC8B\u200D\uD83D\uDC68|\uDC68)))))?|\uDC69(?:(?:\uD83C(?:\uDFFB(?:\u200D(?:\uD83E(?:\uDD1D\u200D\uD83D(?:\uDC69\uD83C[\uDFFC-\uDFFF]|\uDC68\uD83C[\uDFFC-\uDFFF])|[\uDDB0\uDDB1\uDDB3\uDDB2\uDDAF\uDDBC\uDDBD])|\u2695\uFE0F?|\uD83C[\uDF93\uDFEB\uDF3E\uDF73\uDFED\uDFA4\uDFA8]|\u2696\uFE0F?|\uD83D[\uDD27\uDCBC\uDD2C\uDCBB\uDE80\uDE92]|\u2708\uFE0F?))?|\uDFFC(?:\u200D(?:\uD83E(?:\uDD1D\u200D\uD83D(?:\uDC69\uD83C[\uDFFB\uDFFD-\uDFFF]|\uDC68\uD83C[\uDFFB\uDFFD-\uDFFF])|[\uDDB0\uDDB1\uDDB3\uDDB2\uDDAF\uDDBC\uDDBD])|\u2695\uFE0F?|\uD83C[\uDF93\uDFEB\uDF3E\uDF73\uDFED\uDFA4\uDFA8]|\u2696\uFE0F?|\uD83D[\uDD27\uDCBC\uDD2C\uDCBB\uDE80\uDE92]|\u2708\uFE0F?))?|\uDFFD(?:\u200D(?:\uD83E(?:\uDD1D\u200D\uD83D(?:\uDC69\uD83C[\uDFFB\uDFFC\uDFFE\uDFFF]|\uDC68\uD83C[\uDFFB\uDFFC\uDFFE\uDFFF])|[\uDDB0\uDDB1\uDDB3\uDDB2\uDDAF\uDDBC\uDDBD])|\u2695\uFE0F?|\uD83C[\uDF93\uDFEB\uDF3E\uDF73\uDFED\uDFA4\uDFA8]|\u2696\uFE0F?|\uD83D[\uDD27\uDCBC\uDD2C\uDCBB\uDE80\uDE92]|\u2708\uFE0F?))?|\uDFFE(?:\u200D(?:\uD83E(?:\uDD1D\u200D\uD83D(?:\uDC69\uD83C[\uDFFB-\uDFFD\uDFFF]|\uDC68\uD83C[\uDFFB-\uDFFD\uDFFF])|[\uDDB0\uDDB1\uDDB3\uDDB2\uDDAF\uDDBC\uDDBD])|\u2695\uFE0F?|\uD83C[\uDF93\uDFEB\uDF3E\uDF73\uDFED\uDFA4\uDFA8]|\u2696\uFE0F?|\uD83D[\uDD27\uDCBC\uDD2C\uDCBB\uDE80\uDE92]|\u2708\uFE0F?))?|\uDFFF(?:\u200D(?:\uD83E(?:\uDD1D\u200D\uD83D(?:\uDC69\uD83C[\uDFFB-\uDFFE]|\uDC68\uD83C[\uDFFB-\uDFFE])|[\uDDB0\uDDB1\uDDB3\uDDB2\uDDAF\uDDBC\uDDBD])|\u2695\uFE0F?|\uD83C[\uDF93\uDFEB\uDF3E\uDF73\uDFED\uDFA4\uDFA8]|\u2696\uFE0F?|\uD83D[\uDD27\uDCBC\uDD2C\uDCBB\uDE80\uDE92]|\u2708\uFE0F?))?)|\u200D(?:\uD83E[\uDDB0\uDDB1\uDDB3\uDDB2\uDDAF\uDDBC\uDDBD]|\u2695\uFE0F?|\uD83C[\uDF93\uDFEB\uDF3E\uDF73\uDFED\uDFA4\uDFA8]|\u2696\uFE0F?|\uD83D(?:\uDC69\u200D\uD83D(?:\uDC66(?:\u200D\uD83D\uDC66)?|\uDC67(?:\u200D\uD83D[\uDC66\uDC67])?)|\uDC66(?:\u200D\uD83D\uDC66)?|\uDC67(?:\u200D\uD83D[\uDC66\uDC67])?|[\uDD27\uDCBC\uDD2C\uDCBB\uDE80\uDE92])|\u2708\uFE0F?|\u2764(?:\uFE0F\u200D\uD83D(?:\uDC8B\u200D\uD83D[\uDC68\uDC69]|[\uDC68\uDC69])|\u200D\uD83D(?:\uDC8B\u200D\uD83D[\uDC68\uDC69]|[\uDC68\uDC69])))))?|[\uDC74\uDC75](?:\uD83C[\uDFFB-\uDFFF])?|[\uDE4D\uDE4E\uDE45\uDE46\uDC81\uDE4B\uDE47\uDC6E](?:(?:\uD83C(?:[\uDFFB-\uDFFF](?:\u200D(?:[\u2642\u2640]\uFE0F?))?)|\u200D(?:[\u2642\u2640]\uFE0F?)))?|\uDD75(?:(?:\uFE0F(?:\u200D(?:[\u2642\u2640]\uFE0F?))?|\uD83C(?:[\uDFFB-\uDFFF](?:\u200D(?:[\u2642\u2640]\uFE0F?))?)|\u200D(?:[\u2642\u2640]\uFE0F?)))?|[\uDC82\uDC77](?:(?:\uD83C(?:[\uDFFB-\uDFFF](?:\u200D(?:[\u2642\u2640]\uFE0F?))?)|\u200D(?:[\u2642\u2640]\uFE0F?)))?|\uDC78(?:\uD83C[\uDFFB-\uDFFF])?|\uDC73(?:(?:\uD83C(?:[\uDFFB-\uDFFF](?:\u200D(?:[\u2642\u2640]\uFE0F?))?)|\u200D(?:[\u2642\u2640]\uFE0F?)))?|[\uDC72\uDC70\uDC7C](?:\uD83C[\uDFFB-\uDFFF])?|[\uDC86\uDC87\uDEB6](?:(?:\uD83C(?:[\uDFFB-\uDFFF](?:\u200D(?:[\u2642\u2640]\uFE0F?))?)|\u200D(?:[\u2642\u2640]\uFE0F?)))?|[\uDC83\uDD7A](?:\uD83C[\uDFFB-\uDFFF])?|\uDD74(?:(?:\uD83C[\uDFFB-\uDFFF]|\uFE0F))?|\uDC6F(?:\u200D(?:[\u2642\u2640]\uFE0F?))?|[\uDEA3\uDEB4\uDEB5](?:(?:\uD83C(?:[\uDFFB-\uDFFF](?:\u200D(?:[\u2642\u2640]\uFE0F?))?)|\u200D(?:[\u2642\u2640]\uFE0F?)))?|[\uDEC0\uDECC\uDC6D\uDC6B\uDC6C](?:\uD83C[\uDFFB-\uDFFF])?|\uDDE3\uFE0F?|\uDC15(?:\u200D\uD83E\uDDBA)?|[\uDC3F\uDD4A\uDD77\uDD78\uDDFA\uDEE3\uDEE4\uDEE2\uDEF3\uDEE5\uDEE9\uDEF0\uDECE\uDD70\uDD79\uDDBC\uDD76\uDECD\uDDA5\uDDA8\uDDB1\uDDB2\uDCFD\uDD6F\uDDDE\uDDF3\uDD8B\uDD8A\uDD8C\uDD8D\uDDC2\uDDD2\uDDD3\uDD87\uDDC3\uDDC4\uDDD1\uDDDD\uDEE0\uDDE1\uDEE1\uDDDC\uDECF\uDECB\uDD49]\uFE0F?|[\uDE00\uDE03\uDE04\uDE01\uDE06\uDE05\uDE02\uDE42\uDE43\uDE09\uDE0A\uDE07\uDE0D\uDE18\uDE17\uDE1A\uDE19\uDE0B\uDE1B-\uDE1D\uDE10\uDE11\uDE36\uDE0F\uDE12\uDE44\uDE2C\uDE0C\uDE14\uDE2A\uDE34\uDE37\uDE35\uDE0E\uDE15\uDE1F\uDE41\uDE2E\uDE2F\uDE32\uDE33\uDE26-\uDE28\uDE30\uDE25\uDE22\uDE2D\uDE31\uDE16\uDE23\uDE1E\uDE13\uDE29\uDE2B\uDE24\uDE21\uDE20\uDE08\uDC7F\uDC80\uDCA9\uDC79-\uDC7B\uDC7D\uDC7E\uDE3A\uDE38\uDE39\uDE3B-\uDE3D\uDE40\uDE3F\uDE3E\uDE48-\uDE4A\uDC8B\uDC8C\uDC98\uDC9D\uDC96\uDC97\uDC93\uDC9E\uDC95\uDC9F\uDC94\uDC9B\uDC9A\uDC99\uDC9C\uDDA4\uDCAF\uDCA2\uDCA5\uDCAB\uDCA6\uDCA8\uDCA3\uDCAC\uDCAD\uDCA4\uDC40\uDC45\uDC44\uDC8F\uDC91\uDC6A\uDC64\uDC65\uDC63\uDC35\uDC12\uDC36\uDC29\uDC3A\uDC31\uDC08\uDC2F\uDC05\uDC06\uDC34\uDC0E\uDC2E\uDC02-\uDC04\uDC37\uDC16\uDC17\uDC3D\uDC0F\uDC11\uDC10\uDC2A\uDC2B\uDC18\uDC2D\uDC01\uDC00\uDC39\uDC30\uDC07\uDC3B\uDC28\uDC3C\uDC3E\uDC14\uDC13\uDC23-\uDC27\uDC38\uDC0A\uDC22\uDC0D\uDC32\uDC09\uDC33\uDC0B\uDC2C\uDC1F-\uDC21\uDC19\uDC1A\uDC0C\uDC1B-\uDC1E\uDC90\uDCAE\uDD2A\uDDFE\uDDFB\uDC92\uDDFC\uDDFD\uDD4C\uDED5\uDD4D\uDD4B\uDC88\uDE82-\uDE8A\uDE9D\uDE9E\uDE8B-\uDE8E\uDE90-\uDE9C\uDEF5\uDEFA\uDEB2\uDEF4\uDEF9\uDE8F\uDEA8\uDEA5\uDEA6\uDED1\uDEA7\uDEF6\uDEA4\uDEA2\uDEEB\uDEEC\uDCBA\uDE81\uDE9F-\uDEA1\uDE80\uDEF8\uDD5B\uDD67\uDD50\uDD5C\uDD51\uDD5D\uDD52\uDD5E\uDD53\uDD5F\uDD54\uDD60\uDD55\uDD61\uDD56\uDD62\uDD57\uDD63\uDD58\uDD64\uDD59\uDD65\uDD5A\uDD66\uDD25\uDCA7\uDEF7\uDD2E\uDC53-\uDC62\uDC51\uDC52\uDCFF\uDC84\uDC8D\uDC8E\uDD07-\uDD0A\uDCE2\uDCE3\uDCEF\uDD14\uDD15\uDCFB\uDCF1\uDCF2\uDCDE-\uDCE0\uDD0B\uDD0C\uDCBB\uDCBD-\uDCC0\uDCFA\uDCF7-\uDCF9\uDCFC\uDD0D\uDD0E\uDCA1\uDD26\uDCD4-\uDCDA\uDCD3\uDCD2\uDCC3\uDCDC\uDCC4\uDCF0\uDCD1\uDD16\uDCB0\uDCB4-\uDCB8\uDCB3\uDCB9\uDCB1\uDCB2\uDCE7-\uDCE9\uDCE4-\uDCE6\uDCEB\uDCEA\uDCEC-\uDCEE\uDCDD\uDCBC\uDCC1\uDCC2\uDCC5-\uDCD0\uDD12\uDD13\uDD0F-\uDD11\uDD28\uDD2B\uDD27\uDD29\uDD17\uDD2C\uDD2D\uDCE1\uDC89\uDC8A\uDEAA\uDEBD\uDEBF\uDEC1\uDED2\uDEAC\uDDFF\uDEAE\uDEB0\uDEB9-\uDEBC\uDEBE\uDEC2-\uDEC5\uDEB8\uDEAB\uDEB3\uDEAD\uDEAF\uDEB1\uDEB7\uDCF5\uDD1E\uDD03\uDD04\uDD19-\uDD1D\uDED0\uDD4E\uDD2F\uDD00-\uDD02\uDD3C\uDD3D\uDD05\uDD06\uDCF6\uDCF3\uDCF4\uDD31\uDCDB\uDD30\uDD1F-\uDD24\uDD34\uDFE0-\uDFE2\uDD35\uDFE3-\uDFE5\uDFE7-\uDFE9\uDFE6\uDFEA\uDFEB\uDD36-\uDD3B\uDCA0\uDD18\uDD33\uDD32\uDEA9])|\uD83E(?:[\uDD1A\uDD0F\uDD1E\uDD1F\uDD18\uDD19\uDD1B\uDD1C\uDD32\uDD33\uDDB5\uDDB6\uDDBB\uDDD2](?:\uD83C[\uDFFB-\uDFFF])?|\uDDD1(?:(?:\uD83C(?:[\uDFFB-\uDFFF](?:\u200D(?:\uD83E(?:\uDD1D\u200D\uD83E\uDDD1\uD83C[\uDFFB-\uDFFF]|[\uDDB0\uDDB1\uDDB3\uDDB2\uDDAF\uDDBC\uDDBD])|\u2695\uFE0F?|\uD83C[\uDF93\uDFEB\uDF3E\uDF73\uDFED\uDFA4\uDFA8]|\u2696\uFE0F?|\uD83D[\uDD27\uDCBC\uDD2C\uDCBB\uDE80\uDE92]|\u2708\uFE0F?))?)|\u200D(?:\uD83E(?:\uDD1D\u200D\uD83E\uDDD1|[\uDDB0\uDDB1\uDDB3\uDDB2\uDDAF\uDDBC\uDDBD])|\u2695\uFE0F?|\uD83C[\uDF93\uDFEB\uDF3E\uDF73\uDFED\uDFA4\uDFA8]|\u2696\uFE0F?|\uD83D[\uDD27\uDCBC\uDD2C\uDCBB\uDE80\uDE92]|\u2708\uFE0F?)))?|[\uDDD4\uDDD3](?:\uD83C[\uDFFB-\uDFFF])?|[\uDDCF\uDD26\uDD37](?:(?:\uD83C(?:[\uDFFB-\uDFFF](?:\u200D(?:[\u2642\u2640]\uFE0F?))?)|\u200D(?:[\u2642\u2640]\uFE0F?)))?|[\uDD34\uDDD5\uDD35\uDD30\uDD31\uDD36](?:\uD83C[\uDFFB-\uDFFF])?|[\uDDB8\uDDB9\uDDD9-\uDDDD](?:(?:\uD83C(?:[\uDFFB-\uDFFF](?:\u200D(?:[\u2642\u2640]\uFE0F?))?)|\u200D(?:[\u2642\u2640]\uFE0F?)))?|[\uDDDE\uDDDF](?:\u200D(?:[\u2642\u2640]\uFE0F?))?|[\uDDCD\uDDCE\uDDD6\uDDD7\uDD38](?:(?:\uD83C(?:[\uDFFB-\uDFFF](?:\u200D(?:[\u2642\u2640]\uFE0F?))?)|\u200D(?:[\u2642\u2640]\uFE0F?)))?|\uDD3C(?:\u200D(?:[\u2642\u2640]\uFE0F?))?|[\uDD3D\uDD3E\uDD39\uDDD8](?:(?:\uD83C(?:[\uDFFB-\uDFFF](?:\u200D(?:[\u2642\u2640]\uFE0F?))?)|\u200D(?:[\u2642\u2640]\uFE0F?)))?|[\uDD23\uDD70\uDD29\uDD2A\uDD11\uDD17\uDD2D\uDD2B\uDD14\uDD10\uDD28\uDD25\uDD24\uDD12\uDD15\uDD22\uDD2E\uDD27\uDD75\uDD76\uDD74\uDD2F\uDD20\uDD73\uDD13\uDDD0\uDD7A\uDD71\uDD2C\uDD21\uDD16\uDDE1\uDD0E\uDD0D\uDD1D\uDDBE\uDDBF\uDDE0\uDDB7\uDDB4\uDD3A\uDDB0\uDDB1\uDDB3\uDDB2\uDD8D\uDDA7\uDDAE\uDD8A\uDD9D\uDD81\uDD84\uDD93\uDD8C\uDD99\uDD92\uDD8F\uDD9B\uDD94\uDD87\uDDA5\uDDA6\uDDA8\uDD98\uDDA1\uDD83\uDD85\uDD86\uDDA2\uDD89\uDDA9\uDD9A\uDD9C\uDD8E\uDD95\uDD96\uDD88\uDD8B\uDD97\uDD82\uDD9F\uDDA0\uDD40\uDD6D\uDD5D\uDD65\uDD51\uDD54\uDD55\uDD52\uDD6C\uDD66\uDDC4\uDDC5\uDD5C\uDD50\uDD56\uDD68\uDD6F\uDD5E\uDDC7\uDDC0\uDD69\uDD53\uDD6A\uDD59\uDDC6\uDD5A\uDD58\uDD63\uDD57\uDDC8\uDDC2\uDD6B\uDD6E\uDD5F-\uDD61\uDD80\uDD9E\uDD90\uDD91\uDDAA\uDDC1\uDD67\uDD5B\uDD42\uDD43\uDD64\uDDC3\uDDC9\uDDCA\uDD62\uDD44\uDDED\uDDF1\uDDBD\uDDBC\uDE82\uDDF3\uDE90\uDDE8\uDDE7\uDD47-\uDD49\uDD4E\uDD4F\uDD4D\uDD4A\uDD4B\uDD45\uDD3F\uDD4C\uDE80\uDE81\uDDFF\uDDE9\uDDF8\uDDF5\uDDF6\uDD7D\uDD7C\uDDBA\uDDE3-\uDDE6\uDD7B\uDE71-\uDE73\uDD7E\uDD7F\uDE70\uDDE2\uDE95\uDD41\uDDEE\uDE94\uDDFE\uDE93\uDDAF\uDDF0\uDDF2\uDDEA-\uDDEC\uDE78-\uDE7A\uDE91\uDE92\uDDF4\uDDF7\uDDF9-\uDDFD\uDDEF])|[\u263A\u2639\u2620\u2763\u2764]\uFE0F?|\u270B(?:\uD83C[\uDFFB-\uDFFF])?|[\u270C\u261D](?:(?:\uD83C[\uDFFB-\uDFFF]|\uFE0F))?|\u270A(?:\uD83C[\uDFFB-\uDFFF])?|\u270D(?:(?:\uD83C[\uDFFB-\uDFFF]|\uFE0F))?|\uD83C(?:\uDF85(?:\uD83C[\uDFFB-\uDFFF])?|\uDFC3(?:(?:\uD83C(?:[\uDFFB-\uDFFF](?:\u200D(?:[\u2642\u2640]\uFE0F?))?)|\u200D(?:[\u2642\u2640]\uFE0F?)))?|[\uDFC7\uDFC2](?:\uD83C[\uDFFB-\uDFFF])?|\uDFCC(?:(?:\uFE0F(?:\u200D(?:[\u2642\u2640]\uFE0F?))?|\uD83C(?:[\uDFFB-\uDFFF](?:\u200D(?:[\u2642\u2640]\uFE0F?))?)|\u200D(?:[\u2642\u2640]\uFE0F?)))?|[\uDFC4\uDFCA](?:(?:\uD83C(?:[\uDFFB-\uDFFF](?:\u200D(?:[\u2642\u2640]\uFE0F?))?)|\u200D(?:[\u2642\u2640]\uFE0F?)))?|\uDFCB(?:(?:\uFE0F(?:\u200D(?:[\u2642\u2640]\uFE0F?))?|\uD83C(?:[\uDFFB-\uDFFF](?:\u200D(?:[\u2642\u2640]\uFE0F?))?)|\u200D(?:[\u2642\u2640]\uFE0F?)))?|[\uDFF5\uDF36\uDF7D\uDFD4-\uDFD6\uDFDC-\uDFDF\uDFDB\uDFD7\uDFD8\uDFDA\uDFD9\uDFCE\uDFCD\uDF21\uDF24-\uDF2C\uDF97\uDF9F\uDF96\uDF99-\uDF9B\uDF9E\uDFF7\uDD70\uDD71\uDD7E\uDD7F\uDE02\uDE37]\uFE0F?|\uDFF4(?:(?:\u200D\u2620\uFE0F?|\uDB40\uDC67\uDB40\uDC62\uDB40(?:\uDC65\uDB40\uDC6E\uDB40\uDC67\uDB40\uDC7F|\uDC73\uDB40\uDC63\uDB40\uDC74\uDB40\uDC7F|\uDC77\uDB40\uDC6C\uDB40\uDC73\uDB40\uDC7F)))?|\uDFF3(?:(?:\uFE0F(?:\u200D\uD83C\uDF08)?|\u200D\uD83C\uDF08))?|\uDDE6\uD83C[\uDDE8-\uDDEC\uDDEE\uDDF1\uDDF2\uDDF4\uDDF6-\uDDFA\uDDFC\uDDFD\uDDFF]|\uDDE7\uD83C[\uDDE6\uDDE7\uDDE9-\uDDEF\uDDF1-\uDDF4\uDDF6-\uDDF9\uDDFB\uDDFC\uDDFE\uDDFF]|\uDDE8\uD83C[\uDDE6\uDDE8\uDDE9\uDDEB-\uDDEE\uDDF0-\uDDF5\uDDF7\uDDFA-\uDDFF]|\uDDE9\uD83C[\uDDEA\uDDEC\uDDEF\uDDF0\uDDF2\uDDF4\uDDFF]|\uDDEA\uD83C[\uDDE6\uDDE8\uDDEA\uDDEC\uDDED\uDDF7-\uDDFA]|\uDDEB\uD83C[\uDDEE-\uDDF0\uDDF2\uDDF4\uDDF7]|\uDDEC\uD83C[\uDDE6\uDDE7\uDDE9-\uDDEE\uDDF1-\uDDF3\uDDF5-\uDDFA\uDDFC\uDDFE]|\uDDED\uD83C[\uDDF0\uDDF2\uDDF3\uDDF7\uDDF9\uDDFA]|\uDDEE\uD83C[\uDDE8-\uDDEA\uDDF1-\uDDF4\uDDF6-\uDDF9]|\uDDEF\uD83C[\uDDEA\uDDF2\uDDF4\uDDF5]|\uDDF0\uD83C[\uDDEA\uDDEC-\uDDEE\uDDF2\uDDF3\uDDF5\uDDF7\uDDFC\uDDFE\uDDFF]|\uDDF1\uD83C[\uDDE6-\uDDE8\uDDEE\uDDF0\uDDF7-\uDDFB\uDDFE]|\uDDF2\uD83C[\uDDE6\uDDE8-\uDDED\uDDF0-\uDDFF]|\uDDF3\uD83C[\uDDE6\uDDE8\uDDEA-\uDDEC\uDDEE\uDDF1\uDDF4\uDDF5\uDDF7\uDDFA\uDDFF]|\uDDF4\uD83C\uDDF2|\uDDF5\uD83C[\uDDE6\uDDEA-\uDDED\uDDF0-\uDDF3\uDDF7-\uDDF9\uDDFC\uDDFE]|\uDDF6\uD83C\uDDE6|\uDDF7\uD83C[\uDDEA\uDDF4\uDDF8\uDDFA\uDDFC]|\uDDF8\uD83C[\uDDE6-\uDDEA\uDDEC-\uDDF4\uDDF7-\uDDF9\uDDFB\uDDFD-\uDDFF]|\uDDF9\uD83C[\uDDE6\uDDE8\uDDE9\uDDEB-\uDDED\uDDEF-\uDDF4\uDDF7\uDDF9\uDDFB\uDDFC\uDDFF]|\uDDFA\uD83C[\uDDE6\uDDEC\uDDF2\uDDF3\uDDF8\uDDFE\uDDFF]|\uDDFB\uD83C[\uDDE6\uDDE8\uDDEA\uDDEC\uDDEE\uDDF3\uDDFA]|\uDDFC\uD83C[\uDDEB\uDDF8]|\uDDFD\uD83C\uDDF0|\uDDFE\uD83C[\uDDEA\uDDF9]|\uDDFF\uD83C[\uDDE6\uDDF2\uDDFC]|[\uDFFB-\uDFFF\uDF38-\uDF3C\uDF37\uDF31-\uDF35\uDF3E-\uDF43\uDF47-\uDF53\uDF45\uDF46\uDF3D\uDF44\uDF30\uDF5E\uDF56\uDF57\uDF54\uDF5F\uDF55\uDF2D-\uDF2F\uDF73\uDF72\uDF7F\uDF71\uDF58-\uDF5D\uDF60\uDF62-\uDF65\uDF61\uDF66-\uDF6A\uDF82\uDF70\uDF6B-\uDF6F\uDF7C\uDF75\uDF76\uDF7E\uDF77-\uDF7B\uDF74\uDFFA\uDF0D-\uDF10\uDF0B\uDFE0-\uDFE6\uDFE8-\uDFED\uDFEF\uDFF0\uDF01\uDF03-\uDF07\uDF09\uDFA0-\uDFA2\uDFAA\uDF11-\uDF20\uDF0C\uDF00\uDF08\uDF02\uDF0A\uDF83\uDF84\uDF86-\uDF8B\uDF8D-\uDF91\uDF80\uDF81\uDFAB\uDFC6\uDFC5\uDFC0\uDFD0\uDFC8\uDFC9\uDFBE\uDFB3\uDFCF\uDFD1-\uDFD3\uDFF8\uDFA3\uDFBD\uDFBF\uDFAF\uDFB1\uDFAE\uDFB0\uDFB2\uDCCF\uDC04\uDFB4\uDFAD\uDFA8\uDF92\uDFA9\uDF93\uDFBC\uDFB5\uDFB6\uDFA4\uDFA7\uDFB7-\uDFBB\uDFA5\uDFAC\uDFEE\uDFF9\uDFE7\uDFA6\uDD8E\uDD91-\uDD9A\uDE01\uDE36\uDE2F\uDE50\uDE39\uDE1A\uDE32\uDE51\uDE38\uDE34\uDE33\uDE3A\uDE35\uDFC1\uDF8C])|\u26F7\uFE0F?|\u26F9(?:(?:\uFE0F(?:\u200D(?:[\u2642\u2640]\uFE0F?))?|\uD83C(?:[\uDFFB-\uDFFF](?:\u200D(?:[\u2642\u2640]\uFE0F?))?)|\u200D(?:[\u2642\u2640]\uFE0F?)))?|[\u2618\u26F0\u26E9\u2668\u26F4\u2708\u23F1\u23F2\u2600\u2601\u26C8\u2602\u26F1\u2744\u2603\u2604\u26F8\u2660\u2665\u2666\u2663\u265F\u26D1\u260E\u2328\u2709\u270F\u2712\u2702\u26CF\u2692\u2694\u2699\u2696\u26D3\u2697\u26B0\u26B1\u26A0\u2622\u2623\u2B06\u2197\u27A1\u2198\u2B07\u2199\u2B05\u2196\u2195\u2194\u21A9\u21AA\u2934\u2935\u269B\u2721\u2638\u262F\u271D\u2626\u262A\u262E\u25B6\u23ED\u23EF\u25C0\u23EE\u23F8-\u23FA\u23CF\u2640\u2642\u2695\u267E\u267B\u269C\u2611\u2714\u2716\u303D\u2733\u2734\u2747\u203C\u2049\u3030\u00A9\u00AE\u2122]\uFE0F?|[\u0023\u002A\u0030-\u0039](?:\uFE0F\u20E3|\u20E3)|[\u2139\u24C2\u3297\u3299\u25FC\u25FB\u25AA\u25AB]\uFE0F?|[\u2615\u26EA\u26F2\u26FA\u26FD\u2693\u26F5\u231B\u23F3\u231A\u23F0\u2B50\u26C5\u2614\u26A1\u26C4\u2728\u26BD\u26BE\u26F3\u267F\u26D4\u2648-\u2653\u26CE\u23E9-\u23EC\u2B55\u2705\u274C\u274E\u2795-\u2797\u27B0\u27BF\u2753-\u2755\u2757\u26AB\u26AA\u2B1B\u2B1C\u25FE\u25FD])";
+ }
+}
\ No newline at end of file
diff --git a/src/BirdsiteLive.Domain/Tools/StatusExtractor.cs b/src/BirdsiteLive.Domain/Tools/StatusExtractor.cs
index 778f6b5..a181ac2 100644
--- a/src/BirdsiteLive.Domain/Tools/StatusExtractor.cs
+++ b/src/BirdsiteLive.Domain/Tools/StatusExtractor.cs
@@ -2,6 +2,7 @@
using System.Linq;
using System.Text.RegularExpressions;
using BirdsiteLive.ActivityPub.Models;
+using BirdsiteLive.Common.Regexes;
using BirdsiteLive.Common.Settings;
namespace BirdsiteLive.Domain.Tools
@@ -25,8 +26,6 @@ namespace BirdsiteLive.Domain.Tools
private readonly Regex _urlRegex = new Regex(@"((http|ftp|https):\/\/[\w\-_]+(\.[\w\-_]+)+([\w\-\.,@?^=%&:/~\+#]*[\w\-\@?^=%&/~\+#])?)");
- private readonly Regex _emojiRegex = new Regex(EmojiPattern);
-
private readonly InstanceSettings _instanceSettings;
#region Ctor
@@ -48,7 +47,7 @@ namespace BirdsiteLive.Domain.Tools
messageContent = Regex.Replace(messageContent, @"\(#", "( #");
// Secure emojis
- var emojiMatch = _emojiRegex.Matches(messageContent);
+ var emojiMatch = EmojiRegexes.Emoji.Matches(messageContent);
foreach (Match m in emojiMatch)
messageContent = Regex.Replace(messageContent, m.ToString(), $" {m} ");
@@ -141,8 +140,5 @@ namespace BirdsiteLive.Domain.Tools
return result;
}
-
-
- private const string EmojiPattern = @"(?:\uD83D(?:\uDD73\uFE0F?|\uDC41(?:(?:\uFE0F(?:\u200D\uD83D\uDDE8\uFE0F?)?|\u200D\uD83D\uDDE8\uFE0F?))?|[\uDDE8\uDDEF]\uFE0F?|\uDC4B(?:\uD83C[\uDFFB-\uDFFF])?|\uDD90(?:(?:\uD83C[\uDFFB-\uDFFF]|\uFE0F))?|[\uDD96\uDC4C\uDC48\uDC49\uDC46\uDD95\uDC47\uDC4D\uDC4E\uDC4A\uDC4F\uDE4C\uDC50\uDE4F\uDC85\uDCAA\uDC42\uDC43\uDC76\uDC66\uDC67](?:\uD83C[\uDFFB-\uDFFF])?|\uDC71(?:(?:\uD83C(?:[\uDFFB-\uDFFF](?:\u200D(?:[\u2640\u2642]\uFE0F?))?)|\u200D(?:[\u2640\u2642]\uFE0F?)))?|\uDC68(?:(?:\uD83C(?:\uDFFB(?:\u200D(?:\uD83E(?:\uDD1D\u200D\uD83D\uDC68\uD83C[\uDFFC-\uDFFF]|[\uDDB0\uDDB1\uDDB3\uDDB2\uDDAF\uDDBC\uDDBD])|\u2695\uFE0F?|\uD83C[\uDF93\uDFEB\uDF3E\uDF73\uDFED\uDFA4\uDFA8]|\u2696\uFE0F?|\uD83D[\uDD27\uDCBC\uDD2C\uDCBB\uDE80\uDE92]|\u2708\uFE0F?))?|\uDFFC(?:\u200D(?:\uD83E(?:\uDD1D\u200D\uD83D\uDC68\uD83C[\uDFFB\uDFFD-\uDFFF]|[\uDDB0\uDDB1\uDDB3\uDDB2\uDDAF\uDDBC\uDDBD])|\u2695\uFE0F?|\uD83C[\uDF93\uDFEB\uDF3E\uDF73\uDFED\uDFA4\uDFA8]|\u2696\uFE0F?|\uD83D[\uDD27\uDCBC\uDD2C\uDCBB\uDE80\uDE92]|\u2708\uFE0F?))?|\uDFFD(?:\u200D(?:\uD83E(?:\uDD1D\u200D\uD83D\uDC68\uD83C[\uDFFB\uDFFC\uDFFE\uDFFF]|[\uDDB0\uDDB1\uDDB3\uDDB2\uDDAF\uDDBC\uDDBD])|\u2695\uFE0F?|\uD83C[\uDF93\uDFEB\uDF3E\uDF73\uDFED\uDFA4\uDFA8]|\u2696\uFE0F?|\uD83D[\uDD27\uDCBC\uDD2C\uDCBB\uDE80\uDE92]|\u2708\uFE0F?))?|\uDFFE(?:\u200D(?:\uD83E(?:\uDD1D\u200D\uD83D\uDC68\uD83C[\uDFFB-\uDFFD\uDFFF]|[\uDDB0\uDDB1\uDDB3\uDDB2\uDDAF\uDDBC\uDDBD])|\u2695\uFE0F?|\uD83C[\uDF93\uDFEB\uDF3E\uDF73\uDFED\uDFA4\uDFA8]|\u2696\uFE0F?|\uD83D[\uDD27\uDCBC\uDD2C\uDCBB\uDE80\uDE92]|\u2708\uFE0F?))?|\uDFFF(?:\u200D(?:\uD83E(?:\uDD1D\u200D\uD83D\uDC68\uD83C[\uDFFB-\uDFFE]|[\uDDB0\uDDB1\uDDB3\uDDB2\uDDAF\uDDBC\uDDBD])|\u2695\uFE0F?|\uD83C[\uDF93\uDFEB\uDF3E\uDF73\uDFED\uDFA4\uDFA8]|\u2696\uFE0F?|\uD83D[\uDD27\uDCBC\uDD2C\uDCBB\uDE80\uDE92]|\u2708\uFE0F?))?)|\u200D(?:\uD83E[\uDDB0\uDDB1\uDDB3\uDDB2\uDDAF\uDDBC\uDDBD]|\u2695\uFE0F?|\uD83C[\uDF93\uDFEB\uDF3E\uDF73\uDFED\uDFA4\uDFA8]|\u2696\uFE0F?|\uD83D(?:\uDC69\u200D\uD83D(?:\uDC66(?:\u200D\uD83D\uDC66)?|\uDC67(?:\u200D\uD83D[\uDC66\uDC67])?)|\uDC68\u200D\uD83D(?:\uDC66(?:\u200D\uD83D\uDC66)?|\uDC67(?:\u200D\uD83D[\uDC66\uDC67])?)|\uDC66(?:\u200D\uD83D\uDC66)?|\uDC67(?:\u200D\uD83D[\uDC66\uDC67])?|[\uDD27\uDCBC\uDD2C\uDCBB\uDE80\uDE92])|\u2708\uFE0F?|\u2764(?:\uFE0F\u200D\uD83D(?:\uDC8B\u200D\uD83D\uDC68|\uDC68)|\u200D\uD83D(?:\uDC8B\u200D\uD83D\uDC68|\uDC68)))))?|\uDC69(?:(?:\uD83C(?:\uDFFB(?:\u200D(?:\uD83E(?:\uDD1D\u200D\uD83D(?:\uDC69\uD83C[\uDFFC-\uDFFF]|\uDC68\uD83C[\uDFFC-\uDFFF])|[\uDDB0\uDDB1\uDDB3\uDDB2\uDDAF\uDDBC\uDDBD])|\u2695\uFE0F?|\uD83C[\uDF93\uDFEB\uDF3E\uDF73\uDFED\uDFA4\uDFA8]|\u2696\uFE0F?|\uD83D[\uDD27\uDCBC\uDD2C\uDCBB\uDE80\uDE92]|\u2708\uFE0F?))?|\uDFFC(?:\u200D(?:\uD83E(?:\uDD1D\u200D\uD83D(?:\uDC69\uD83C[\uDFFB\uDFFD-\uDFFF]|\uDC68\uD83C[\uDFFB\uDFFD-\uDFFF])|[\uDDB0\uDDB1\uDDB3\uDDB2\uDDAF\uDDBC\uDDBD])|\u2695\uFE0F?|\uD83C[\uDF93\uDFEB\uDF3E\uDF73\uDFED\uDFA4\uDFA8]|\u2696\uFE0F?|\uD83D[\uDD27\uDCBC\uDD2C\uDCBB\uDE80\uDE92]|\u2708\uFE0F?))?|\uDFFD(?:\u200D(?:\uD83E(?:\uDD1D\u200D\uD83D(?:\uDC69\uD83C[\uDFFB\uDFFC\uDFFE\uDFFF]|\uDC68\uD83C[\uDFFB\uDFFC\uDFFE\uDFFF])|[\uDDB0\uDDB1\uDDB3\uDDB2\uDDAF\uDDBC\uDDBD])|\u2695\uFE0F?|\uD83C[\uDF93\uDFEB\uDF3E\uDF73\uDFED\uDFA4\uDFA8]|\u2696\uFE0F?|\uD83D[\uDD27\uDCBC\uDD2C\uDCBB\uDE80\uDE92]|\u2708\uFE0F?))?|\uDFFE(?:\u200D(?:\uD83E(?:\uDD1D\u200D\uD83D(?:\uDC69\uD83C[\uDFFB-\uDFFD\uDFFF]|\uDC68\uD83C[\uDFFB-\uDFFD\uDFFF])|[\uDDB0\uDDB1\uDDB3\uDDB2\uDDAF\uDDBC\uDDBD])|\u2695\uFE0F?|\uD83C[\uDF93\uDFEB\uDF3E\uDF73\uDFED\uDFA4\uDFA8]|\u2696\uFE0F?|\uD83D[\uDD27\uDCBC\uDD2C\uDCBB\uDE80\uDE92]|\u2708\uFE0F?))?|\uDFFF(?:\u200D(?:\uD83E(?:\uDD1D\u200D\uD83D(?:\uDC69\uD83C[\uDFFB-\uDFFE]|\uDC68\uD83C[\uDFFB-\uDFFE])|[\uDDB0\uDDB1\uDDB3\uDDB2\uDDAF\uDDBC\uDDBD])|\u2695\uFE0F?|\uD83C[\uDF93\uDFEB\uDF3E\uDF73\uDFED\uDFA4\uDFA8]|\u2696\uFE0F?|\uD83D[\uDD27\uDCBC\uDD2C\uDCBB\uDE80\uDE92]|\u2708\uFE0F?))?)|\u200D(?:\uD83E[\uDDB0\uDDB1\uDDB3\uDDB2\uDDAF\uDDBC\uDDBD]|\u2695\uFE0F?|\uD83C[\uDF93\uDFEB\uDF3E\uDF73\uDFED\uDFA4\uDFA8]|\u2696\uFE0F?|\uD83D(?:\uDC69\u200D\uD83D(?:\uDC66(?:\u200D\uD83D\uDC66)?|\uDC67(?:\u200D\uD83D[\uDC66\uDC67])?)|\uDC66(?:\u200D\uD83D\uDC66)?|\uDC67(?:\u200D\uD83D[\uDC66\uDC67])?|[\uDD27\uDCBC\uDD2C\uDCBB\uDE80\uDE92])|\u2708\uFE0F?|\u2764(?:\uFE0F\u200D\uD83D(?:\uDC8B\u200D\uD83D[\uDC68\uDC69]|[\uDC68\uDC69])|\u200D\uD83D(?:\uDC8B\u200D\uD83D[\uDC68\uDC69]|[\uDC68\uDC69])))))?|[\uDC74\uDC75](?:\uD83C[\uDFFB-\uDFFF])?|[\uDE4D\uDE4E\uDE45\uDE46\uDC81\uDE4B\uDE47\uDC6E](?:(?:\uD83C(?:[\uDFFB-\uDFFF](?:\u200D(?:[\u2642\u2640]\uFE0F?))?)|\u200D(?:[\u2642\u2640]\uFE0F?)))?|\uDD75(?:(?:\uFE0F(?:\u200D(?:[\u2642\u2640]\uFE0F?))?|\uD83C(?:[\uDFFB-\uDFFF](?:\u200D(?:[\u2642\u2640]\uFE0F?))?)|\u200D(?:[\u2642\u2640]\uFE0F?)))?|[\uDC82\uDC77](?:(?:\uD83C(?:[\uDFFB-\uDFFF](?:\u200D(?:[\u2642\u2640]\uFE0F?))?)|\u200D(?:[\u2642\u2640]\uFE0F?)))?|\uDC78(?:\uD83C[\uDFFB-\uDFFF])?|\uDC73(?:(?:\uD83C(?:[\uDFFB-\uDFFF](?:\u200D(?:[\u2642\u2640]\uFE0F?))?)|\u200D(?:[\u2642\u2640]\uFE0F?)))?|[\uDC72\uDC70\uDC7C](?:\uD83C[\uDFFB-\uDFFF])?|[\uDC86\uDC87\uDEB6](?:(?:\uD83C(?:[\uDFFB-\uDFFF](?:\u200D(?:[\u2642\u2640]\uFE0F?))?)|\u200D(?:[\u2642\u2640]\uFE0F?)))?|[\uDC83\uDD7A](?:\uD83C[\uDFFB-\uDFFF])?|\uDD74(?:(?:\uD83C[\uDFFB-\uDFFF]|\uFE0F))?|\uDC6F(?:\u200D(?:[\u2642\u2640]\uFE0F?))?|[\uDEA3\uDEB4\uDEB5](?:(?:\uD83C(?:[\uDFFB-\uDFFF](?:\u200D(?:[\u2642\u2640]\uFE0F?))?)|\u200D(?:[\u2642\u2640]\uFE0F?)))?|[\uDEC0\uDECC\uDC6D\uDC6B\uDC6C](?:\uD83C[\uDFFB-\uDFFF])?|\uDDE3\uFE0F?|\uDC15(?:\u200D\uD83E\uDDBA)?|[\uDC3F\uDD4A\uDD77\uDD78\uDDFA\uDEE3\uDEE4\uDEE2\uDEF3\uDEE5\uDEE9\uDEF0\uDECE\uDD70\uDD79\uDDBC\uDD76\uDECD\uDDA5\uDDA8\uDDB1\uDDB2\uDCFD\uDD6F\uDDDE\uDDF3\uDD8B\uDD8A\uDD8C\uDD8D\uDDC2\uDDD2\uDDD3\uDD87\uDDC3\uDDC4\uDDD1\uDDDD\uDEE0\uDDE1\uDEE1\uDDDC\uDECF\uDECB\uDD49]\uFE0F?|[\uDE00\uDE03\uDE04\uDE01\uDE06\uDE05\uDE02\uDE42\uDE43\uDE09\uDE0A\uDE07\uDE0D\uDE18\uDE17\uDE1A\uDE19\uDE0B\uDE1B-\uDE1D\uDE10\uDE11\uDE36\uDE0F\uDE12\uDE44\uDE2C\uDE0C\uDE14\uDE2A\uDE34\uDE37\uDE35\uDE0E\uDE15\uDE1F\uDE41\uDE2E\uDE2F\uDE32\uDE33\uDE26-\uDE28\uDE30\uDE25\uDE22\uDE2D\uDE31\uDE16\uDE23\uDE1E\uDE13\uDE29\uDE2B\uDE24\uDE21\uDE20\uDE08\uDC7F\uDC80\uDCA9\uDC79-\uDC7B\uDC7D\uDC7E\uDE3A\uDE38\uDE39\uDE3B-\uDE3D\uDE40\uDE3F\uDE3E\uDE48-\uDE4A\uDC8B\uDC8C\uDC98\uDC9D\uDC96\uDC97\uDC93\uDC9E\uDC95\uDC9F\uDC94\uDC9B\uDC9A\uDC99\uDC9C\uDDA4\uDCAF\uDCA2\uDCA5\uDCAB\uDCA6\uDCA8\uDCA3\uDCAC\uDCAD\uDCA4\uDC40\uDC45\uDC44\uDC8F\uDC91\uDC6A\uDC64\uDC65\uDC63\uDC35\uDC12\uDC36\uDC29\uDC3A\uDC31\uDC08\uDC2F\uDC05\uDC06\uDC34\uDC0E\uDC2E\uDC02-\uDC04\uDC37\uDC16\uDC17\uDC3D\uDC0F\uDC11\uDC10\uDC2A\uDC2B\uDC18\uDC2D\uDC01\uDC00\uDC39\uDC30\uDC07\uDC3B\uDC28\uDC3C\uDC3E\uDC14\uDC13\uDC23-\uDC27\uDC38\uDC0A\uDC22\uDC0D\uDC32\uDC09\uDC33\uDC0B\uDC2C\uDC1F-\uDC21\uDC19\uDC1A\uDC0C\uDC1B-\uDC1E\uDC90\uDCAE\uDD2A\uDDFE\uDDFB\uDC92\uDDFC\uDDFD\uDD4C\uDED5\uDD4D\uDD4B\uDC88\uDE82-\uDE8A\uDE9D\uDE9E\uDE8B-\uDE8E\uDE90-\uDE9C\uDEF5\uDEFA\uDEB2\uDEF4\uDEF9\uDE8F\uDEA8\uDEA5\uDEA6\uDED1\uDEA7\uDEF6\uDEA4\uDEA2\uDEEB\uDEEC\uDCBA\uDE81\uDE9F-\uDEA1\uDE80\uDEF8\uDD5B\uDD67\uDD50\uDD5C\uDD51\uDD5D\uDD52\uDD5E\uDD53\uDD5F\uDD54\uDD60\uDD55\uDD61\uDD56\uDD62\uDD57\uDD63\uDD58\uDD64\uDD59\uDD65\uDD5A\uDD66\uDD25\uDCA7\uDEF7\uDD2E\uDC53-\uDC62\uDC51\uDC52\uDCFF\uDC84\uDC8D\uDC8E\uDD07-\uDD0A\uDCE2\uDCE3\uDCEF\uDD14\uDD15\uDCFB\uDCF1\uDCF2\uDCDE-\uDCE0\uDD0B\uDD0C\uDCBB\uDCBD-\uDCC0\uDCFA\uDCF7-\uDCF9\uDCFC\uDD0D\uDD0E\uDCA1\uDD26\uDCD4-\uDCDA\uDCD3\uDCD2\uDCC3\uDCDC\uDCC4\uDCF0\uDCD1\uDD16\uDCB0\uDCB4-\uDCB8\uDCB3\uDCB9\uDCB1\uDCB2\uDCE7-\uDCE9\uDCE4-\uDCE6\uDCEB\uDCEA\uDCEC-\uDCEE\uDCDD\uDCBC\uDCC1\uDCC2\uDCC5-\uDCD0\uDD12\uDD13\uDD0F-\uDD11\uDD28\uDD2B\uDD27\uDD29\uDD17\uDD2C\uDD2D\uDCE1\uDC89\uDC8A\uDEAA\uDEBD\uDEBF\uDEC1\uDED2\uDEAC\uDDFF\uDEAE\uDEB0\uDEB9-\uDEBC\uDEBE\uDEC2-\uDEC5\uDEB8\uDEAB\uDEB3\uDEAD\uDEAF\uDEB1\uDEB7\uDCF5\uDD1E\uDD03\uDD04\uDD19-\uDD1D\uDED0\uDD4E\uDD2F\uDD00-\uDD02\uDD3C\uDD3D\uDD05\uDD06\uDCF6\uDCF3\uDCF4\uDD31\uDCDB\uDD30\uDD1F-\uDD24\uDD34\uDFE0-\uDFE2\uDD35\uDFE3-\uDFE5\uDFE7-\uDFE9\uDFE6\uDFEA\uDFEB\uDD36-\uDD3B\uDCA0\uDD18\uDD33\uDD32\uDEA9])|\uD83E(?:[\uDD1A\uDD0F\uDD1E\uDD1F\uDD18\uDD19\uDD1B\uDD1C\uDD32\uDD33\uDDB5\uDDB6\uDDBB\uDDD2](?:\uD83C[\uDFFB-\uDFFF])?|\uDDD1(?:(?:\uD83C(?:[\uDFFB-\uDFFF](?:\u200D(?:\uD83E(?:\uDD1D\u200D\uD83E\uDDD1\uD83C[\uDFFB-\uDFFF]|[\uDDB0\uDDB1\uDDB3\uDDB2\uDDAF\uDDBC\uDDBD])|\u2695\uFE0F?|\uD83C[\uDF93\uDFEB\uDF3E\uDF73\uDFED\uDFA4\uDFA8]|\u2696\uFE0F?|\uD83D[\uDD27\uDCBC\uDD2C\uDCBB\uDE80\uDE92]|\u2708\uFE0F?))?)|\u200D(?:\uD83E(?:\uDD1D\u200D\uD83E\uDDD1|[\uDDB0\uDDB1\uDDB3\uDDB2\uDDAF\uDDBC\uDDBD])|\u2695\uFE0F?|\uD83C[\uDF93\uDFEB\uDF3E\uDF73\uDFED\uDFA4\uDFA8]|\u2696\uFE0F?|\uD83D[\uDD27\uDCBC\uDD2C\uDCBB\uDE80\uDE92]|\u2708\uFE0F?)))?|[\uDDD4\uDDD3](?:\uD83C[\uDFFB-\uDFFF])?|[\uDDCF\uDD26\uDD37](?:(?:\uD83C(?:[\uDFFB-\uDFFF](?:\u200D(?:[\u2642\u2640]\uFE0F?))?)|\u200D(?:[\u2642\u2640]\uFE0F?)))?|[\uDD34\uDDD5\uDD35\uDD30\uDD31\uDD36](?:\uD83C[\uDFFB-\uDFFF])?|[\uDDB8\uDDB9\uDDD9-\uDDDD](?:(?:\uD83C(?:[\uDFFB-\uDFFF](?:\u200D(?:[\u2642\u2640]\uFE0F?))?)|\u200D(?:[\u2642\u2640]\uFE0F?)))?|[\uDDDE\uDDDF](?:\u200D(?:[\u2642\u2640]\uFE0F?))?|[\uDDCD\uDDCE\uDDD6\uDDD7\uDD38](?:(?:\uD83C(?:[\uDFFB-\uDFFF](?:\u200D(?:[\u2642\u2640]\uFE0F?))?)|\u200D(?:[\u2642\u2640]\uFE0F?)))?|\uDD3C(?:\u200D(?:[\u2642\u2640]\uFE0F?))?|[\uDD3D\uDD3E\uDD39\uDDD8](?:(?:\uD83C(?:[\uDFFB-\uDFFF](?:\u200D(?:[\u2642\u2640]\uFE0F?))?)|\u200D(?:[\u2642\u2640]\uFE0F?)))?|[\uDD23\uDD70\uDD29\uDD2A\uDD11\uDD17\uDD2D\uDD2B\uDD14\uDD10\uDD28\uDD25\uDD24\uDD12\uDD15\uDD22\uDD2E\uDD27\uDD75\uDD76\uDD74\uDD2F\uDD20\uDD73\uDD13\uDDD0\uDD7A\uDD71\uDD2C\uDD21\uDD16\uDDE1\uDD0E\uDD0D\uDD1D\uDDBE\uDDBF\uDDE0\uDDB7\uDDB4\uDD3A\uDDB0\uDDB1\uDDB3\uDDB2\uDD8D\uDDA7\uDDAE\uDD8A\uDD9D\uDD81\uDD84\uDD93\uDD8C\uDD99\uDD92\uDD8F\uDD9B\uDD94\uDD87\uDDA5\uDDA6\uDDA8\uDD98\uDDA1\uDD83\uDD85\uDD86\uDDA2\uDD89\uDDA9\uDD9A\uDD9C\uDD8E\uDD95\uDD96\uDD88\uDD8B\uDD97\uDD82\uDD9F\uDDA0\uDD40\uDD6D\uDD5D\uDD65\uDD51\uDD54\uDD55\uDD52\uDD6C\uDD66\uDDC4\uDDC5\uDD5C\uDD50\uDD56\uDD68\uDD6F\uDD5E\uDDC7\uDDC0\uDD69\uDD53\uDD6A\uDD59\uDDC6\uDD5A\uDD58\uDD63\uDD57\uDDC8\uDDC2\uDD6B\uDD6E\uDD5F-\uDD61\uDD80\uDD9E\uDD90\uDD91\uDDAA\uDDC1\uDD67\uDD5B\uDD42\uDD43\uDD64\uDDC3\uDDC9\uDDCA\uDD62\uDD44\uDDED\uDDF1\uDDBD\uDDBC\uDE82\uDDF3\uDE90\uDDE8\uDDE7\uDD47-\uDD49\uDD4E\uDD4F\uDD4D\uDD4A\uDD4B\uDD45\uDD3F\uDD4C\uDE80\uDE81\uDDFF\uDDE9\uDDF8\uDDF5\uDDF6\uDD7D\uDD7C\uDDBA\uDDE3-\uDDE6\uDD7B\uDE71-\uDE73\uDD7E\uDD7F\uDE70\uDDE2\uDE95\uDD41\uDDEE\uDE94\uDDFE\uDE93\uDDAF\uDDF0\uDDF2\uDDEA-\uDDEC\uDE78-\uDE7A\uDE91\uDE92\uDDF4\uDDF7\uDDF9-\uDDFD\uDDEF])|[\u263A\u2639\u2620\u2763\u2764]\uFE0F?|\u270B(?:\uD83C[\uDFFB-\uDFFF])?|[\u270C\u261D](?:(?:\uD83C[\uDFFB-\uDFFF]|\uFE0F))?|\u270A(?:\uD83C[\uDFFB-\uDFFF])?|\u270D(?:(?:\uD83C[\uDFFB-\uDFFF]|\uFE0F))?|\uD83C(?:\uDF85(?:\uD83C[\uDFFB-\uDFFF])?|\uDFC3(?:(?:\uD83C(?:[\uDFFB-\uDFFF](?:\u200D(?:[\u2642\u2640]\uFE0F?))?)|\u200D(?:[\u2642\u2640]\uFE0F?)))?|[\uDFC7\uDFC2](?:\uD83C[\uDFFB-\uDFFF])?|\uDFCC(?:(?:\uFE0F(?:\u200D(?:[\u2642\u2640]\uFE0F?))?|\uD83C(?:[\uDFFB-\uDFFF](?:\u200D(?:[\u2642\u2640]\uFE0F?))?)|\u200D(?:[\u2642\u2640]\uFE0F?)))?|[\uDFC4\uDFCA](?:(?:\uD83C(?:[\uDFFB-\uDFFF](?:\u200D(?:[\u2642\u2640]\uFE0F?))?)|\u200D(?:[\u2642\u2640]\uFE0F?)))?|\uDFCB(?:(?:\uFE0F(?:\u200D(?:[\u2642\u2640]\uFE0F?))?|\uD83C(?:[\uDFFB-\uDFFF](?:\u200D(?:[\u2642\u2640]\uFE0F?))?)|\u200D(?:[\u2642\u2640]\uFE0F?)))?|[\uDFF5\uDF36\uDF7D\uDFD4-\uDFD6\uDFDC-\uDFDF\uDFDB\uDFD7\uDFD8\uDFDA\uDFD9\uDFCE\uDFCD\uDF21\uDF24-\uDF2C\uDF97\uDF9F\uDF96\uDF99-\uDF9B\uDF9E\uDFF7\uDD70\uDD71\uDD7E\uDD7F\uDE02\uDE37]\uFE0F?|\uDFF4(?:(?:\u200D\u2620\uFE0F?|\uDB40\uDC67\uDB40\uDC62\uDB40(?:\uDC65\uDB40\uDC6E\uDB40\uDC67\uDB40\uDC7F|\uDC73\uDB40\uDC63\uDB40\uDC74\uDB40\uDC7F|\uDC77\uDB40\uDC6C\uDB40\uDC73\uDB40\uDC7F)))?|\uDFF3(?:(?:\uFE0F(?:\u200D\uD83C\uDF08)?|\u200D\uD83C\uDF08))?|\uDDE6\uD83C[\uDDE8-\uDDEC\uDDEE\uDDF1\uDDF2\uDDF4\uDDF6-\uDDFA\uDDFC\uDDFD\uDDFF]|\uDDE7\uD83C[\uDDE6\uDDE7\uDDE9-\uDDEF\uDDF1-\uDDF4\uDDF6-\uDDF9\uDDFB\uDDFC\uDDFE\uDDFF]|\uDDE8\uD83C[\uDDE6\uDDE8\uDDE9\uDDEB-\uDDEE\uDDF0-\uDDF5\uDDF7\uDDFA-\uDDFF]|\uDDE9\uD83C[\uDDEA\uDDEC\uDDEF\uDDF0\uDDF2\uDDF4\uDDFF]|\uDDEA\uD83C[\uDDE6\uDDE8\uDDEA\uDDEC\uDDED\uDDF7-\uDDFA]|\uDDEB\uD83C[\uDDEE-\uDDF0\uDDF2\uDDF4\uDDF7]|\uDDEC\uD83C[\uDDE6\uDDE7\uDDE9-\uDDEE\uDDF1-\uDDF3\uDDF5-\uDDFA\uDDFC\uDDFE]|\uDDED\uD83C[\uDDF0\uDDF2\uDDF3\uDDF7\uDDF9\uDDFA]|\uDDEE\uD83C[\uDDE8-\uDDEA\uDDF1-\uDDF4\uDDF6-\uDDF9]|\uDDEF\uD83C[\uDDEA\uDDF2\uDDF4\uDDF5]|\uDDF0\uD83C[\uDDEA\uDDEC-\uDDEE\uDDF2\uDDF3\uDDF5\uDDF7\uDDFC\uDDFE\uDDFF]|\uDDF1\uD83C[\uDDE6-\uDDE8\uDDEE\uDDF0\uDDF7-\uDDFB\uDDFE]|\uDDF2\uD83C[\uDDE6\uDDE8-\uDDED\uDDF0-\uDDFF]|\uDDF3\uD83C[\uDDE6\uDDE8\uDDEA-\uDDEC\uDDEE\uDDF1\uDDF4\uDDF5\uDDF7\uDDFA\uDDFF]|\uDDF4\uD83C\uDDF2|\uDDF5\uD83C[\uDDE6\uDDEA-\uDDED\uDDF0-\uDDF3\uDDF7-\uDDF9\uDDFC\uDDFE]|\uDDF6\uD83C\uDDE6|\uDDF7\uD83C[\uDDEA\uDDF4\uDDF8\uDDFA\uDDFC]|\uDDF8\uD83C[\uDDE6-\uDDEA\uDDEC-\uDDF4\uDDF7-\uDDF9\uDDFB\uDDFD-\uDDFF]|\uDDF9\uD83C[\uDDE6\uDDE8\uDDE9\uDDEB-\uDDED\uDDEF-\uDDF4\uDDF7\uDDF9\uDDFB\uDDFC\uDDFF]|\uDDFA\uD83C[\uDDE6\uDDEC\uDDF2\uDDF3\uDDF8\uDDFE\uDDFF]|\uDDFB\uD83C[\uDDE6\uDDE8\uDDEA\uDDEC\uDDEE\uDDF3\uDDFA]|\uDDFC\uD83C[\uDDEB\uDDF8]|\uDDFD\uD83C\uDDF0|\uDDFE\uD83C[\uDDEA\uDDF9]|\uDDFF\uD83C[\uDDE6\uDDF2\uDDFC]|[\uDFFB-\uDFFF\uDF38-\uDF3C\uDF37\uDF31-\uDF35\uDF3E-\uDF43\uDF47-\uDF53\uDF45\uDF46\uDF3D\uDF44\uDF30\uDF5E\uDF56\uDF57\uDF54\uDF5F\uDF55\uDF2D-\uDF2F\uDF73\uDF72\uDF7F\uDF71\uDF58-\uDF5D\uDF60\uDF62-\uDF65\uDF61\uDF66-\uDF6A\uDF82\uDF70\uDF6B-\uDF6F\uDF7C\uDF75\uDF76\uDF7E\uDF77-\uDF7B\uDF74\uDFFA\uDF0D-\uDF10\uDF0B\uDFE0-\uDFE6\uDFE8-\uDFED\uDFEF\uDFF0\uDF01\uDF03-\uDF07\uDF09\uDFA0-\uDFA2\uDFAA\uDF11-\uDF20\uDF0C\uDF00\uDF08\uDF02\uDF0A\uDF83\uDF84\uDF86-\uDF8B\uDF8D-\uDF91\uDF80\uDF81\uDFAB\uDFC6\uDFC5\uDFC0\uDFD0\uDFC8\uDFC9\uDFBE\uDFB3\uDFCF\uDFD1-\uDFD3\uDFF8\uDFA3\uDFBD\uDFBF\uDFAF\uDFB1\uDFAE\uDFB0\uDFB2\uDCCF\uDC04\uDFB4\uDFAD\uDFA8\uDF92\uDFA9\uDF93\uDFBC\uDFB5\uDFB6\uDFA4\uDFA7\uDFB7-\uDFBB\uDFA5\uDFAC\uDFEE\uDFF9\uDFE7\uDFA6\uDD8E\uDD91-\uDD9A\uDE01\uDE36\uDE2F\uDE50\uDE39\uDE1A\uDE32\uDE51\uDE38\uDE34\uDE33\uDE3A\uDE35\uDFC1\uDF8C])|\u26F7\uFE0F?|\u26F9(?:(?:\uFE0F(?:\u200D(?:[\u2642\u2640]\uFE0F?))?|\uD83C(?:[\uDFFB-\uDFFF](?:\u200D(?:[\u2642\u2640]\uFE0F?))?)|\u200D(?:[\u2642\u2640]\uFE0F?)))?|[\u2618\u26F0\u26E9\u2668\u26F4\u2708\u23F1\u23F2\u2600\u2601\u26C8\u2602\u26F1\u2744\u2603\u2604\u26F8\u2660\u2665\u2666\u2663\u265F\u26D1\u260E\u2328\u2709\u270F\u2712\u2702\u26CF\u2692\u2694\u2699\u2696\u26D3\u2697\u26B0\u26B1\u26A0\u2622\u2623\u2B06\u2197\u27A1\u2198\u2B07\u2199\u2B05\u2196\u2195\u2194\u21A9\u21AA\u2934\u2935\u269B\u2721\u2638\u262F\u271D\u2626\u262A\u262E\u25B6\u23ED\u23EF\u25C0\u23EE\u23F8-\u23FA\u23CF\u2640\u2642\u2695\u267E\u267B\u269C\u2611\u2714\u2716\u303D\u2733\u2734\u2747\u203C\u2049\u3030\u00A9\u00AE\u2122]\uFE0F?|[\u0023\u002A\u0030-\u0039](?:\uFE0F\u20E3|\u20E3)|[\u2139\u24C2\u3297\u3299\u25FC\u25FB\u25AA\u25AB]\uFE0F?|[\u2615\u26EA\u26F2\u26FA\u26FD\u2693\u26F5\u231B\u23F3\u231A\u23F0\u2B50\u26C5\u2614\u26A1\u26C4\u2728\u26BD\u26BE\u26F3\u267F\u26D4\u2648-\u2653\u26CE\u23E9-\u23EC\u2B55\u2705\u274C\u274E\u2795-\u2797\u27B0\u27BF\u2753-\u2755\u2757\u26AB\u26AA\u2B1B\u2B1C\u25FE\u25FD])";
}
}
\ No newline at end of file
From c7bf5f79f8a205a91bda0c8cf40e9486b51c81fa Mon Sep 17 00:00:00 2001
From: Nicolas Constant
Date: Mon, 1 Feb 2021 21:48:47 -0500
Subject: [PATCH 05/14] testing new Hashtag regex
---
.../Regexes/HashtagRegexes.cs | 3 +-
.../Tools/StatusExtractor.cs | 46 ++++--
.../Tools/StatusExtractorTests.cs | 137 ++++++++++++++----
3 files changed, 144 insertions(+), 42 deletions(-)
diff --git a/src/BirdsiteLive.Common/Regexes/HashtagRegexes.cs b/src/BirdsiteLive.Common/Regexes/HashtagRegexes.cs
index c5e8ed7..99b2f32 100644
--- a/src/BirdsiteLive.Common/Regexes/HashtagRegexes.cs
+++ b/src/BirdsiteLive.Common/Regexes/HashtagRegexes.cs
@@ -4,6 +4,7 @@ namespace BirdsiteLive.Common.Regexes
{
public class HashtagRegexes
{
- public static readonly Regex Hashtag = new Regex(@"(.)(#[a-zA-Z0-9]+)(\s|$|[.,;:!?/|-])");
+ public static readonly Regex HashtagName = new Regex(@"^[a-zA-Z0-9_]+$");
+ public static readonly Regex Hashtag = new Regex(@"(.?)#([a-zA-Z0-9_]+)(\s|$|[<.,;:!?/|-])");
}
}
\ No newline at end of file
diff --git a/src/BirdsiteLive.Domain/Tools/StatusExtractor.cs b/src/BirdsiteLive.Domain/Tools/StatusExtractor.cs
index a181ac2..9429096 100644
--- a/src/BirdsiteLive.Domain/Tools/StatusExtractor.cs
+++ b/src/BirdsiteLive.Domain/Tools/StatusExtractor.cs
@@ -4,6 +4,8 @@ using System.Text.RegularExpressions;
using BirdsiteLive.ActivityPub.Models;
using BirdsiteLive.Common.Regexes;
using BirdsiteLive.Common.Settings;
+using BirdsiteLive.Twitter;
+using Microsoft.Extensions.Logging;
namespace BirdsiteLive.Domain.Tools
{
@@ -14,7 +16,7 @@ namespace BirdsiteLive.Domain.Tools
public class StatusExtractor : IStatusExtractor
{
- private readonly Regex _hastagRegex = new Regex(@"\W(\#[a-zA-Z0-9_ー]+\b)(?!;)");
+ //private readonly Regex _hastagRegex = new Regex(@"\W(\#[a-zA-Z0-9_ー]+\b)(?!;)");
//private readonly Regex _hastagRegex = new Regex(@"#\w+");
//private readonly Regex _hastagRegex = new Regex(@"(?<=[\s>]|^)#(\w*[a-zA-Z0-9_ー]+\w*)\b(?!;)");
//private readonly Regex _hastagRegex = new Regex(@"(?<=[\s>]|^)#(\w*[a-zA-Z0-9_ー]+)\b(?!;)");
@@ -27,29 +29,31 @@ namespace BirdsiteLive.Domain.Tools
private readonly Regex _urlRegex = new Regex(@"((http|ftp|https):\/\/[\w\-_]+(\.[\w\-_]+)+([\w\-\.,@?^=%&:/~\+#]*[\w\-\@?^=%&/~\+#])?)");
private readonly InstanceSettings _instanceSettings;
+ private readonly ILogger _logger;
#region Ctor
- public StatusExtractor(InstanceSettings instanceSettings)
+ public StatusExtractor(InstanceSettings instanceSettings, ILogger logger)
{
_instanceSettings = instanceSettings;
+ _logger = logger;
}
#endregion
public (string content, Tag[] tags) Extract(string messageContent, bool extractMentions = true)
{
var tags = new List();
- messageContent = $" {messageContent} ";
+ //messageContent = $" {messageContent} ";
// Replace return lines
- messageContent = Regex.Replace(messageContent, @"\r\n\r\n?|\n\n", "
");
- messageContent = Regex.Replace(messageContent, @"\r\n?|\n", "
");
- messageContent = Regex.Replace(messageContent, @"\(@", "( @");
- messageContent = Regex.Replace(messageContent, @"\(#", "( #");
+ messageContent = Regex.Replace(messageContent, @"\r\n\r\n?|\n\n", "
");
+ messageContent = Regex.Replace(messageContent, @"\r\n?|\n", "
");
+ //messageContent = Regex.Replace(messageContent, @"\(@", "( @");
+ //messageContent = Regex.Replace(messageContent, @"\(#", "( #");
- // Secure emojis
- var emojiMatch = EmojiRegexes.Emoji.Matches(messageContent);
- foreach (Match m in emojiMatch)
- messageContent = Regex.Replace(messageContent, m.ToString(), $" {m} ");
+ //// Secure emojis
+ //var emojiMatch = EmojiRegexes.Emoji.Matches(messageContent);
+ //foreach (Match m in emojiMatch)
+ // messageContent = Regex.Replace(messageContent, m.ToString(), $" {m} ");
// Extract Urls
var urlMatch = _urlRegex.Matches(messageContent);
@@ -83,12 +87,19 @@ namespace BirdsiteLive.Domain.Tools
}
// Extract Hashtags
- var hashtagMatch = OrderByLength(_hastagRegex.Matches(messageContent));
+ var hashtagMatch = OrderByLength(HashtagRegexes.Hashtag.Matches(messageContent));
foreach (Match m in hashtagMatch.OrderByDescending(x => x.Length))
{
- var tag = m.ToString().Replace("#", string.Empty).Replace("\n", string.Empty).Trim();
- var url = $"https://{_instanceSettings.Domain}/tags/{tag}";
+ var tag = m.Groups[2].ToString();
+ //var tag = m.ToString().Replace("#", string.Empty).Replace("\n", string.Empty).Trim();
+ if (!HashtagRegexes.HashtagName.IsMatch(tag))
+ {
+ _logger.LogError("Parsing Hashtag failed: {Tag} on {Content}", tag, messageContent);
+ continue;
+ }
+
+ var url = $"https://{_instanceSettings.Domain}/tags/{tag}";
tags.Add(new Tag
{
name = $"#{tag}",
@@ -96,8 +107,11 @@ namespace BirdsiteLive.Domain.Tools
type = "Hashtag"
});
- messageContent = Regex.Replace(messageContent, m.ToString(),
- $@" #{tag}");
+ //messageContent = Regex.Replace(messageContent, m.ToString(),
+ // $@" #{tag}");
+
+ messageContent = Regex.Replace(messageContent, m.Groups[0].ToString(),
+ $@"{m.Groups[1]}#{tag}{m.Groups[3]}");
}
// Extract Mentions
diff --git a/src/Tests/BirdsiteLive.Domain.Tests/Tools/StatusExtractorTests.cs b/src/Tests/BirdsiteLive.Domain.Tests/Tools/StatusExtractorTests.cs
index f5dc91a..f126b12 100644
--- a/src/Tests/BirdsiteLive.Domain.Tests/Tools/StatusExtractorTests.cs
+++ b/src/Tests/BirdsiteLive.Domain.Tests/Tools/StatusExtractorTests.cs
@@ -3,7 +3,9 @@ using System.Linq;
using BirdsiteLive.Common.Settings;
using BirdsiteLive.Domain.Tools;
using BirdsiteLive.Twitter.Models;
+using Microsoft.Extensions.Logging;
using Microsoft.VisualStudio.TestTools.UnitTesting;
+using Moq;
namespace BirdsiteLive.Domain.Tests.Tools
{
@@ -28,11 +30,16 @@ namespace BirdsiteLive.Domain.Tests.Tools
#region Stubs
var message = "Bla.\n\n@Mention blo. https://t.co/pgtrJi9600";
#endregion
-
- var service = new StatusExtractor(_settings);
+
+ #region Mocks
+ var logger = new Mock>();
+ #endregion
+
+ var service = new StatusExtractor(_settings, logger.Object);
var result = service.Extract(message);
#region Validations
+ logger.VerifyAll();
Assert.IsTrue(result.content.Contains("Bla."));
Assert.IsTrue(result.content.Contains("
"));
#endregion
@@ -45,10 +52,15 @@ namespace BirdsiteLive.Domain.Tests.Tools
var message = "Bla.\n@Mention blo. https://t.co/pgtrJi9600";
#endregion
- var service = new StatusExtractor(_settings);
+ #region Mocks
+ var logger = new Mock>();
+ #endregion
+
+ var service = new StatusExtractor(_settings, logger.Object);
var result = service.Extract(message);
#region Validations
+ logger.VerifyAll();
Assert.IsTrue(result.content.Contains("Bla."));
Assert.IsTrue(result.content.Contains("
"));
#endregion
@@ -61,10 +73,15 @@ namespace BirdsiteLive.Domain.Tests.Tools
var message = $"Bla!{Environment.NewLine}https://t.co/L8BpyHgg25";
#endregion
- var service = new StatusExtractor(_settings);
+ #region Mocks
+ var logger = new Mock>();
+ #endregion
+
+ var service = new StatusExtractor(_settings, logger.Object);
var result = service.Extract(message);
#region Validations
+ logger.VerifyAll();
Assert.AreEqual(0, result.tags.Length);
Assert.IsTrue(result.content.Contains("Bla!"));
@@ -79,10 +96,15 @@ namespace BirdsiteLive.Domain.Tests.Tools
var message = $"Bla!{Environment.NewLine}https://www.eff.org/deeplinks/2020/07/pact-act-not-solution-problem-harmful-online-content";
#endregion
- var service = new StatusExtractor(_settings);
+ #region Mocks
+ var logger = new Mock>();
+ #endregion
+
+ var service = new StatusExtractor(_settings, logger.Object);
var result = service.Extract(message);
#region Validations
+ logger.VerifyAll();
Assert.AreEqual(0, result.tags.Length);
Assert.IsTrue(result.content.Contains("Bla!"));
@@ -97,10 +119,15 @@ namespace BirdsiteLive.Domain.Tests.Tools
var message = $"Bla!{Environment.NewLine}https://www.eff.org/deeplinks/2020/07/pact";
#endregion
- var service = new StatusExtractor(_settings);
+ #region Mocks
+ var logger = new Mock>();
+ #endregion
+
+ var service = new StatusExtractor(_settings, logger.Object);
var result = service.Extract(message);
#region Validations
+ logger.VerifyAll();
Assert.AreEqual(0, result.tags.Length);
Assert.IsTrue(result.content.Contains("Bla!"));
@@ -115,10 +142,15 @@ namespace BirdsiteLive.Domain.Tests.Tools
var message = $"https://t.co/L8BpyHgg25 Bla!{Environment.NewLine}https://www.eff.org/deeplinks/2020/07/pact-act-not-solution-problem-harmful-online-content";
#endregion
- var service = new StatusExtractor(_settings);
+ #region Mocks
+ var logger = new Mock>();
+ #endregion
+
+ var service = new StatusExtractor(_settings, logger.Object);
var result = service.Extract(message);
#region Validations
+ logger.VerifyAll();
Assert.AreEqual(0, result.tags.Length);
Assert.IsTrue(result.content.Contains("Bla!"));
@@ -132,13 +164,18 @@ namespace BirdsiteLive.Domain.Tests.Tools
public void Extract_SingleHashTag_Test()
{
#region Stubs
- var message = $"Bla!{Environment.NewLine}#mytag";
+ var message = $"Bla!{Environment.NewLine}#mytag";
#endregion
- var service = new StatusExtractor(_settings);
+ #region Mocks
+ var logger = new Mock>();
+ #endregion
+
+ var service = new StatusExtractor(_settings, logger.Object);
var result = service.Extract(message);
#region Validations
+ logger.VerifyAll();
Assert.AreEqual(1, result.tags.Length);
Assert.AreEqual("#mytag", result.tags.First().name);
Assert.AreEqual("Hashtag", result.tags.First().type);
@@ -153,13 +190,18 @@ namespace BirdsiteLive.Domain.Tests.Tools
public void Extract_SingleHashTag_AtStart_Test()
{
#region Stubs
- var message = $"#mytag Bla!";
+ var message = "#mytag Bla!";
#endregion
- var service = new StatusExtractor(_settings);
+ #region Mocks
+ var logger = new Mock>();
+ #endregion
+
+ var service = new StatusExtractor(_settings, logger.Object);
var result = service.Extract(message);
#region Validations
+ logger.VerifyAll();
Assert.AreEqual(1, result.tags.Length);
Assert.AreEqual("#mytag", result.tags.First().name);
Assert.AreEqual("Hashtag", result.tags.First().type);
@@ -174,20 +216,25 @@ namespace BirdsiteLive.Domain.Tests.Tools
public void Extract_SingleHashTag_SpecialChar_Test()
{
#region Stubs
- var message = $"Bla!{Environment.NewLine}#COVIDー19";
+ var message = $"Bla!{Environment.NewLine}#COVID_19";
#endregion
- var service = new StatusExtractor(_settings);
+ #region Mocks
+ var logger = new Mock>();
+ #endregion
+
+ var service = new StatusExtractor(_settings, logger.Object);
var result = service.Extract(message);
#region Validations
+ logger.VerifyAll();
Assert.AreEqual(1, result.tags.Length);
- Assert.AreEqual("#COVIDー19", result.tags.First().name);
+ Assert.AreEqual("#COVID_19", result.tags.First().name);
Assert.AreEqual("Hashtag", result.tags.First().type);
- Assert.AreEqual("https://domain.name/tags/COVIDー19", result.tags.First().href);
+ Assert.AreEqual("https://domain.name/tags/COVID_19", result.tags.First().href);
Assert.IsTrue(result.content.Contains("Bla!"));
- Assert.IsTrue(result.content.Contains(@"#COVIDー19"));
+ Assert.IsTrue(result.content.Contains(@"#COVID_19"));
#endregion
}
@@ -195,13 +242,18 @@ namespace BirdsiteLive.Domain.Tests.Tools
public void Extract_MultiHashTags_Test()
{
#region Stubs
- var message = $"Bla!{Environment.NewLine}#mytag #mytag2 #mytag3{Environment.NewLine}Test #bal Test";
+ var message = $"Bla!{Environment.NewLine}#mytag #mytag2 #mytag3{Environment.NewLine}Test #bal Test";
#endregion
- var service = new StatusExtractor(_settings);
+ #region Mocks
+ var logger = new Mock>();
+ #endregion
+
+ var service = new StatusExtractor(_settings, logger.Object);
var result = service.Extract(message);
#region Validations
+ logger.VerifyAll();
Assert.AreEqual(4, result.tags.Length);
Assert.IsTrue(result.content.Contains("Bla!"));
Assert.IsTrue(result.content.Contains(@"#mytag"));
@@ -218,10 +270,15 @@ namespace BirdsiteLive.Domain.Tests.Tools
var message = $"Bla!{Environment.NewLine}@mynickname";
#endregion
- var service = new StatusExtractor(_settings);
+ #region Mocks
+ var logger = new Mock>();
+ #endregion
+
+ var service = new StatusExtractor(_settings, logger.Object);
var result = service.Extract(message);
#region Validations
+ logger.VerifyAll();
Assert.AreEqual(1, result.tags.Length);
Assert.AreEqual("@mynickname@domain.name", result.tags.First().name);
Assert.AreEqual("Mention", result.tags.First().type);
@@ -239,10 +296,15 @@ namespace BirdsiteLive.Domain.Tests.Tools
var message = $"Bla!{Environment.NewLine}@my___nickname";
#endregion
- var service = new StatusExtractor(_settings);
+ #region Mocks
+ var logger = new Mock>();
+ #endregion
+
+ var service = new StatusExtractor(_settings, logger.Object);
var result = service.Extract(message);
#region Validations
+ logger.VerifyAll();
Assert.AreEqual(1, result.tags.Length);
Assert.AreEqual("@my___nickname@domain.name", result.tags.First().name);
Assert.AreEqual("Mention", result.tags.First().type);
@@ -260,10 +322,15 @@ namespace BirdsiteLive.Domain.Tests.Tools
var message = $"@mynickname Bla!";
#endregion
- var service = new StatusExtractor(_settings);
+ #region Mocks
+ var logger = new Mock>();
+ #endregion
+
+ var service = new StatusExtractor(_settings, logger.Object);
var result = service.Extract(message);
#region Validations
+ logger.VerifyAll();
Assert.AreEqual(1, result.tags.Length);
Assert.AreEqual("@mynickname@domain.name", result.tags.First().name);
Assert.AreEqual("Mention", result.tags.First().type);
@@ -281,10 +348,15 @@ namespace BirdsiteLive.Domain.Tests.Tools
var message = $"Bla!{Environment.NewLine}@mynickname @mynickname2 @mynickname3{Environment.NewLine}Test @dada Test";
#endregion
- var service = new StatusExtractor(_settings);
+ #region Mocks
+ var logger = new Mock>();
+ #endregion
+
+ var service = new StatusExtractor(_settings, logger.Object);
var result = service.Extract(message);
#region Validations
+ logger.VerifyAll();
Assert.AreEqual(4, result.tags.Length);
Assert.IsTrue(result.content.Contains("Bla!"));
Assert.IsTrue(result.content.Contains(@"@mynickname"));
@@ -301,10 +373,15 @@ namespace BirdsiteLive.Domain.Tests.Tools
var message = $"Bla!{Environment.NewLine}@mynickname #mytag2 @mynickname3{Environment.NewLine}Test @dada #dada Test";
#endregion
- var service = new StatusExtractor(_settings);
+ #region Mocks
+ var logger = new Mock>();
+ #endregion
+
+ var service = new StatusExtractor(_settings, logger.Object);
var result = service.Extract(message);
#region Validations
+ logger.VerifyAll();
Assert.AreEqual(5, result.tags.Length);
Assert.IsTrue(result.content.Contains("Bla!"));
Assert.IsTrue(result.content.Contains(@"@mynickname"));
@@ -324,10 +401,15 @@ namespace BirdsiteLive.Domain.Tests.Tools
//var message = $"tests@mynickname";
#endregion
- var service = new StatusExtractor(_settings);
+ #region Mocks
+ var logger = new Mock>();
+ #endregion
+
+ var service = new StatusExtractor(_settings, logger.Object);
var result = service.Extract(message);
#region Validations
+ logger.VerifyAll();
Assert.AreEqual(1, result.tags.Length);
Assert.IsTrue(result.content.Contains(
@"😤 @mynickname"));
@@ -344,10 +426,15 @@ namespace BirdsiteLive.Domain.Tests.Tools
//var message = $"tests@mynickname";
#endregion
- var service = new StatusExtractor(_settings);
+ #region Mocks
+ var logger = new Mock>();
+ #endregion
+
+ var service = new StatusExtractor(_settings, logger.Object);
var result = service.Extract(message);
#region Validations
+ logger.VerifyAll();
Assert.AreEqual(1, result.tags.Length);
Assert.IsTrue(result.content.Equals(@"bla ( @mynickname test)"));
#endregion
From c409a93b185747a77c0d84eb872001a37dbee23c Mon Sep 17 00:00:00 2001
From: Nicolas Constant
Date: Tue, 2 Feb 2021 00:24:33 -0500
Subject: [PATCH 06/14] testing new mention regex
---
.../Regexes/UserRegexes.cs | 2 +-
.../Tools/StatusExtractor.cs | 33 ++++++++++++-------
.../Tools/StatusExtractorTests.cs | 13 ++++----
3 files changed, 29 insertions(+), 19 deletions(-)
diff --git a/src/BirdsiteLive.Common/Regexes/UserRegexes.cs b/src/BirdsiteLive.Common/Regexes/UserRegexes.cs
index 74c8b2e..a9f1053 100644
--- a/src/BirdsiteLive.Common/Regexes/UserRegexes.cs
+++ b/src/BirdsiteLive.Common/Regexes/UserRegexes.cs
@@ -5,6 +5,6 @@ namespace BirdsiteLive.Common.Regexes
public class UserRegexes
{
public static readonly Regex TwitterAccount = new Regex(@"^[a-zA-Z0-9_]+$");
- public static readonly Regex Mention = new Regex(@"(.)(@[a-zA-Z0-9_]+)(\s|$|[,;:!?/|-]|(. ))");
+ public static readonly Regex Mention = new Regex(@"(.?)@([a-zA-Z0-9_]+)(\s|$|[<,;:!?/|-]|(. ))");
}
}
\ No newline at end of file
diff --git a/src/BirdsiteLive.Domain/Tools/StatusExtractor.cs b/src/BirdsiteLive.Domain/Tools/StatusExtractor.cs
index 9429096..0d4c1f3 100644
--- a/src/BirdsiteLive.Domain/Tools/StatusExtractor.cs
+++ b/src/BirdsiteLive.Domain/Tools/StatusExtractor.cs
@@ -21,7 +21,7 @@ namespace BirdsiteLive.Domain.Tools
//private readonly Regex _hastagRegex = new Regex(@"(?<=[\s>]|^)#(\w*[a-zA-Z0-9_ー]+\w*)\b(?!;)");
//private readonly Regex _hastagRegex = new Regex(@"(?<=[\s>]|^)#(\w*[a-zA-Z0-9_ー]+)\b(?!;)");
- private readonly Regex _mentionRegex = new Regex(@"\W(\@[a-zA-Z0-9_ー]+\b)(?!;)");
+ //private readonly Regex _mentionRegex = new Regex(@"\W(\@[a-zA-Z0-9_ー]+\b)(?!;)");
//private readonly Regex _mentionRegex = new Regex(@"@\w+");
//private readonly Regex _mentionRegex = new Regex(@"(?<=[\s>]|^)@(\w*[a-zA-Z0-9_ー]+\w*)\b(?!;)");
//private readonly Regex _mentionRegex = new Regex(@"(?<=[\s>]|^)@(\w*[a-zA-Z0-9_ー]+)\b(?!;)");
@@ -110,17 +110,25 @@ namespace BirdsiteLive.Domain.Tools
//messageContent = Regex.Replace(messageContent, m.ToString(),
// $@" #{tag}");
- messageContent = Regex.Replace(messageContent, m.Groups[0].ToString(),
+ messageContent = Regex.Replace(messageContent, Regex.Escape(m.Groups[0].ToString()),
$@"{m.Groups[1]}#{tag}{m.Groups[3]}");
}
// Extract Mentions
if (extractMentions)
{
- var mentionMatch = OrderByLength(_mentionRegex.Matches(messageContent));
+ var mentionMatch = OrderByLength(UserRegexes.Mention.Matches(messageContent));
foreach (Match m in mentionMatch.OrderByDescending(x => x.Length))
{
- var mention = m.ToString().Replace("@", string.Empty).Replace("\n", string.Empty).Trim();
+ var mention = m.Groups[2].ToString();
+ //var mention = m.ToString().Replace("@", string.Empty).Replace("\n", string.Empty).Trim();
+
+ if (!UserRegexes.TwitterAccount.IsMatch(mention))
+ {
+ _logger.LogError("Parsing Mention failed: {Mention} on {Content}", mention, messageContent);
+ continue;
+ }
+
var url = $"https://{_instanceSettings.Domain}/users/{mention}";
var name = $"@{mention}@{_instanceSettings.Domain}";
@@ -131,16 +139,19 @@ namespace BirdsiteLive.Domain.Tools
type = "Mention"
});
- messageContent = Regex.Replace(messageContent, m.ToString(),
- $@" @{mention}");
+ //messageContent = Regex.Replace(messageContent, m.ToString(),
+ // $@" @{mention}");
+
+ messageContent = Regex.Replace(messageContent, Regex.Escape(m.Groups[0].ToString()),
+ $@"{m.Groups[1]}@{mention}{m.Groups[3]}");
}
}
- // Clean up return lines
- messageContent = Regex.Replace(messageContent, @" ", "
");
- messageContent = Regex.Replace(messageContent, @"
", "
");
- messageContent = Regex.Replace(messageContent, @" ", " ");
- messageContent = Regex.Replace(messageContent, @" ", " ");
+ //// Clean up return lines
+ //messageContent = Regex.Replace(messageContent, @"
", "
");
+ //messageContent = Regex.Replace(messageContent, @"
", "
");
+ //messageContent = Regex.Replace(messageContent, @" ", " ");
+ //messageContent = Regex.Replace(messageContent, @" ", " ");
return (messageContent.Trim(), tags.ToArray());
}
diff --git a/src/Tests/BirdsiteLive.Domain.Tests/Tools/StatusExtractorTests.cs b/src/Tests/BirdsiteLive.Domain.Tests/Tools/StatusExtractorTests.cs
index f126b12..4ed8ee8 100644
--- a/src/Tests/BirdsiteLive.Domain.Tests/Tools/StatusExtractorTests.cs
+++ b/src/Tests/BirdsiteLive.Domain.Tests/Tools/StatusExtractorTests.cs
@@ -267,7 +267,7 @@ namespace BirdsiteLive.Domain.Tests.Tools
public void Extract_SingleMentionTag_Test()
{
#region Stubs
- var message = $"Bla!{Environment.NewLine}@mynickname";
+ var message = $"Bla!{Environment.NewLine}@mynickname";
#endregion
#region Mocks
@@ -293,7 +293,7 @@ namespace BirdsiteLive.Domain.Tests.Tools
public void Extract_SingleMentionTag_SpecialChar_Test()
{
#region Stubs
- var message = $"Bla!{Environment.NewLine}@my___nickname";
+ var message = $"Bla!{Environment.NewLine}@my___nickname";
#endregion
#region Mocks
@@ -391,8 +391,7 @@ namespace BirdsiteLive.Domain.Tests.Tools
Assert.IsTrue(result.content.Contains(@"#dada"));
#endregion
}
-
-
+
[TestMethod]
public void Extract_Emoji_Test()
{
@@ -412,9 +411,9 @@ namespace BirdsiteLive.Domain.Tests.Tools
logger.VerifyAll();
Assert.AreEqual(1, result.tags.Length);
Assert.IsTrue(result.content.Contains(
- @"😤 @mynickname"));
+ @"😤@mynickname"));
- Assert.IsTrue(result.content.Contains(@"😎 😍 🤗 🤩 😘"));
+ Assert.IsTrue(result.content.Contains(@"😎😍🤗🤩😘"));
#endregion
}
@@ -436,7 +435,7 @@ namespace BirdsiteLive.Domain.Tests.Tools
#region Validations
logger.VerifyAll();
Assert.AreEqual(1, result.tags.Length);
- Assert.IsTrue(result.content.Equals(@"bla ( @mynickname test)"));
+ Assert.IsTrue(result.content.Equals(@"bla (@mynickname test)"));
#endregion
}
}
From 8daebbc8199bf617ba14c1b42744f3f56e5f7057 Mon Sep 17 00:00:00 2001
From: Nicolas Constant
Date: Tue, 2 Feb 2021 00:25:36 -0500
Subject: [PATCH 07/14] clean up
---
.../Tools/StatusExtractor.cs | 19 +------------------
1 file changed, 1 insertion(+), 18 deletions(-)
diff --git a/src/BirdsiteLive.Domain/Tools/StatusExtractor.cs b/src/BirdsiteLive.Domain/Tools/StatusExtractor.cs
index 0d4c1f3..b6b571c 100644
--- a/src/BirdsiteLive.Domain/Tools/StatusExtractor.cs
+++ b/src/BirdsiteLive.Domain/Tools/StatusExtractor.cs
@@ -42,13 +42,10 @@ namespace BirdsiteLive.Domain.Tools
public (string content, Tag[] tags) Extract(string messageContent, bool extractMentions = true)
{
var tags = new List();
- //messageContent = $" {messageContent} ";
// Replace return lines
messageContent = Regex.Replace(messageContent, @"\r\n\r\n?|\n\n", "
");
messageContent = Regex.Replace(messageContent, @"\r\n?|\n", "
");
- //messageContent = Regex.Replace(messageContent, @"\(@", "( @");
- //messageContent = Regex.Replace(messageContent, @"\(#", "( #");
//// Secure emojis
//var emojiMatch = EmojiRegexes.Emoji.Matches(messageContent);
@@ -91,7 +88,6 @@ namespace BirdsiteLive.Domain.Tools
foreach (Match m in hashtagMatch.OrderByDescending(x => x.Length))
{
var tag = m.Groups[2].ToString();
- //var tag = m.ToString().Replace("#", string.Empty).Replace("\n", string.Empty).Trim();
if (!HashtagRegexes.HashtagName.IsMatch(tag))
{
@@ -107,9 +103,6 @@ namespace BirdsiteLive.Domain.Tools
type = "Hashtag"
});
- //messageContent = Regex.Replace(messageContent, m.ToString(),
- // $@" #{tag}");
-
messageContent = Regex.Replace(messageContent, Regex.Escape(m.Groups[0].ToString()),
$@"{m.Groups[1]}#{tag}{m.Groups[3]}");
}
@@ -121,7 +114,6 @@ namespace BirdsiteLive.Domain.Tools
foreach (Match m in mentionMatch.OrderByDescending(x => x.Length))
{
var mention = m.Groups[2].ToString();
- //var mention = m.ToString().Replace("@", string.Empty).Replace("\n", string.Empty).Trim();
if (!UserRegexes.TwitterAccount.IsMatch(mention))
{
@@ -138,21 +130,12 @@ namespace BirdsiteLive.Domain.Tools
href = url,
type = "Mention"
});
-
- //messageContent = Regex.Replace(messageContent, m.ToString(),
- // $@" @{mention}");
-
+
messageContent = Regex.Replace(messageContent, Regex.Escape(m.Groups[0].ToString()),
$@"{m.Groups[1]}@{mention}{m.Groups[3]}");
}
}
- //// Clean up return lines
- //messageContent = Regex.Replace(messageContent, @"
", "
");
- //messageContent = Regex.Replace(messageContent, @"
", "
");
- //messageContent = Regex.Replace(messageContent, @" ", " ");
- //messageContent = Regex.Replace(messageContent, @" ", " ");
-
return (messageContent.Trim(), tags.ToArray());
}
From 0c6ee3dd4decc80103508b976eaeea0961250e06 Mon Sep 17 00:00:00 2001
From: Nicolas Constant
Date: Tue, 2 Feb 2021 00:26:26 -0500
Subject: [PATCH 08/14] clean up
---
src/BirdsiteLive.Domain/Tools/StatusExtractor.cs | 14 +-------------
1 file changed, 1 insertion(+), 13 deletions(-)
diff --git a/src/BirdsiteLive.Domain/Tools/StatusExtractor.cs b/src/BirdsiteLive.Domain/Tools/StatusExtractor.cs
index b6b571c..a43ccb0 100644
--- a/src/BirdsiteLive.Domain/Tools/StatusExtractor.cs
+++ b/src/BirdsiteLive.Domain/Tools/StatusExtractor.cs
@@ -16,18 +16,6 @@ namespace BirdsiteLive.Domain.Tools
public class StatusExtractor : IStatusExtractor
{
- //private readonly Regex _hastagRegex = new Regex(@"\W(\#[a-zA-Z0-9_ー]+\b)(?!;)");
- //private readonly Regex _hastagRegex = new Regex(@"#\w+");
- //private readonly Regex _hastagRegex = new Regex(@"(?<=[\s>]|^)#(\w*[a-zA-Z0-9_ー]+\w*)\b(?!;)");
- //private readonly Regex _hastagRegex = new Regex(@"(?<=[\s>]|^)#(\w*[a-zA-Z0-9_ー]+)\b(?!;)");
-
- //private readonly Regex _mentionRegex = new Regex(@"\W(\@[a-zA-Z0-9_ー]+\b)(?!;)");
- //private readonly Regex _mentionRegex = new Regex(@"@\w+");
- //private readonly Regex _mentionRegex = new Regex(@"(?<=[\s>]|^)@(\w*[a-zA-Z0-9_ー]+\w*)\b(?!;)");
- //private readonly Regex _mentionRegex = new Regex(@"(?<=[\s>]|^)@(\w*[a-zA-Z0-9_ー]+)\b(?!;)");
-
- private readonly Regex _urlRegex = new Regex(@"((http|ftp|https):\/\/[\w\-_]+(\.[\w\-_]+)+([\w\-\.,@?^=%&:/~\+#]*[\w\-\@?^=%&/~\+#])?)");
-
private readonly InstanceSettings _instanceSettings;
private readonly ILogger _logger;
@@ -53,7 +41,7 @@ namespace BirdsiteLive.Domain.Tools
// messageContent = Regex.Replace(messageContent, m.ToString(), $" {m} ");
// Extract Urls
- var urlMatch = _urlRegex.Matches(messageContent);
+ var urlMatch = UrlRegexes.Url.Matches(messageContent);
foreach (Match m in urlMatch)
{
var url = m.ToString().Replace("\n", string.Empty).Trim();
From 25d1f360badcf9e048bc0f5a6d06ba0777e52254 Mon Sep 17 00:00:00 2001
From: Nicolas Constant
Date: Tue, 2 Feb 2021 00:27:48 -0500
Subject: [PATCH 09/14] added test
---
.../Tools/StatusExtractorTests.cs | 27 +++++++++++++++++++
1 file changed, 27 insertions(+)
diff --git a/src/Tests/BirdsiteLive.Domain.Tests/Tools/StatusExtractorTests.cs b/src/Tests/BirdsiteLive.Domain.Tests/Tools/StatusExtractorTests.cs
index 4ed8ee8..92ff130 100644
--- a/src/Tests/BirdsiteLive.Domain.Tests/Tools/StatusExtractorTests.cs
+++ b/src/Tests/BirdsiteLive.Domain.Tests/Tools/StatusExtractorTests.cs
@@ -289,6 +289,33 @@ namespace BirdsiteLive.Domain.Tests.Tools
#endregion
}
+ [TestMethod]
+ public void Extract_SingleMentionTag_Dot_Test()
+ {
+ #region Stubs
+ var message = $".@mynickname Bla!{Environment.NewLine}Blo";
+ #endregion
+
+ #region Mocks
+ var logger = new Mock>();
+ #endregion
+
+ var service = new StatusExtractor(_settings, logger.Object);
+ var result = service.Extract(message);
+
+ #region Validations
+ logger.VerifyAll();
+ Assert.AreEqual(1, result.tags.Length);
+ Assert.AreEqual("@mynickname@domain.name", result.tags.First().name);
+ Assert.AreEqual("Mention", result.tags.First().type);
+ Assert.AreEqual("https://domain.name/users/mynickname", result.tags.First().href);
+
+ Assert.IsTrue(result.content.Contains("Bla!"));
+ Assert.IsTrue(result.content.Contains("Blo"));
+ Assert.IsTrue(result.content.Contains(@"@mynickname"));
+ #endregion
+ }
+
[TestMethod]
public void Extract_SingleMentionTag_SpecialChar_Test()
{
From 717f69054269983fc286d3a7be983769e717c497 Mon Sep 17 00:00:00 2001
From: Nicolas Constant
Date: Tue, 2 Feb 2021 00:38:48 -0500
Subject: [PATCH 10/14] fix tests
---
.../RetrieveTweetsProcessorTests.cs | 26 ++++++++++++++++---
1 file changed, 23 insertions(+), 3 deletions(-)
diff --git a/src/Tests/BirdsiteLive.Pipeline.Tests/Processors/RetrieveTweetsProcessorTests.cs b/src/Tests/BirdsiteLive.Pipeline.Tests/Processors/RetrieveTweetsProcessorTests.cs
index d66c2f7..b35a688 100644
--- a/src/Tests/BirdsiteLive.Pipeline.Tests/Processors/RetrieveTweetsProcessorTests.cs
+++ b/src/Tests/BirdsiteLive.Pipeline.Tests/Processors/RetrieveTweetsProcessorTests.cs
@@ -7,6 +7,7 @@ using BirdsiteLive.DAL.Models;
using BirdsiteLive.Pipeline.Processors;
using BirdsiteLive.Twitter;
using BirdsiteLive.Twitter.Models;
+using Microsoft.Extensions.Logging;
using Microsoft.VisualStudio.TestTools.UnitTesting;
using Moq;
@@ -59,14 +60,20 @@ namespace BirdsiteLive.Pipeline.Tests.Processors
It.IsAny()
))
.Returns(Task.CompletedTask);
+
+ var twitterUserServiceMock = new Mock(MockBehavior.Strict);
+
+ var logger = new Mock>(MockBehavior.Strict);
#endregion
- var processor = new RetrieveTweetsProcessor(twitterServiceMock.Object, twitterUserDalMock.Object);
+ var processor = new RetrieveTweetsProcessor(twitterServiceMock.Object, twitterUserDalMock.Object, twitterUserServiceMock.Object, logger.Object);
var usersResult = await processor.ProcessAsync(users, CancellationToken.None);
#region Validations
twitterServiceMock.VerifyAll();
twitterUserDalMock.VerifyAll();
+ twitterUserServiceMock.VerifyAll();
+ logger.VerifyAll();
Assert.AreEqual(0, usersResult.Length);
#endregion
@@ -117,14 +124,21 @@ namespace BirdsiteLive.Pipeline.Tests.Processors
.Returns(tweets);
var twitterUserDalMock = new Mock(MockBehavior.Strict);
+
+ var twitterUserServiceMock = new Mock(MockBehavior.Strict);
+
+ var logger = new Mock>(MockBehavior.Strict);
#endregion
- var processor = new RetrieveTweetsProcessor(twitterServiceMock.Object, twitterUserDalMock.Object);
+ var processor = new RetrieveTweetsProcessor(twitterServiceMock.Object, twitterUserDalMock.Object, twitterUserServiceMock.Object, logger.Object);
var usersResult = await processor.ProcessAsync(users, CancellationToken.None);
#region Validations
twitterServiceMock.VerifyAll();
twitterUserDalMock.VerifyAll();
+ twitterUserServiceMock.VerifyAll();
+ logger.VerifyAll();
+
Assert.AreEqual(users.Length, usersResult.Length);
Assert.AreEqual(users[0].Acct, usersResult[0].User.Acct);
@@ -177,14 +191,20 @@ namespace BirdsiteLive.Pipeline.Tests.Processors
.Returns(tweets);
var twitterUserDalMock = new Mock(MockBehavior.Strict);
+
+ var twitterUserServiceMock = new Mock(MockBehavior.Strict);
+
+ var logger = new Mock>(MockBehavior.Strict);
#endregion
- var processor = new RetrieveTweetsProcessor(twitterServiceMock.Object, twitterUserDalMock.Object);
+ var processor = new RetrieveTweetsProcessor(twitterServiceMock.Object, twitterUserDalMock.Object, twitterUserServiceMock.Object, logger.Object);
var usersResult = await processor.ProcessAsync(users, CancellationToken.None);
#region Validations
twitterServiceMock.VerifyAll();
twitterUserDalMock.VerifyAll();
+ twitterUserServiceMock.VerifyAll();
+ logger.VerifyAll();
Assert.AreEqual(users.Length, usersResult.Length);
Assert.AreEqual(users[0].Acct, usersResult[0].User.Acct);
From 32b53e09e2dd63477e6e3c40bfea92f21eb4eea8 Mon Sep 17 00:00:00 2001
From: Nicolas Constant
Date: Tue, 2 Feb 2021 19:25:12 -0500
Subject: [PATCH 11/14] fine-tuning regex
---
.../Regexes/HashtagRegexes.cs | 2 +-
.../Regexes/UserRegexes.cs | 2 +-
.../Tools/StatusExtractorTests.cs | 26 +++++++++++++++++++
3 files changed, 28 insertions(+), 2 deletions(-)
diff --git a/src/BirdsiteLive.Common/Regexes/HashtagRegexes.cs b/src/BirdsiteLive.Common/Regexes/HashtagRegexes.cs
index 99b2f32..b6d9d00 100644
--- a/src/BirdsiteLive.Common/Regexes/HashtagRegexes.cs
+++ b/src/BirdsiteLive.Common/Regexes/HashtagRegexes.cs
@@ -5,6 +5,6 @@ namespace BirdsiteLive.Common.Regexes
public class HashtagRegexes
{
public static readonly Regex HashtagName = new Regex(@"^[a-zA-Z0-9_]+$");
- public static readonly Regex Hashtag = new Regex(@"(.?)#([a-zA-Z0-9_]+)(\s|$|[<.,;:!?/|-])");
+ public static readonly Regex Hashtag = new Regex(@"(.?)#([a-zA-Z0-9_]+)(\s|$|[\[\]<>.,;:!?/|-])");
}
}
\ No newline at end of file
diff --git a/src/BirdsiteLive.Common/Regexes/UserRegexes.cs b/src/BirdsiteLive.Common/Regexes/UserRegexes.cs
index a9f1053..c20a544 100644
--- a/src/BirdsiteLive.Common/Regexes/UserRegexes.cs
+++ b/src/BirdsiteLive.Common/Regexes/UserRegexes.cs
@@ -5,6 +5,6 @@ namespace BirdsiteLive.Common.Regexes
public class UserRegexes
{
public static readonly Regex TwitterAccount = new Regex(@"^[a-zA-Z0-9_]+$");
- public static readonly Regex Mention = new Regex(@"(.?)@([a-zA-Z0-9_]+)(\s|$|[<,;:!?/|-]|(. ))");
+ public static readonly Regex Mention = new Regex(@"(.?)@([a-zA-Z0-9_]+)(\s|$|[\[\]<>,;:!?/|-]|(. ))");
}
}
\ No newline at end of file
diff --git a/src/Tests/BirdsiteLive.Domain.Tests/Tools/StatusExtractorTests.cs b/src/Tests/BirdsiteLive.Domain.Tests/Tools/StatusExtractorTests.cs
index 92ff130..924461f 100644
--- a/src/Tests/BirdsiteLive.Domain.Tests/Tools/StatusExtractorTests.cs
+++ b/src/Tests/BirdsiteLive.Domain.Tests/Tools/StatusExtractorTests.cs
@@ -289,6 +289,32 @@ namespace BirdsiteLive.Domain.Tests.Tools
#endregion
}
+ [TestMethod]
+ public void Extract_SingleMentionTag_RT_Test()
+ {
+ #region Stubs
+ var message = $"[RT @mynickname]{Environment.NewLine}Bla!";
+ #endregion
+
+ #region Mocks
+ var logger = new Mock>();
+ #endregion
+
+ var service = new StatusExtractor(_settings, logger.Object);
+ var result = service.Extract(message);
+
+ #region Validations
+ logger.VerifyAll();
+ Assert.AreEqual(1, result.tags.Length);
+ Assert.AreEqual("@mynickname@domain.name", result.tags.First().name);
+ Assert.AreEqual("Mention", result.tags.First().type);
+ Assert.AreEqual("https://domain.name/users/mynickname", result.tags.First().href);
+
+ Assert.IsTrue(result.content.Contains("Bla!"));
+ Assert.IsTrue(result.content.Contains(@"@mynickname"));
+ #endregion
+ }
+
[TestMethod]
public void Extract_SingleMentionTag_Dot_Test()
{
From 7ddda8d18c4ec473c236410ff57699e8312182ce Mon Sep 17 00:00:00 2001
From: Nicolas Constant
Date: Tue, 2 Feb 2021 20:05:59 -0500
Subject: [PATCH 12/14] better RT extraction
---
.../Extractors/TweetExtractor.cs | 35 ++++++++++++++-----
1 file changed, 27 insertions(+), 8 deletions(-)
diff --git a/src/BirdsiteLive.Twitter/Extractors/TweetExtractor.cs b/src/BirdsiteLive.Twitter/Extractors/TweetExtractor.cs
index c432a35..7fce30c 100644
--- a/src/BirdsiteLive.Twitter/Extractors/TweetExtractor.cs
+++ b/src/BirdsiteLive.Twitter/Extractors/TweetExtractor.cs
@@ -23,7 +23,7 @@ namespace BirdsiteLive.Twitter.Extractors
InReplyToStatusId = tweet.InReplyToStatusId,
InReplyToAccount = tweet.InReplyToScreenName,
MessageContent = ExtractMessage(tweet),
- Media = ExtractMedia(tweet.Media),
+ Media = ExtractMedia(tweet),
CreatedAt = tweet.CreatedAt.ToUniversalTime(),
IsReply = tweet.InReplyToUserId != null,
IsThread = tweet.InReplyToUserId != null && tweet.InReplyToUserId == tweet.CreatedBy.Id,
@@ -36,10 +36,17 @@ namespace BirdsiteLive.Twitter.Extractors
private string ExtractRetweetUrl(ITweet tweet)
{
- if (tweet.IsRetweet && tweet.FullText.Contains("https://t.co/"))
+ if (tweet.IsRetweet)
{
- var retweetId = tweet.FullText.Split(new[] { "https://t.co/" }, StringSplitOptions.RemoveEmptyEntries).Last();
- return $"https://t.co/{retweetId}";
+ if (tweet.RetweetedTweet != null)
+ {
+ return tweet.RetweetedTweet.Url;
+ }
+ if (tweet.FullText.Contains("https://t.co/"))
+ {
+ var retweetId = tweet.FullText.Split(new[] { "https://t.co/" }, StringSplitOptions.RemoveEmptyEntries).Last();
+ return $"https://t.co/{retweetId}";
+ }
}
return null;
@@ -47,8 +54,15 @@ namespace BirdsiteLive.Twitter.Extractors
public string ExtractMessage(ITweet tweet)
{
- var tweetUrls = tweet.Media.Select(x => x.URL).Distinct();
var message = tweet.FullText;
+ var tweetUrls = tweet.Media.Select(x => x.URL).Distinct();
+
+ if (tweet.IsRetweet && tweet.QuotedStatusId == null && message.StartsWith("RT") && tweet.RetweetedTweet != null)
+ {
+ message = tweet.RetweetedTweet.FullText;
+ tweetUrls = tweet.RetweetedTweet.Media.Select(x => x.URL).Distinct();
+ }
+
foreach (var tweetUrl in tweetUrls)
{
if(tweet.IsRetweet)
@@ -60,8 +74,10 @@ namespace BirdsiteLive.Twitter.Extractors
if (tweet.QuotedTweet != null) message = $"[Quote {{RT}}]{Environment.NewLine}{message}";
if (tweet.IsRetweet)
{
- if (tweet.RetweetedTweet != null)
+ if (tweet.RetweetedTweet != null && !message.StartsWith("RT"))
message = $"[{{RT}} @{tweet.RetweetedTweet.CreatedBy.ScreenName}]{Environment.NewLine}{message}";
+ else if (tweet.RetweetedTweet != null && message.StartsWith($"RT @{tweet.RetweetedTweet.CreatedBy.ScreenName}:"))
+ message = message.Replace($"RT @{tweet.RetweetedTweet.CreatedBy.ScreenName}:", $"[{{RT}} @{tweet.RetweetedTweet.CreatedBy.ScreenName}]{Environment.NewLine}");
else
message = message.Replace("RT", "[{{RT}}]");
}
@@ -73,10 +89,13 @@ namespace BirdsiteLive.Twitter.Extractors
return message;
}
- public ExtractedMedia[] ExtractMedia(List media)
+ public ExtractedMedia[] ExtractMedia(ITweet tweet)
{
- var result = new List();
+ var media = tweet.Media;
+ if (tweet.IsRetweet && tweet.RetweetedTweet != null)
+ media = tweet.RetweetedTweet.Media;
+ var result = new List();
foreach (var m in media)
{
var mediaUrl = GetMediaUrl(m);
From 299ad64269dfabd23a50abea1c39cd80b5ad5dac Mon Sep 17 00:00:00 2001
From: Nicolas Constant
Date: Tue, 2 Feb 2021 20:50:28 -0500
Subject: [PATCH 13/14] road to 0.13.0
---
src/BirdsiteLive/BirdsiteLive.csproj | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/BirdsiteLive/BirdsiteLive.csproj b/src/BirdsiteLive/BirdsiteLive.csproj
index cef4948..607a195 100644
--- a/src/BirdsiteLive/BirdsiteLive.csproj
+++ b/src/BirdsiteLive/BirdsiteLive.csproj
@@ -4,7 +4,7 @@
netcoreapp3.1
d21486de-a812-47eb-a419-05682bb68856
Linux
- 0.12.2
+ 0.13.0
From 2e8313301b4f790530d33b352af9e5b5f973ac2c Mon Sep 17 00:00:00 2001
From: Nicolas Constant
Date: Tue, 2 Feb 2021 22:49:37 -0500
Subject: [PATCH 14/14] better DI
---
.../Processors/RetrieveTweetsProcessor.cs | 13 ++++---------
src/BirdsiteLive.Twitter/CachedTwitterService.cs | 7 ++++++-
.../BirdsiteLive.Pipeline.Tests.csproj | 10 ++++++----
.../Processors/RetrieveTweetsProcessorTests.cs | 6 +++---
4 files changed, 19 insertions(+), 17 deletions(-)
diff --git a/src/BirdsiteLive.Pipeline/Processors/RetrieveTweetsProcessor.cs b/src/BirdsiteLive.Pipeline/Processors/RetrieveTweetsProcessor.cs
index c0976c5..e60c121 100644
--- a/src/BirdsiteLive.Pipeline/Processors/RetrieveTweetsProcessor.cs
+++ b/src/BirdsiteLive.Pipeline/Processors/RetrieveTweetsProcessor.cs
@@ -17,12 +17,12 @@ namespace BirdsiteLive.Pipeline.Processors
public class RetrieveTweetsProcessor : IRetrieveTweetsProcessor
{
private readonly ITwitterTweetsService _twitterTweetsService;
- private readonly ITwitterUserService _twitterUserService;
+ private readonly ICachedTwitterUserService _twitterUserService;
private readonly ITwitterUserDal _twitterUserDal;
private readonly ILogger _logger;
#region Ctor
- public RetrieveTweetsProcessor(ITwitterTweetsService twitterTweetsService, ITwitterUserDal twitterUserDal, ITwitterUserService twitterUserService, ILogger logger)
+ public RetrieveTweetsProcessor(ITwitterTweetsService twitterTweetsService, ITwitterUserDal twitterUserDal, ICachedTwitterUserService twitterUserService, ILogger logger)
{
_twitterTweetsService = twitterTweetsService;
_twitterUserDal = twitterUserDal;
@@ -77,13 +77,8 @@ namespace BirdsiteLive.Pipeline.Processors
}
catch (Exception e)
{
- _logger.LogError(e, "Error retrieving TL of {Username} from {LastTweetPostedId}", user.Acct, user.LastTweetPostedId);
-
- if (_twitterUserService is CachedTwitterUserService service)
- {
- _logger.LogInformation("Purge {Username} from cache", user.Acct);
- service.PurgeUser(user.Acct);
- }
+ _logger.LogError(e, "Error retrieving TL of {Username} from {LastTweetPostedId}, purging user from cache", user.Acct, user.LastTweetPostedId);
+ _twitterUserService.PurgeUser(user.Acct);
}
return tweets;
diff --git a/src/BirdsiteLive.Twitter/CachedTwitterService.cs b/src/BirdsiteLive.Twitter/CachedTwitterService.cs
index d8ca1fb..d9b90d2 100644
--- a/src/BirdsiteLive.Twitter/CachedTwitterService.cs
+++ b/src/BirdsiteLive.Twitter/CachedTwitterService.cs
@@ -4,7 +4,12 @@ using Microsoft.Extensions.Caching.Memory;
namespace BirdsiteLive.Twitter
{
- public class CachedTwitterUserService : ITwitterUserService
+ public interface ICachedTwitterUserService : ITwitterUserService
+ {
+ void PurgeUser(string username);
+ }
+
+ public class CachedTwitterUserService : ICachedTwitterUserService
{
private readonly ITwitterUserService _twitterService;
diff --git a/src/Tests/BirdsiteLive.Pipeline.Tests/BirdsiteLive.Pipeline.Tests.csproj b/src/Tests/BirdsiteLive.Pipeline.Tests/BirdsiteLive.Pipeline.Tests.csproj
index aa7750b..d1cfd06 100644
--- a/src/Tests/BirdsiteLive.Pipeline.Tests/BirdsiteLive.Pipeline.Tests.csproj
+++ b/src/Tests/BirdsiteLive.Pipeline.Tests/BirdsiteLive.Pipeline.Tests.csproj
@@ -14,12 +14,14 @@
-
-
-
-
+
+
+
+
+
+
diff --git a/src/Tests/BirdsiteLive.Pipeline.Tests/Processors/RetrieveTweetsProcessorTests.cs b/src/Tests/BirdsiteLive.Pipeline.Tests/Processors/RetrieveTweetsProcessorTests.cs
index b35a688..02fd7bd 100644
--- a/src/Tests/BirdsiteLive.Pipeline.Tests/Processors/RetrieveTweetsProcessorTests.cs
+++ b/src/Tests/BirdsiteLive.Pipeline.Tests/Processors/RetrieveTweetsProcessorTests.cs
@@ -61,7 +61,7 @@ namespace BirdsiteLive.Pipeline.Tests.Processors
))
.Returns(Task.CompletedTask);
- var twitterUserServiceMock = new Mock(MockBehavior.Strict);
+ var twitterUserServiceMock = new Mock(MockBehavior.Strict);
var logger = new Mock>(MockBehavior.Strict);
#endregion
@@ -125,7 +125,7 @@ namespace BirdsiteLive.Pipeline.Tests.Processors
var twitterUserDalMock = new Mock(MockBehavior.Strict);
- var twitterUserServiceMock = new Mock(MockBehavior.Strict);
+ var twitterUserServiceMock = new Mock(MockBehavior.Strict);
var logger = new Mock>(MockBehavior.Strict);
#endregion
@@ -192,7 +192,7 @@ namespace BirdsiteLive.Pipeline.Tests.Processors
var twitterUserDalMock = new Mock(MockBehavior.Strict);
- var twitterUserServiceMock = new Mock(MockBehavior.Strict);
+ var twitterUserServiceMock = new Mock(MockBehavior.Strict);
var logger = new Mock>(MockBehavior.Strict);
#endregion