From ec420346b6335d982b33810f71d30b444b582ce2 Mon Sep 17 00:00:00 2001 From: Nicolas Constant Date: Mon, 1 Feb 2021 20:07:53 -0500 Subject: [PATCH 01/14] purge cache when TL retrieval fails, fix #79 --- .../Processors/RetrieveTweetsProcessor.cs | 31 +++++++++--- .../CachedTwitterService.cs | 5 ++ .../TwitterTweetsService.cs | 48 ++++++++----------- 3 files changed, 51 insertions(+), 33 deletions(-) diff --git a/src/BirdsiteLive.Pipeline/Processors/RetrieveTweetsProcessor.cs b/src/BirdsiteLive.Pipeline/Processors/RetrieveTweetsProcessor.cs index c381dcf..c0976c5 100644 --- a/src/BirdsiteLive.Pipeline/Processors/RetrieveTweetsProcessor.cs +++ b/src/BirdsiteLive.Pipeline/Processors/RetrieveTweetsProcessor.cs @@ -9,6 +9,7 @@ using BirdsiteLive.Pipeline.Contracts; using BirdsiteLive.Pipeline.Models; using BirdsiteLive.Twitter; using BirdsiteLive.Twitter.Models; +using Microsoft.Extensions.Logging; using Tweetinvi.Models; namespace BirdsiteLive.Pipeline.Processors @@ -16,13 +17,17 @@ namespace BirdsiteLive.Pipeline.Processors public class RetrieveTweetsProcessor : IRetrieveTweetsProcessor { private readonly ITwitterTweetsService _twitterTweetsService; + private readonly ITwitterUserService _twitterUserService; private readonly ITwitterUserDal _twitterUserDal; + private readonly ILogger _logger; #region Ctor - public RetrieveTweetsProcessor(ITwitterTweetsService twitterTweetsService, ITwitterUserDal twitterUserDal) + public RetrieveTweetsProcessor(ITwitterTweetsService twitterTweetsService, ITwitterUserDal twitterUserDal, ITwitterUserService twitterUserService, ILogger logger) { _twitterTweetsService = twitterTweetsService; _twitterUserDal = twitterUserDal; + _twitterUserService = twitterUserService; + _logger = logger; } #endregion @@ -61,11 +66,25 @@ namespace BirdsiteLive.Pipeline.Processors private ExtractedTweet[] RetrieveNewTweets(SyncTwitterUser user) { - ExtractedTweet[] tweets; - if (user.LastTweetPostedId == -1) - tweets = _twitterTweetsService.GetTimeline(user.Acct, 1); - else - tweets = _twitterTweetsService.GetTimeline(user.Acct, 200, user.LastTweetSynchronizedForAllFollowersId); + var tweets = new ExtractedTweet[0]; + + try + { + if (user.LastTweetPostedId == -1) + tweets = _twitterTweetsService.GetTimeline(user.Acct, 1); + else + tweets = _twitterTweetsService.GetTimeline(user.Acct, 200, user.LastTweetSynchronizedForAllFollowersId); + } + catch (Exception e) + { + _logger.LogError(e, "Error retrieving TL of {Username} from {LastTweetPostedId}", user.Acct, user.LastTweetPostedId); + + if (_twitterUserService is CachedTwitterUserService service) + { + _logger.LogInformation("Purge {Username} from cache", user.Acct); + service.PurgeUser(user.Acct); + } + } return tweets; } diff --git a/src/BirdsiteLive.Twitter/CachedTwitterService.cs b/src/BirdsiteLive.Twitter/CachedTwitterService.cs index 2f0be46..d8ca1fb 100644 --- a/src/BirdsiteLive.Twitter/CachedTwitterService.cs +++ b/src/BirdsiteLive.Twitter/CachedTwitterService.cs @@ -38,5 +38,10 @@ namespace BirdsiteLive.Twitter return user; } + + public void PurgeUser(string username) + { + _userCache.Remove(username); + } } } \ No newline at end of file diff --git a/src/BirdsiteLive.Twitter/TwitterTweetsService.cs b/src/BirdsiteLive.Twitter/TwitterTweetsService.cs index 49684e8..41b2d60 100644 --- a/src/BirdsiteLive.Twitter/TwitterTweetsService.cs +++ b/src/BirdsiteLive.Twitter/TwitterTweetsService.cs @@ -61,36 +61,30 @@ namespace BirdsiteLive.Twitter public ExtractedTweet[] GetTimeline(string username, int nberTweets, long fromTweetId = -1) { var tweets = new List(); - try + + _twitterAuthenticationInitializer.EnsureAuthenticationIsInitialized(); + ExceptionHandler.SwallowWebExceptions = false; + TweetinviConfig.CurrentThreadSettings.TweetMode = TweetMode.Extended; + + var user = _twitterUserService.GetUser(username); + if (user == null || user.Protected) return new ExtractedTweet[0]; + + if (fromTweetId == -1) { - _twitterAuthenticationInitializer.EnsureAuthenticationIsInitialized(); - ExceptionHandler.SwallowWebExceptions = false; - TweetinviConfig.CurrentThreadSettings.TweetMode = TweetMode.Extended; - - var user = _twitterUserService.GetUser(username); - if (user == null || user.Protected) return new ExtractedTweet[0]; - - if (fromTweetId == -1) - { - var timeline = Timeline.GetUserTimeline(user.Id, nberTweets); - _statisticsHandler.CalledTimelineApi(); - if (timeline != null) tweets.AddRange(timeline); - } - else - { - var timelineRequestParameters = new UserTimelineParameters - { - SinceId = fromTweetId, - MaximumNumberOfTweetsToRetrieve = nberTweets - }; - var timeline = Timeline.GetUserTimeline(user.Id, timelineRequestParameters); - _statisticsHandler.CalledTimelineApi(); - if (timeline != null) tweets.AddRange(timeline); - } + var timeline = Timeline.GetUserTimeline(user.Id, nberTweets); + _statisticsHandler.CalledTimelineApi(); + if (timeline != null) tweets.AddRange(timeline); } - catch (Exception e) + else { - _logger.LogError(e, "Error retrieving timeline from {Username}, from {TweetId}", username, fromTweetId); + var timelineRequestParameters = new UserTimelineParameters + { + SinceId = fromTweetId, + MaximumNumberOfTweetsToRetrieve = nberTweets + }; + var timeline = Timeline.GetUserTimeline(user.Id, timelineRequestParameters); + _statisticsHandler.CalledTimelineApi(); + if (timeline != null) tweets.AddRange(timeline); } return tweets.Select(_tweetExtractor.Extract).ToArray(); From 0bd8e38f28a30dc88b96f125ba1ce17247bfa66c Mon Sep 17 00:00:00 2001 From: Nicolas Constant Date: Mon, 1 Feb 2021 20:13:10 -0500 Subject: [PATCH 02/14] refactoring user regex --- src/BirdsiteLive.Common/Regexes/UserRegex.cs | 9 +++++++++ src/BirdsiteLive/Controllers/UsersController.cs | 4 ++-- src/BirdsiteLive/Controllers/WellKnownController.cs | 4 ++-- 3 files changed, 13 insertions(+), 4 deletions(-) create mode 100644 src/BirdsiteLive.Common/Regexes/UserRegex.cs diff --git a/src/BirdsiteLive.Common/Regexes/UserRegex.cs b/src/BirdsiteLive.Common/Regexes/UserRegex.cs new file mode 100644 index 0000000..b3da74a --- /dev/null +++ b/src/BirdsiteLive.Common/Regexes/UserRegex.cs @@ -0,0 +1,9 @@ +using System.Text.RegularExpressions; + +namespace BirdsiteLive.Common.Regexes +{ + public class UserRegex + { + public static readonly Regex TwitterAccountRegex = new Regex(@"^[a-zA-Z0-9_]+$"); + } +} \ No newline at end of file diff --git a/src/BirdsiteLive/Controllers/UsersController.cs b/src/BirdsiteLive/Controllers/UsersController.cs index a22ae73..486d9b6 100644 --- a/src/BirdsiteLive/Controllers/UsersController.cs +++ b/src/BirdsiteLive/Controllers/UsersController.cs @@ -9,6 +9,7 @@ using System.Threading; using System.Threading.Tasks; using BirdsiteLive.ActivityPub; using BirdsiteLive.ActivityPub.Models; +using BirdsiteLive.Common.Regexes; using BirdsiteLive.Common.Settings; using BirdsiteLive.Domain; using BirdsiteLive.Models; @@ -28,7 +29,6 @@ namespace BirdsiteLive.Controllers private readonly IUserService _userService; private readonly IStatusService _statusService; private readonly InstanceSettings _instanceSettings; - private readonly Regex _twitterAccountRegex = new Regex(@"^[a-zA-Z0-9_]+$"); #region Ctor public UsersController(ITwitterUserService twitterUserService, IUserService userService, IStatusService statusService, InstanceSettings instanceSettings, ITwitterTweetsService twitterTweetService) @@ -62,7 +62,7 @@ namespace BirdsiteLive.Controllers // Ensure valid username // https://help.twitter.com/en/managing-your-account/twitter-username-rules TwitterUser user = null; - if (!string.IsNullOrWhiteSpace(id) && _twitterAccountRegex.IsMatch(id) && id.Length <= 15) + if (!string.IsNullOrWhiteSpace(id) && UserRegex.TwitterAccountRegex.IsMatch(id) && id.Length <= 15) user = _twitterUserService.GetUser(id); var acceptHeaders = Request.Headers["Accept"]; diff --git a/src/BirdsiteLive/Controllers/WellKnownController.cs b/src/BirdsiteLive/Controllers/WellKnownController.cs index 3f060a7..f974665 100644 --- a/src/BirdsiteLive/Controllers/WellKnownController.cs +++ b/src/BirdsiteLive/Controllers/WellKnownController.cs @@ -4,6 +4,7 @@ using System.Linq; using System.Text.RegularExpressions; using System.Threading.Tasks; using BirdsiteLive.ActivityPub.Converters; +using BirdsiteLive.Common.Regexes; using BirdsiteLive.Common.Settings; using BirdsiteLive.DAL.Contracts; using BirdsiteLive.Models; @@ -20,7 +21,6 @@ namespace BirdsiteLive.Controllers private readonly ITwitterUserService _twitterUserService; private readonly ITwitterUserDal _twitterUserDal; private readonly InstanceSettings _settings; - private readonly Regex _twitterAccountRegex = new Regex(@"^[a-zA-Z0-9_]+$"); #region Ctor public WellKnownController(InstanceSettings settings, ITwitterUserService twitterUserService, ITwitterUserDal twitterUserDal) @@ -164,7 +164,7 @@ namespace BirdsiteLive.Controllers // Ensure valid username // https://help.twitter.com/en/managing-your-account/twitter-username-rules - if (string.IsNullOrWhiteSpace(name) || !_twitterAccountRegex.IsMatch(name) || name.Length > 15 ) + if (string.IsNullOrWhiteSpace(name) || !UserRegex.TwitterAccountRegex.IsMatch(name) || name.Length > 15 ) return NotFound(); if (!string.IsNullOrWhiteSpace(domain) && domain != _settings.Domain) From b2be896e95c6a12b0a090881e08fb25e944a92ec Mon Sep 17 00:00:00 2001 From: Nicolas Constant Date: Mon, 1 Feb 2021 20:19:14 -0500 Subject: [PATCH 03/14] added regexes --- src/BirdsiteLive.Common/Regexes/HashtagRegexes.cs | 9 +++++++++ src/BirdsiteLive.Common/Regexes/UrlRegexes.cs | 9 +++++++++ src/BirdsiteLive.Common/Regexes/UserRegex.cs | 9 --------- src/BirdsiteLive.Common/Regexes/UserRegexes.cs | 10 ++++++++++ src/BirdsiteLive/Controllers/UsersController.cs | 2 +- src/BirdsiteLive/Controllers/WellKnownController.cs | 2 +- 6 files changed, 30 insertions(+), 11 deletions(-) create mode 100644 src/BirdsiteLive.Common/Regexes/HashtagRegexes.cs create mode 100644 src/BirdsiteLive.Common/Regexes/UrlRegexes.cs delete mode 100644 src/BirdsiteLive.Common/Regexes/UserRegex.cs create mode 100644 src/BirdsiteLive.Common/Regexes/UserRegexes.cs diff --git a/src/BirdsiteLive.Common/Regexes/HashtagRegexes.cs b/src/BirdsiteLive.Common/Regexes/HashtagRegexes.cs new file mode 100644 index 0000000..c5e8ed7 --- /dev/null +++ b/src/BirdsiteLive.Common/Regexes/HashtagRegexes.cs @@ -0,0 +1,9 @@ +using System.Text.RegularExpressions; + +namespace BirdsiteLive.Common.Regexes +{ + public class HashtagRegexes + { + public static readonly Regex Hashtag = new Regex(@"(.)(#[a-zA-Z0-9]+)(\s|$|[.,;:!?/|-])"); + } +} \ No newline at end of file diff --git a/src/BirdsiteLive.Common/Regexes/UrlRegexes.cs b/src/BirdsiteLive.Common/Regexes/UrlRegexes.cs new file mode 100644 index 0000000..ea3e5c2 --- /dev/null +++ b/src/BirdsiteLive.Common/Regexes/UrlRegexes.cs @@ -0,0 +1,9 @@ +using System.Text.RegularExpressions; + +namespace BirdsiteLive.Common.Regexes +{ + public class UrlRegexes + { + public static readonly Regex Url = new Regex(@"((http|ftp|https):\/\/[\w\-_]+(\.[\w\-_]+)+([\w\-\.,@?^=%&:/~\+#]*[\w\-\@?^=%&/~\+#])?)"); + } +} \ No newline at end of file diff --git a/src/BirdsiteLive.Common/Regexes/UserRegex.cs b/src/BirdsiteLive.Common/Regexes/UserRegex.cs deleted file mode 100644 index b3da74a..0000000 --- a/src/BirdsiteLive.Common/Regexes/UserRegex.cs +++ /dev/null @@ -1,9 +0,0 @@ -using System.Text.RegularExpressions; - -namespace BirdsiteLive.Common.Regexes -{ - public class UserRegex - { - public static readonly Regex TwitterAccountRegex = new Regex(@"^[a-zA-Z0-9_]+$"); - } -} \ No newline at end of file diff --git a/src/BirdsiteLive.Common/Regexes/UserRegexes.cs b/src/BirdsiteLive.Common/Regexes/UserRegexes.cs new file mode 100644 index 0000000..74c8b2e --- /dev/null +++ b/src/BirdsiteLive.Common/Regexes/UserRegexes.cs @@ -0,0 +1,10 @@ +using System.Text.RegularExpressions; + +namespace BirdsiteLive.Common.Regexes +{ + public class UserRegexes + { + public static readonly Regex TwitterAccount = new Regex(@"^[a-zA-Z0-9_]+$"); + public static readonly Regex Mention = new Regex(@"(.)(@[a-zA-Z0-9_]+)(\s|$|[,;:!?/|-]|(. ))"); + } +} \ No newline at end of file diff --git a/src/BirdsiteLive/Controllers/UsersController.cs b/src/BirdsiteLive/Controllers/UsersController.cs index 486d9b6..0fbefcd 100644 --- a/src/BirdsiteLive/Controllers/UsersController.cs +++ b/src/BirdsiteLive/Controllers/UsersController.cs @@ -62,7 +62,7 @@ namespace BirdsiteLive.Controllers // Ensure valid username // https://help.twitter.com/en/managing-your-account/twitter-username-rules TwitterUser user = null; - if (!string.IsNullOrWhiteSpace(id) && UserRegex.TwitterAccountRegex.IsMatch(id) && id.Length <= 15) + if (!string.IsNullOrWhiteSpace(id) && UserRegexes.TwitterAccount.IsMatch(id) && id.Length <= 15) user = _twitterUserService.GetUser(id); var acceptHeaders = Request.Headers["Accept"]; diff --git a/src/BirdsiteLive/Controllers/WellKnownController.cs b/src/BirdsiteLive/Controllers/WellKnownController.cs index f974665..4151ab0 100644 --- a/src/BirdsiteLive/Controllers/WellKnownController.cs +++ b/src/BirdsiteLive/Controllers/WellKnownController.cs @@ -164,7 +164,7 @@ namespace BirdsiteLive.Controllers // Ensure valid username // https://help.twitter.com/en/managing-your-account/twitter-username-rules - if (string.IsNullOrWhiteSpace(name) || !UserRegex.TwitterAccountRegex.IsMatch(name) || name.Length > 15 ) + if (string.IsNullOrWhiteSpace(name) || !UserRegexes.TwitterAccount.IsMatch(name) || name.Length > 15 ) return NotFound(); if (!string.IsNullOrWhiteSpace(domain) && domain != _settings.Domain) From 6fac0ceffaa5ffeb3321b1cc7a73b0203bd9a4e8 Mon Sep 17 00:00:00 2001 From: Nicolas Constant Date: Mon, 1 Feb 2021 20:23:54 -0500 Subject: [PATCH 04/14] refactoring regexes --- src/BirdsiteLive.Common/Regexes/EmojiRegexes.cs | 11 +++++++++++ src/BirdsiteLive.Domain/Tools/StatusExtractor.cs | 8 ++------ 2 files changed, 13 insertions(+), 6 deletions(-) create mode 100644 src/BirdsiteLive.Common/Regexes/EmojiRegexes.cs diff --git a/src/BirdsiteLive.Common/Regexes/EmojiRegexes.cs b/src/BirdsiteLive.Common/Regexes/EmojiRegexes.cs new file mode 100644 index 0000000..a6b8ae5 --- /dev/null +++ b/src/BirdsiteLive.Common/Regexes/EmojiRegexes.cs @@ -0,0 +1,11 @@ +using System.Text.RegularExpressions; + +namespace BirdsiteLive.Common.Regexes +{ + public class EmojiRegexes + { + public static readonly Regex Emoji = new Regex(EmojiPattern); + + private const string EmojiPattern = @"(?:\uD83D(?:\uDD73\uFE0F?|\uDC41(?:(?:\uFE0F(?:\u200D\uD83D\uDDE8\uFE0F?)?|\u200D\uD83D\uDDE8\uFE0F?))?|[\uDDE8\uDDEF]\uFE0F?|\uDC4B(?:\uD83C[\uDFFB-\uDFFF])?|\uDD90(?:(?:\uD83C[\uDFFB-\uDFFF]|\uFE0F))?|[\uDD96\uDC4C\uDC48\uDC49\uDC46\uDD95\uDC47\uDC4D\uDC4E\uDC4A\uDC4F\uDE4C\uDC50\uDE4F\uDC85\uDCAA\uDC42\uDC43\uDC76\uDC66\uDC67](?:\uD83C[\uDFFB-\uDFFF])?|\uDC71(?:(?:\uD83C(?:[\uDFFB-\uDFFF](?:\u200D(?:[\u2640\u2642]\uFE0F?))?)|\u200D(?:[\u2640\u2642]\uFE0F?)))?|\uDC68(?:(?:\uD83C(?:\uDFFB(?:\u200D(?:\uD83E(?:\uDD1D\u200D\uD83D\uDC68\uD83C[\uDFFC-\uDFFF]|[\uDDB0\uDDB1\uDDB3\uDDB2\uDDAF\uDDBC\uDDBD])|\u2695\uFE0F?|\uD83C[\uDF93\uDFEB\uDF3E\uDF73\uDFED\uDFA4\uDFA8]|\u2696\uFE0F?|\uD83D[\uDD27\uDCBC\uDD2C\uDCBB\uDE80\uDE92]|\u2708\uFE0F?))?|\uDFFC(?:\u200D(?:\uD83E(?:\uDD1D\u200D\uD83D\uDC68\uD83C[\uDFFB\uDFFD-\uDFFF]|[\uDDB0\uDDB1\uDDB3\uDDB2\uDDAF\uDDBC\uDDBD])|\u2695\uFE0F?|\uD83C[\uDF93\uDFEB\uDF3E\uDF73\uDFED\uDFA4\uDFA8]|\u2696\uFE0F?|\uD83D[\uDD27\uDCBC\uDD2C\uDCBB\uDE80\uDE92]|\u2708\uFE0F?))?|\uDFFD(?:\u200D(?:\uD83E(?:\uDD1D\u200D\uD83D\uDC68\uD83C[\uDFFB\uDFFC\uDFFE\uDFFF]|[\uDDB0\uDDB1\uDDB3\uDDB2\uDDAF\uDDBC\uDDBD])|\u2695\uFE0F?|\uD83C[\uDF93\uDFEB\uDF3E\uDF73\uDFED\uDFA4\uDFA8]|\u2696\uFE0F?|\uD83D[\uDD27\uDCBC\uDD2C\uDCBB\uDE80\uDE92]|\u2708\uFE0F?))?|\uDFFE(?:\u200D(?:\uD83E(?:\uDD1D\u200D\uD83D\uDC68\uD83C[\uDFFB-\uDFFD\uDFFF]|[\uDDB0\uDDB1\uDDB3\uDDB2\uDDAF\uDDBC\uDDBD])|\u2695\uFE0F?|\uD83C[\uDF93\uDFEB\uDF3E\uDF73\uDFED\uDFA4\uDFA8]|\u2696\uFE0F?|\uD83D[\uDD27\uDCBC\uDD2C\uDCBB\uDE80\uDE92]|\u2708\uFE0F?))?|\uDFFF(?:\u200D(?:\uD83E(?:\uDD1D\u200D\uD83D\uDC68\uD83C[\uDFFB-\uDFFE]|[\uDDB0\uDDB1\uDDB3\uDDB2\uDDAF\uDDBC\uDDBD])|\u2695\uFE0F?|\uD83C[\uDF93\uDFEB\uDF3E\uDF73\uDFED\uDFA4\uDFA8]|\u2696\uFE0F?|\uD83D[\uDD27\uDCBC\uDD2C\uDCBB\uDE80\uDE92]|\u2708\uFE0F?))?)|\u200D(?:\uD83E[\uDDB0\uDDB1\uDDB3\uDDB2\uDDAF\uDDBC\uDDBD]|\u2695\uFE0F?|\uD83C[\uDF93\uDFEB\uDF3E\uDF73\uDFED\uDFA4\uDFA8]|\u2696\uFE0F?|\uD83D(?:\uDC69\u200D\uD83D(?:\uDC66(?:\u200D\uD83D\uDC66)?|\uDC67(?:\u200D\uD83D[\uDC66\uDC67])?)|\uDC68\u200D\uD83D(?:\uDC66(?:\u200D\uD83D\uDC66)?|\uDC67(?:\u200D\uD83D[\uDC66\uDC67])?)|\uDC66(?:\u200D\uD83D\uDC66)?|\uDC67(?:\u200D\uD83D[\uDC66\uDC67])?|[\uDD27\uDCBC\uDD2C\uDCBB\uDE80\uDE92])|\u2708\uFE0F?|\u2764(?:\uFE0F\u200D\uD83D(?:\uDC8B\u200D\uD83D\uDC68|\uDC68)|\u200D\uD83D(?:\uDC8B\u200D\uD83D\uDC68|\uDC68)))))?|\uDC69(?:(?:\uD83C(?:\uDFFB(?:\u200D(?:\uD83E(?:\uDD1D\u200D\uD83D(?:\uDC69\uD83C[\uDFFC-\uDFFF]|\uDC68\uD83C[\uDFFC-\uDFFF])|[\uDDB0\uDDB1\uDDB3\uDDB2\uDDAF\uDDBC\uDDBD])|\u2695\uFE0F?|\uD83C[\uDF93\uDFEB\uDF3E\uDF73\uDFED\uDFA4\uDFA8]|\u2696\uFE0F?|\uD83D[\uDD27\uDCBC\uDD2C\uDCBB\uDE80\uDE92]|\u2708\uFE0F?))?|\uDFFC(?:\u200D(?:\uD83E(?:\uDD1D\u200D\uD83D(?:\uDC69\uD83C[\uDFFB\uDFFD-\uDFFF]|\uDC68\uD83C[\uDFFB\uDFFD-\uDFFF])|[\uDDB0\uDDB1\uDDB3\uDDB2\uDDAF\uDDBC\uDDBD])|\u2695\uFE0F?|\uD83C[\uDF93\uDFEB\uDF3E\uDF73\uDFED\uDFA4\uDFA8]|\u2696\uFE0F?|\uD83D[\uDD27\uDCBC\uDD2C\uDCBB\uDE80\uDE92]|\u2708\uFE0F?))?|\uDFFD(?:\u200D(?:\uD83E(?:\uDD1D\u200D\uD83D(?:\uDC69\uD83C[\uDFFB\uDFFC\uDFFE\uDFFF]|\uDC68\uD83C[\uDFFB\uDFFC\uDFFE\uDFFF])|[\uDDB0\uDDB1\uDDB3\uDDB2\uDDAF\uDDBC\uDDBD])|\u2695\uFE0F?|\uD83C[\uDF93\uDFEB\uDF3E\uDF73\uDFED\uDFA4\uDFA8]|\u2696\uFE0F?|\uD83D[\uDD27\uDCBC\uDD2C\uDCBB\uDE80\uDE92]|\u2708\uFE0F?))?|\uDFFE(?:\u200D(?:\uD83E(?:\uDD1D\u200D\uD83D(?:\uDC69\uD83C[\uDFFB-\uDFFD\uDFFF]|\uDC68\uD83C[\uDFFB-\uDFFD\uDFFF])|[\uDDB0\uDDB1\uDDB3\uDDB2\uDDAF\uDDBC\uDDBD])|\u2695\uFE0F?|\uD83C[\uDF93\uDFEB\uDF3E\uDF73\uDFED\uDFA4\uDFA8]|\u2696\uFE0F?|\uD83D[\uDD27\uDCBC\uDD2C\uDCBB\uDE80\uDE92]|\u2708\uFE0F?))?|\uDFFF(?:\u200D(?:\uD83E(?:\uDD1D\u200D\uD83D(?:\uDC69\uD83C[\uDFFB-\uDFFE]|\uDC68\uD83C[\uDFFB-\uDFFE])|[\uDDB0\uDDB1\uDDB3\uDDB2\uDDAF\uDDBC\uDDBD])|\u2695\uFE0F?|\uD83C[\uDF93\uDFEB\uDF3E\uDF73\uDFED\uDFA4\uDFA8]|\u2696\uFE0F?|\uD83D[\uDD27\uDCBC\uDD2C\uDCBB\uDE80\uDE92]|\u2708\uFE0F?))?)|\u200D(?:\uD83E[\uDDB0\uDDB1\uDDB3\uDDB2\uDDAF\uDDBC\uDDBD]|\u2695\uFE0F?|\uD83C[\uDF93\uDFEB\uDF3E\uDF73\uDFED\uDFA4\uDFA8]|\u2696\uFE0F?|\uD83D(?:\uDC69\u200D\uD83D(?:\uDC66(?:\u200D\uD83D\uDC66)?|\uDC67(?:\u200D\uD83D[\uDC66\uDC67])?)|\uDC66(?:\u200D\uD83D\uDC66)?|\uDC67(?:\u200D\uD83D[\uDC66\uDC67])?|[\uDD27\uDCBC\uDD2C\uDCBB\uDE80\uDE92])|\u2708\uFE0F?|\u2764(?:\uFE0F\u200D\uD83D(?:\uDC8B\u200D\uD83D[\uDC68\uDC69]|[\uDC68\uDC69])|\u200D\uD83D(?:\uDC8B\u200D\uD83D[\uDC68\uDC69]|[\uDC68\uDC69])))))?|[\uDC74\uDC75](?:\uD83C[\uDFFB-\uDFFF])?|[\uDE4D\uDE4E\uDE45\uDE46\uDC81\uDE4B\uDE47\uDC6E](?:(?:\uD83C(?:[\uDFFB-\uDFFF](?:\u200D(?:[\u2642\u2640]\uFE0F?))?)|\u200D(?:[\u2642\u2640]\uFE0F?)))?|\uDD75(?:(?:\uFE0F(?:\u200D(?:[\u2642\u2640]\uFE0F?))?|\uD83C(?:[\uDFFB-\uDFFF](?:\u200D(?:[\u2642\u2640]\uFE0F?))?)|\u200D(?:[\u2642\u2640]\uFE0F?)))?|[\uDC82\uDC77](?:(?:\uD83C(?:[\uDFFB-\uDFFF](?:\u200D(?:[\u2642\u2640]\uFE0F?))?)|\u200D(?:[\u2642\u2640]\uFE0F?)))?|\uDC78(?:\uD83C[\uDFFB-\uDFFF])?|\uDC73(?:(?:\uD83C(?:[\uDFFB-\uDFFF](?:\u200D(?:[\u2642\u2640]\uFE0F?))?)|\u200D(?:[\u2642\u2640]\uFE0F?)))?|[\uDC72\uDC70\uDC7C](?:\uD83C[\uDFFB-\uDFFF])?|[\uDC86\uDC87\uDEB6](?:(?:\uD83C(?:[\uDFFB-\uDFFF](?:\u200D(?:[\u2642\u2640]\uFE0F?))?)|\u200D(?:[\u2642\u2640]\uFE0F?)))?|[\uDC83\uDD7A](?:\uD83C[\uDFFB-\uDFFF])?|\uDD74(?:(?:\uD83C[\uDFFB-\uDFFF]|\uFE0F))?|\uDC6F(?:\u200D(?:[\u2642\u2640]\uFE0F?))?|[\uDEA3\uDEB4\uDEB5](?:(?:\uD83C(?:[\uDFFB-\uDFFF](?:\u200D(?:[\u2642\u2640]\uFE0F?))?)|\u200D(?:[\u2642\u2640]\uFE0F?)))?|[\uDEC0\uDECC\uDC6D\uDC6B\uDC6C](?:\uD83C[\uDFFB-\uDFFF])?|\uDDE3\uFE0F?|\uDC15(?:\u200D\uD83E\uDDBA)?|[\uDC3F\uDD4A\uDD77\uDD78\uDDFA\uDEE3\uDEE4\uDEE2\uDEF3\uDEE5\uDEE9\uDEF0\uDECE\uDD70\uDD79\uDDBC\uDD76\uDECD\uDDA5\uDDA8\uDDB1\uDDB2\uDCFD\uDD6F\uDDDE\uDDF3\uDD8B\uDD8A\uDD8C\uDD8D\uDDC2\uDDD2\uDDD3\uDD87\uDDC3\uDDC4\uDDD1\uDDDD\uDEE0\uDDE1\uDEE1\uDDDC\uDECF\uDECB\uDD49]\uFE0F?|[\uDE00\uDE03\uDE04\uDE01\uDE06\uDE05\uDE02\uDE42\uDE43\uDE09\uDE0A\uDE07\uDE0D\uDE18\uDE17\uDE1A\uDE19\uDE0B\uDE1B-\uDE1D\uDE10\uDE11\uDE36\uDE0F\uDE12\uDE44\uDE2C\uDE0C\uDE14\uDE2A\uDE34\uDE37\uDE35\uDE0E\uDE15\uDE1F\uDE41\uDE2E\uDE2F\uDE32\uDE33\uDE26-\uDE28\uDE30\uDE25\uDE22\uDE2D\uDE31\uDE16\uDE23\uDE1E\uDE13\uDE29\uDE2B\uDE24\uDE21\uDE20\uDE08\uDC7F\uDC80\uDCA9\uDC79-\uDC7B\uDC7D\uDC7E\uDE3A\uDE38\uDE39\uDE3B-\uDE3D\uDE40\uDE3F\uDE3E\uDE48-\uDE4A\uDC8B\uDC8C\uDC98\uDC9D\uDC96\uDC97\uDC93\uDC9E\uDC95\uDC9F\uDC94\uDC9B\uDC9A\uDC99\uDC9C\uDDA4\uDCAF\uDCA2\uDCA5\uDCAB\uDCA6\uDCA8\uDCA3\uDCAC\uDCAD\uDCA4\uDC40\uDC45\uDC44\uDC8F\uDC91\uDC6A\uDC64\uDC65\uDC63\uDC35\uDC12\uDC36\uDC29\uDC3A\uDC31\uDC08\uDC2F\uDC05\uDC06\uDC34\uDC0E\uDC2E\uDC02-\uDC04\uDC37\uDC16\uDC17\uDC3D\uDC0F\uDC11\uDC10\uDC2A\uDC2B\uDC18\uDC2D\uDC01\uDC00\uDC39\uDC30\uDC07\uDC3B\uDC28\uDC3C\uDC3E\uDC14\uDC13\uDC23-\uDC27\uDC38\uDC0A\uDC22\uDC0D\uDC32\uDC09\uDC33\uDC0B\uDC2C\uDC1F-\uDC21\uDC19\uDC1A\uDC0C\uDC1B-\uDC1E\uDC90\uDCAE\uDD2A\uDDFE\uDDFB\uDC92\uDDFC\uDDFD\uDD4C\uDED5\uDD4D\uDD4B\uDC88\uDE82-\uDE8A\uDE9D\uDE9E\uDE8B-\uDE8E\uDE90-\uDE9C\uDEF5\uDEFA\uDEB2\uDEF4\uDEF9\uDE8F\uDEA8\uDEA5\uDEA6\uDED1\uDEA7\uDEF6\uDEA4\uDEA2\uDEEB\uDEEC\uDCBA\uDE81\uDE9F-\uDEA1\uDE80\uDEF8\uDD5B\uDD67\uDD50\uDD5C\uDD51\uDD5D\uDD52\uDD5E\uDD53\uDD5F\uDD54\uDD60\uDD55\uDD61\uDD56\uDD62\uDD57\uDD63\uDD58\uDD64\uDD59\uDD65\uDD5A\uDD66\uDD25\uDCA7\uDEF7\uDD2E\uDC53-\uDC62\uDC51\uDC52\uDCFF\uDC84\uDC8D\uDC8E\uDD07-\uDD0A\uDCE2\uDCE3\uDCEF\uDD14\uDD15\uDCFB\uDCF1\uDCF2\uDCDE-\uDCE0\uDD0B\uDD0C\uDCBB\uDCBD-\uDCC0\uDCFA\uDCF7-\uDCF9\uDCFC\uDD0D\uDD0E\uDCA1\uDD26\uDCD4-\uDCDA\uDCD3\uDCD2\uDCC3\uDCDC\uDCC4\uDCF0\uDCD1\uDD16\uDCB0\uDCB4-\uDCB8\uDCB3\uDCB9\uDCB1\uDCB2\uDCE7-\uDCE9\uDCE4-\uDCE6\uDCEB\uDCEA\uDCEC-\uDCEE\uDCDD\uDCBC\uDCC1\uDCC2\uDCC5-\uDCD0\uDD12\uDD13\uDD0F-\uDD11\uDD28\uDD2B\uDD27\uDD29\uDD17\uDD2C\uDD2D\uDCE1\uDC89\uDC8A\uDEAA\uDEBD\uDEBF\uDEC1\uDED2\uDEAC\uDDFF\uDEAE\uDEB0\uDEB9-\uDEBC\uDEBE\uDEC2-\uDEC5\uDEB8\uDEAB\uDEB3\uDEAD\uDEAF\uDEB1\uDEB7\uDCF5\uDD1E\uDD03\uDD04\uDD19-\uDD1D\uDED0\uDD4E\uDD2F\uDD00-\uDD02\uDD3C\uDD3D\uDD05\uDD06\uDCF6\uDCF3\uDCF4\uDD31\uDCDB\uDD30\uDD1F-\uDD24\uDD34\uDFE0-\uDFE2\uDD35\uDFE3-\uDFE5\uDFE7-\uDFE9\uDFE6\uDFEA\uDFEB\uDD36-\uDD3B\uDCA0\uDD18\uDD33\uDD32\uDEA9])|\uD83E(?:[\uDD1A\uDD0F\uDD1E\uDD1F\uDD18\uDD19\uDD1B\uDD1C\uDD32\uDD33\uDDB5\uDDB6\uDDBB\uDDD2](?:\uD83C[\uDFFB-\uDFFF])?|\uDDD1(?:(?:\uD83C(?:[\uDFFB-\uDFFF](?:\u200D(?:\uD83E(?:\uDD1D\u200D\uD83E\uDDD1\uD83C[\uDFFB-\uDFFF]|[\uDDB0\uDDB1\uDDB3\uDDB2\uDDAF\uDDBC\uDDBD])|\u2695\uFE0F?|\uD83C[\uDF93\uDFEB\uDF3E\uDF73\uDFED\uDFA4\uDFA8]|\u2696\uFE0F?|\uD83D[\uDD27\uDCBC\uDD2C\uDCBB\uDE80\uDE92]|\u2708\uFE0F?))?)|\u200D(?:\uD83E(?:\uDD1D\u200D\uD83E\uDDD1|[\uDDB0\uDDB1\uDDB3\uDDB2\uDDAF\uDDBC\uDDBD])|\u2695\uFE0F?|\uD83C[\uDF93\uDFEB\uDF3E\uDF73\uDFED\uDFA4\uDFA8]|\u2696\uFE0F?|\uD83D[\uDD27\uDCBC\uDD2C\uDCBB\uDE80\uDE92]|\u2708\uFE0F?)))?|[\uDDD4\uDDD3](?:\uD83C[\uDFFB-\uDFFF])?|[\uDDCF\uDD26\uDD37](?:(?:\uD83C(?:[\uDFFB-\uDFFF](?:\u200D(?:[\u2642\u2640]\uFE0F?))?)|\u200D(?:[\u2642\u2640]\uFE0F?)))?|[\uDD34\uDDD5\uDD35\uDD30\uDD31\uDD36](?:\uD83C[\uDFFB-\uDFFF])?|[\uDDB8\uDDB9\uDDD9-\uDDDD](?:(?:\uD83C(?:[\uDFFB-\uDFFF](?:\u200D(?:[\u2642\u2640]\uFE0F?))?)|\u200D(?:[\u2642\u2640]\uFE0F?)))?|[\uDDDE\uDDDF](?:\u200D(?:[\u2642\u2640]\uFE0F?))?|[\uDDCD\uDDCE\uDDD6\uDDD7\uDD38](?:(?:\uD83C(?:[\uDFFB-\uDFFF](?:\u200D(?:[\u2642\u2640]\uFE0F?))?)|\u200D(?:[\u2642\u2640]\uFE0F?)))?|\uDD3C(?:\u200D(?:[\u2642\u2640]\uFE0F?))?|[\uDD3D\uDD3E\uDD39\uDDD8](?:(?:\uD83C(?:[\uDFFB-\uDFFF](?:\u200D(?:[\u2642\u2640]\uFE0F?))?)|\u200D(?:[\u2642\u2640]\uFE0F?)))?|[\uDD23\uDD70\uDD29\uDD2A\uDD11\uDD17\uDD2D\uDD2B\uDD14\uDD10\uDD28\uDD25\uDD24\uDD12\uDD15\uDD22\uDD2E\uDD27\uDD75\uDD76\uDD74\uDD2F\uDD20\uDD73\uDD13\uDDD0\uDD7A\uDD71\uDD2C\uDD21\uDD16\uDDE1\uDD0E\uDD0D\uDD1D\uDDBE\uDDBF\uDDE0\uDDB7\uDDB4\uDD3A\uDDB0\uDDB1\uDDB3\uDDB2\uDD8D\uDDA7\uDDAE\uDD8A\uDD9D\uDD81\uDD84\uDD93\uDD8C\uDD99\uDD92\uDD8F\uDD9B\uDD94\uDD87\uDDA5\uDDA6\uDDA8\uDD98\uDDA1\uDD83\uDD85\uDD86\uDDA2\uDD89\uDDA9\uDD9A\uDD9C\uDD8E\uDD95\uDD96\uDD88\uDD8B\uDD97\uDD82\uDD9F\uDDA0\uDD40\uDD6D\uDD5D\uDD65\uDD51\uDD54\uDD55\uDD52\uDD6C\uDD66\uDDC4\uDDC5\uDD5C\uDD50\uDD56\uDD68\uDD6F\uDD5E\uDDC7\uDDC0\uDD69\uDD53\uDD6A\uDD59\uDDC6\uDD5A\uDD58\uDD63\uDD57\uDDC8\uDDC2\uDD6B\uDD6E\uDD5F-\uDD61\uDD80\uDD9E\uDD90\uDD91\uDDAA\uDDC1\uDD67\uDD5B\uDD42\uDD43\uDD64\uDDC3\uDDC9\uDDCA\uDD62\uDD44\uDDED\uDDF1\uDDBD\uDDBC\uDE82\uDDF3\uDE90\uDDE8\uDDE7\uDD47-\uDD49\uDD4E\uDD4F\uDD4D\uDD4A\uDD4B\uDD45\uDD3F\uDD4C\uDE80\uDE81\uDDFF\uDDE9\uDDF8\uDDF5\uDDF6\uDD7D\uDD7C\uDDBA\uDDE3-\uDDE6\uDD7B\uDE71-\uDE73\uDD7E\uDD7F\uDE70\uDDE2\uDE95\uDD41\uDDEE\uDE94\uDDFE\uDE93\uDDAF\uDDF0\uDDF2\uDDEA-\uDDEC\uDE78-\uDE7A\uDE91\uDE92\uDDF4\uDDF7\uDDF9-\uDDFD\uDDEF])|[\u263A\u2639\u2620\u2763\u2764]\uFE0F?|\u270B(?:\uD83C[\uDFFB-\uDFFF])?|[\u270C\u261D](?:(?:\uD83C[\uDFFB-\uDFFF]|\uFE0F))?|\u270A(?:\uD83C[\uDFFB-\uDFFF])?|\u270D(?:(?:\uD83C[\uDFFB-\uDFFF]|\uFE0F))?|\uD83C(?:\uDF85(?:\uD83C[\uDFFB-\uDFFF])?|\uDFC3(?:(?:\uD83C(?:[\uDFFB-\uDFFF](?:\u200D(?:[\u2642\u2640]\uFE0F?))?)|\u200D(?:[\u2642\u2640]\uFE0F?)))?|[\uDFC7\uDFC2](?:\uD83C[\uDFFB-\uDFFF])?|\uDFCC(?:(?:\uFE0F(?:\u200D(?:[\u2642\u2640]\uFE0F?))?|\uD83C(?:[\uDFFB-\uDFFF](?:\u200D(?:[\u2642\u2640]\uFE0F?))?)|\u200D(?:[\u2642\u2640]\uFE0F?)))?|[\uDFC4\uDFCA](?:(?:\uD83C(?:[\uDFFB-\uDFFF](?:\u200D(?:[\u2642\u2640]\uFE0F?))?)|\u200D(?:[\u2642\u2640]\uFE0F?)))?|\uDFCB(?:(?:\uFE0F(?:\u200D(?:[\u2642\u2640]\uFE0F?))?|\uD83C(?:[\uDFFB-\uDFFF](?:\u200D(?:[\u2642\u2640]\uFE0F?))?)|\u200D(?:[\u2642\u2640]\uFE0F?)))?|[\uDFF5\uDF36\uDF7D\uDFD4-\uDFD6\uDFDC-\uDFDF\uDFDB\uDFD7\uDFD8\uDFDA\uDFD9\uDFCE\uDFCD\uDF21\uDF24-\uDF2C\uDF97\uDF9F\uDF96\uDF99-\uDF9B\uDF9E\uDFF7\uDD70\uDD71\uDD7E\uDD7F\uDE02\uDE37]\uFE0F?|\uDFF4(?:(?:\u200D\u2620\uFE0F?|\uDB40\uDC67\uDB40\uDC62\uDB40(?:\uDC65\uDB40\uDC6E\uDB40\uDC67\uDB40\uDC7F|\uDC73\uDB40\uDC63\uDB40\uDC74\uDB40\uDC7F|\uDC77\uDB40\uDC6C\uDB40\uDC73\uDB40\uDC7F)))?|\uDFF3(?:(?:\uFE0F(?:\u200D\uD83C\uDF08)?|\u200D\uD83C\uDF08))?|\uDDE6\uD83C[\uDDE8-\uDDEC\uDDEE\uDDF1\uDDF2\uDDF4\uDDF6-\uDDFA\uDDFC\uDDFD\uDDFF]|\uDDE7\uD83C[\uDDE6\uDDE7\uDDE9-\uDDEF\uDDF1-\uDDF4\uDDF6-\uDDF9\uDDFB\uDDFC\uDDFE\uDDFF]|\uDDE8\uD83C[\uDDE6\uDDE8\uDDE9\uDDEB-\uDDEE\uDDF0-\uDDF5\uDDF7\uDDFA-\uDDFF]|\uDDE9\uD83C[\uDDEA\uDDEC\uDDEF\uDDF0\uDDF2\uDDF4\uDDFF]|\uDDEA\uD83C[\uDDE6\uDDE8\uDDEA\uDDEC\uDDED\uDDF7-\uDDFA]|\uDDEB\uD83C[\uDDEE-\uDDF0\uDDF2\uDDF4\uDDF7]|\uDDEC\uD83C[\uDDE6\uDDE7\uDDE9-\uDDEE\uDDF1-\uDDF3\uDDF5-\uDDFA\uDDFC\uDDFE]|\uDDED\uD83C[\uDDF0\uDDF2\uDDF3\uDDF7\uDDF9\uDDFA]|\uDDEE\uD83C[\uDDE8-\uDDEA\uDDF1-\uDDF4\uDDF6-\uDDF9]|\uDDEF\uD83C[\uDDEA\uDDF2\uDDF4\uDDF5]|\uDDF0\uD83C[\uDDEA\uDDEC-\uDDEE\uDDF2\uDDF3\uDDF5\uDDF7\uDDFC\uDDFE\uDDFF]|\uDDF1\uD83C[\uDDE6-\uDDE8\uDDEE\uDDF0\uDDF7-\uDDFB\uDDFE]|\uDDF2\uD83C[\uDDE6\uDDE8-\uDDED\uDDF0-\uDDFF]|\uDDF3\uD83C[\uDDE6\uDDE8\uDDEA-\uDDEC\uDDEE\uDDF1\uDDF4\uDDF5\uDDF7\uDDFA\uDDFF]|\uDDF4\uD83C\uDDF2|\uDDF5\uD83C[\uDDE6\uDDEA-\uDDED\uDDF0-\uDDF3\uDDF7-\uDDF9\uDDFC\uDDFE]|\uDDF6\uD83C\uDDE6|\uDDF7\uD83C[\uDDEA\uDDF4\uDDF8\uDDFA\uDDFC]|\uDDF8\uD83C[\uDDE6-\uDDEA\uDDEC-\uDDF4\uDDF7-\uDDF9\uDDFB\uDDFD-\uDDFF]|\uDDF9\uD83C[\uDDE6\uDDE8\uDDE9\uDDEB-\uDDED\uDDEF-\uDDF4\uDDF7\uDDF9\uDDFB\uDDFC\uDDFF]|\uDDFA\uD83C[\uDDE6\uDDEC\uDDF2\uDDF3\uDDF8\uDDFE\uDDFF]|\uDDFB\uD83C[\uDDE6\uDDE8\uDDEA\uDDEC\uDDEE\uDDF3\uDDFA]|\uDDFC\uD83C[\uDDEB\uDDF8]|\uDDFD\uD83C\uDDF0|\uDDFE\uD83C[\uDDEA\uDDF9]|\uDDFF\uD83C[\uDDE6\uDDF2\uDDFC]|[\uDFFB-\uDFFF\uDF38-\uDF3C\uDF37\uDF31-\uDF35\uDF3E-\uDF43\uDF47-\uDF53\uDF45\uDF46\uDF3D\uDF44\uDF30\uDF5E\uDF56\uDF57\uDF54\uDF5F\uDF55\uDF2D-\uDF2F\uDF73\uDF72\uDF7F\uDF71\uDF58-\uDF5D\uDF60\uDF62-\uDF65\uDF61\uDF66-\uDF6A\uDF82\uDF70\uDF6B-\uDF6F\uDF7C\uDF75\uDF76\uDF7E\uDF77-\uDF7B\uDF74\uDFFA\uDF0D-\uDF10\uDF0B\uDFE0-\uDFE6\uDFE8-\uDFED\uDFEF\uDFF0\uDF01\uDF03-\uDF07\uDF09\uDFA0-\uDFA2\uDFAA\uDF11-\uDF20\uDF0C\uDF00\uDF08\uDF02\uDF0A\uDF83\uDF84\uDF86-\uDF8B\uDF8D-\uDF91\uDF80\uDF81\uDFAB\uDFC6\uDFC5\uDFC0\uDFD0\uDFC8\uDFC9\uDFBE\uDFB3\uDFCF\uDFD1-\uDFD3\uDFF8\uDFA3\uDFBD\uDFBF\uDFAF\uDFB1\uDFAE\uDFB0\uDFB2\uDCCF\uDC04\uDFB4\uDFAD\uDFA8\uDF92\uDFA9\uDF93\uDFBC\uDFB5\uDFB6\uDFA4\uDFA7\uDFB7-\uDFBB\uDFA5\uDFAC\uDFEE\uDFF9\uDFE7\uDFA6\uDD8E\uDD91-\uDD9A\uDE01\uDE36\uDE2F\uDE50\uDE39\uDE1A\uDE32\uDE51\uDE38\uDE34\uDE33\uDE3A\uDE35\uDFC1\uDF8C])|\u26F7\uFE0F?|\u26F9(?:(?:\uFE0F(?:\u200D(?:[\u2642\u2640]\uFE0F?))?|\uD83C(?:[\uDFFB-\uDFFF](?:\u200D(?:[\u2642\u2640]\uFE0F?))?)|\u200D(?:[\u2642\u2640]\uFE0F?)))?|[\u2618\u26F0\u26E9\u2668\u26F4\u2708\u23F1\u23F2\u2600\u2601\u26C8\u2602\u26F1\u2744\u2603\u2604\u26F8\u2660\u2665\u2666\u2663\u265F\u26D1\u260E\u2328\u2709\u270F\u2712\u2702\u26CF\u2692\u2694\u2699\u2696\u26D3\u2697\u26B0\u26B1\u26A0\u2622\u2623\u2B06\u2197\u27A1\u2198\u2B07\u2199\u2B05\u2196\u2195\u2194\u21A9\u21AA\u2934\u2935\u269B\u2721\u2638\u262F\u271D\u2626\u262A\u262E\u25B6\u23ED\u23EF\u25C0\u23EE\u23F8-\u23FA\u23CF\u2640\u2642\u2695\u267E\u267B\u269C\u2611\u2714\u2716\u303D\u2733\u2734\u2747\u203C\u2049\u3030\u00A9\u00AE\u2122]\uFE0F?|[\u0023\u002A\u0030-\u0039](?:\uFE0F\u20E3|\u20E3)|[\u2139\u24C2\u3297\u3299\u25FC\u25FB\u25AA\u25AB]\uFE0F?|[\u2615\u26EA\u26F2\u26FA\u26FD\u2693\u26F5\u231B\u23F3\u231A\u23F0\u2B50\u26C5\u2614\u26A1\u26C4\u2728\u26BD\u26BE\u26F3\u267F\u26D4\u2648-\u2653\u26CE\u23E9-\u23EC\u2B55\u2705\u274C\u274E\u2795-\u2797\u27B0\u27BF\u2753-\u2755\u2757\u26AB\u26AA\u2B1B\u2B1C\u25FE\u25FD])"; + } +} \ No newline at end of file diff --git a/src/BirdsiteLive.Domain/Tools/StatusExtractor.cs b/src/BirdsiteLive.Domain/Tools/StatusExtractor.cs index 778f6b5..a181ac2 100644 --- a/src/BirdsiteLive.Domain/Tools/StatusExtractor.cs +++ b/src/BirdsiteLive.Domain/Tools/StatusExtractor.cs @@ -2,6 +2,7 @@ using System.Linq; using System.Text.RegularExpressions; using BirdsiteLive.ActivityPub.Models; +using BirdsiteLive.Common.Regexes; using BirdsiteLive.Common.Settings; namespace BirdsiteLive.Domain.Tools @@ -25,8 +26,6 @@ namespace BirdsiteLive.Domain.Tools private readonly Regex _urlRegex = new Regex(@"((http|ftp|https):\/\/[\w\-_]+(\.[\w\-_]+)+([\w\-\.,@?^=%&:/~\+#]*[\w\-\@?^=%&/~\+#])?)"); - private readonly Regex _emojiRegex = new Regex(EmojiPattern); - private readonly InstanceSettings _instanceSettings; #region Ctor @@ -48,7 +47,7 @@ namespace BirdsiteLive.Domain.Tools messageContent = Regex.Replace(messageContent, @"\(#", "( #"); // Secure emojis - var emojiMatch = _emojiRegex.Matches(messageContent); + var emojiMatch = EmojiRegexes.Emoji.Matches(messageContent); foreach (Match m in emojiMatch) messageContent = Regex.Replace(messageContent, m.ToString(), $" {m} "); @@ -141,8 +140,5 @@ namespace BirdsiteLive.Domain.Tools return result; } - - - private const string EmojiPattern = @"(?:\uD83D(?:\uDD73\uFE0F?|\uDC41(?:(?:\uFE0F(?:\u200D\uD83D\uDDE8\uFE0F?)?|\u200D\uD83D\uDDE8\uFE0F?))?|[\uDDE8\uDDEF]\uFE0F?|\uDC4B(?:\uD83C[\uDFFB-\uDFFF])?|\uDD90(?:(?:\uD83C[\uDFFB-\uDFFF]|\uFE0F))?|[\uDD96\uDC4C\uDC48\uDC49\uDC46\uDD95\uDC47\uDC4D\uDC4E\uDC4A\uDC4F\uDE4C\uDC50\uDE4F\uDC85\uDCAA\uDC42\uDC43\uDC76\uDC66\uDC67](?:\uD83C[\uDFFB-\uDFFF])?|\uDC71(?:(?:\uD83C(?:[\uDFFB-\uDFFF](?:\u200D(?:[\u2640\u2642]\uFE0F?))?)|\u200D(?:[\u2640\u2642]\uFE0F?)))?|\uDC68(?:(?:\uD83C(?:\uDFFB(?:\u200D(?:\uD83E(?:\uDD1D\u200D\uD83D\uDC68\uD83C[\uDFFC-\uDFFF]|[\uDDB0\uDDB1\uDDB3\uDDB2\uDDAF\uDDBC\uDDBD])|\u2695\uFE0F?|\uD83C[\uDF93\uDFEB\uDF3E\uDF73\uDFED\uDFA4\uDFA8]|\u2696\uFE0F?|\uD83D[\uDD27\uDCBC\uDD2C\uDCBB\uDE80\uDE92]|\u2708\uFE0F?))?|\uDFFC(?:\u200D(?:\uD83E(?:\uDD1D\u200D\uD83D\uDC68\uD83C[\uDFFB\uDFFD-\uDFFF]|[\uDDB0\uDDB1\uDDB3\uDDB2\uDDAF\uDDBC\uDDBD])|\u2695\uFE0F?|\uD83C[\uDF93\uDFEB\uDF3E\uDF73\uDFED\uDFA4\uDFA8]|\u2696\uFE0F?|\uD83D[\uDD27\uDCBC\uDD2C\uDCBB\uDE80\uDE92]|\u2708\uFE0F?))?|\uDFFD(?:\u200D(?:\uD83E(?:\uDD1D\u200D\uD83D\uDC68\uD83C[\uDFFB\uDFFC\uDFFE\uDFFF]|[\uDDB0\uDDB1\uDDB3\uDDB2\uDDAF\uDDBC\uDDBD])|\u2695\uFE0F?|\uD83C[\uDF93\uDFEB\uDF3E\uDF73\uDFED\uDFA4\uDFA8]|\u2696\uFE0F?|\uD83D[\uDD27\uDCBC\uDD2C\uDCBB\uDE80\uDE92]|\u2708\uFE0F?))?|\uDFFE(?:\u200D(?:\uD83E(?:\uDD1D\u200D\uD83D\uDC68\uD83C[\uDFFB-\uDFFD\uDFFF]|[\uDDB0\uDDB1\uDDB3\uDDB2\uDDAF\uDDBC\uDDBD])|\u2695\uFE0F?|\uD83C[\uDF93\uDFEB\uDF3E\uDF73\uDFED\uDFA4\uDFA8]|\u2696\uFE0F?|\uD83D[\uDD27\uDCBC\uDD2C\uDCBB\uDE80\uDE92]|\u2708\uFE0F?))?|\uDFFF(?:\u200D(?:\uD83E(?:\uDD1D\u200D\uD83D\uDC68\uD83C[\uDFFB-\uDFFE]|[\uDDB0\uDDB1\uDDB3\uDDB2\uDDAF\uDDBC\uDDBD])|\u2695\uFE0F?|\uD83C[\uDF93\uDFEB\uDF3E\uDF73\uDFED\uDFA4\uDFA8]|\u2696\uFE0F?|\uD83D[\uDD27\uDCBC\uDD2C\uDCBB\uDE80\uDE92]|\u2708\uFE0F?))?)|\u200D(?:\uD83E[\uDDB0\uDDB1\uDDB3\uDDB2\uDDAF\uDDBC\uDDBD]|\u2695\uFE0F?|\uD83C[\uDF93\uDFEB\uDF3E\uDF73\uDFED\uDFA4\uDFA8]|\u2696\uFE0F?|\uD83D(?:\uDC69\u200D\uD83D(?:\uDC66(?:\u200D\uD83D\uDC66)?|\uDC67(?:\u200D\uD83D[\uDC66\uDC67])?)|\uDC68\u200D\uD83D(?:\uDC66(?:\u200D\uD83D\uDC66)?|\uDC67(?:\u200D\uD83D[\uDC66\uDC67])?)|\uDC66(?:\u200D\uD83D\uDC66)?|\uDC67(?:\u200D\uD83D[\uDC66\uDC67])?|[\uDD27\uDCBC\uDD2C\uDCBB\uDE80\uDE92])|\u2708\uFE0F?|\u2764(?:\uFE0F\u200D\uD83D(?:\uDC8B\u200D\uD83D\uDC68|\uDC68)|\u200D\uD83D(?:\uDC8B\u200D\uD83D\uDC68|\uDC68)))))?|\uDC69(?:(?:\uD83C(?:\uDFFB(?:\u200D(?:\uD83E(?:\uDD1D\u200D\uD83D(?:\uDC69\uD83C[\uDFFC-\uDFFF]|\uDC68\uD83C[\uDFFC-\uDFFF])|[\uDDB0\uDDB1\uDDB3\uDDB2\uDDAF\uDDBC\uDDBD])|\u2695\uFE0F?|\uD83C[\uDF93\uDFEB\uDF3E\uDF73\uDFED\uDFA4\uDFA8]|\u2696\uFE0F?|\uD83D[\uDD27\uDCBC\uDD2C\uDCBB\uDE80\uDE92]|\u2708\uFE0F?))?|\uDFFC(?:\u200D(?:\uD83E(?:\uDD1D\u200D\uD83D(?:\uDC69\uD83C[\uDFFB\uDFFD-\uDFFF]|\uDC68\uD83C[\uDFFB\uDFFD-\uDFFF])|[\uDDB0\uDDB1\uDDB3\uDDB2\uDDAF\uDDBC\uDDBD])|\u2695\uFE0F?|\uD83C[\uDF93\uDFEB\uDF3E\uDF73\uDFED\uDFA4\uDFA8]|\u2696\uFE0F?|\uD83D[\uDD27\uDCBC\uDD2C\uDCBB\uDE80\uDE92]|\u2708\uFE0F?))?|\uDFFD(?:\u200D(?:\uD83E(?:\uDD1D\u200D\uD83D(?:\uDC69\uD83C[\uDFFB\uDFFC\uDFFE\uDFFF]|\uDC68\uD83C[\uDFFB\uDFFC\uDFFE\uDFFF])|[\uDDB0\uDDB1\uDDB3\uDDB2\uDDAF\uDDBC\uDDBD])|\u2695\uFE0F?|\uD83C[\uDF93\uDFEB\uDF3E\uDF73\uDFED\uDFA4\uDFA8]|\u2696\uFE0F?|\uD83D[\uDD27\uDCBC\uDD2C\uDCBB\uDE80\uDE92]|\u2708\uFE0F?))?|\uDFFE(?:\u200D(?:\uD83E(?:\uDD1D\u200D\uD83D(?:\uDC69\uD83C[\uDFFB-\uDFFD\uDFFF]|\uDC68\uD83C[\uDFFB-\uDFFD\uDFFF])|[\uDDB0\uDDB1\uDDB3\uDDB2\uDDAF\uDDBC\uDDBD])|\u2695\uFE0F?|\uD83C[\uDF93\uDFEB\uDF3E\uDF73\uDFED\uDFA4\uDFA8]|\u2696\uFE0F?|\uD83D[\uDD27\uDCBC\uDD2C\uDCBB\uDE80\uDE92]|\u2708\uFE0F?))?|\uDFFF(?:\u200D(?:\uD83E(?:\uDD1D\u200D\uD83D(?:\uDC69\uD83C[\uDFFB-\uDFFE]|\uDC68\uD83C[\uDFFB-\uDFFE])|[\uDDB0\uDDB1\uDDB3\uDDB2\uDDAF\uDDBC\uDDBD])|\u2695\uFE0F?|\uD83C[\uDF93\uDFEB\uDF3E\uDF73\uDFED\uDFA4\uDFA8]|\u2696\uFE0F?|\uD83D[\uDD27\uDCBC\uDD2C\uDCBB\uDE80\uDE92]|\u2708\uFE0F?))?)|\u200D(?:\uD83E[\uDDB0\uDDB1\uDDB3\uDDB2\uDDAF\uDDBC\uDDBD]|\u2695\uFE0F?|\uD83C[\uDF93\uDFEB\uDF3E\uDF73\uDFED\uDFA4\uDFA8]|\u2696\uFE0F?|\uD83D(?:\uDC69\u200D\uD83D(?:\uDC66(?:\u200D\uD83D\uDC66)?|\uDC67(?:\u200D\uD83D[\uDC66\uDC67])?)|\uDC66(?:\u200D\uD83D\uDC66)?|\uDC67(?:\u200D\uD83D[\uDC66\uDC67])?|[\uDD27\uDCBC\uDD2C\uDCBB\uDE80\uDE92])|\u2708\uFE0F?|\u2764(?:\uFE0F\u200D\uD83D(?:\uDC8B\u200D\uD83D[\uDC68\uDC69]|[\uDC68\uDC69])|\u200D\uD83D(?:\uDC8B\u200D\uD83D[\uDC68\uDC69]|[\uDC68\uDC69])))))?|[\uDC74\uDC75](?:\uD83C[\uDFFB-\uDFFF])?|[\uDE4D\uDE4E\uDE45\uDE46\uDC81\uDE4B\uDE47\uDC6E](?:(?:\uD83C(?:[\uDFFB-\uDFFF](?:\u200D(?:[\u2642\u2640]\uFE0F?))?)|\u200D(?:[\u2642\u2640]\uFE0F?)))?|\uDD75(?:(?:\uFE0F(?:\u200D(?:[\u2642\u2640]\uFE0F?))?|\uD83C(?:[\uDFFB-\uDFFF](?:\u200D(?:[\u2642\u2640]\uFE0F?))?)|\u200D(?:[\u2642\u2640]\uFE0F?)))?|[\uDC82\uDC77](?:(?:\uD83C(?:[\uDFFB-\uDFFF](?:\u200D(?:[\u2642\u2640]\uFE0F?))?)|\u200D(?:[\u2642\u2640]\uFE0F?)))?|\uDC78(?:\uD83C[\uDFFB-\uDFFF])?|\uDC73(?:(?:\uD83C(?:[\uDFFB-\uDFFF](?:\u200D(?:[\u2642\u2640]\uFE0F?))?)|\u200D(?:[\u2642\u2640]\uFE0F?)))?|[\uDC72\uDC70\uDC7C](?:\uD83C[\uDFFB-\uDFFF])?|[\uDC86\uDC87\uDEB6](?:(?:\uD83C(?:[\uDFFB-\uDFFF](?:\u200D(?:[\u2642\u2640]\uFE0F?))?)|\u200D(?:[\u2642\u2640]\uFE0F?)))?|[\uDC83\uDD7A](?:\uD83C[\uDFFB-\uDFFF])?|\uDD74(?:(?:\uD83C[\uDFFB-\uDFFF]|\uFE0F))?|\uDC6F(?:\u200D(?:[\u2642\u2640]\uFE0F?))?|[\uDEA3\uDEB4\uDEB5](?:(?:\uD83C(?:[\uDFFB-\uDFFF](?:\u200D(?:[\u2642\u2640]\uFE0F?))?)|\u200D(?:[\u2642\u2640]\uFE0F?)))?|[\uDEC0\uDECC\uDC6D\uDC6B\uDC6C](?:\uD83C[\uDFFB-\uDFFF])?|\uDDE3\uFE0F?|\uDC15(?:\u200D\uD83E\uDDBA)?|[\uDC3F\uDD4A\uDD77\uDD78\uDDFA\uDEE3\uDEE4\uDEE2\uDEF3\uDEE5\uDEE9\uDEF0\uDECE\uDD70\uDD79\uDDBC\uDD76\uDECD\uDDA5\uDDA8\uDDB1\uDDB2\uDCFD\uDD6F\uDDDE\uDDF3\uDD8B\uDD8A\uDD8C\uDD8D\uDDC2\uDDD2\uDDD3\uDD87\uDDC3\uDDC4\uDDD1\uDDDD\uDEE0\uDDE1\uDEE1\uDDDC\uDECF\uDECB\uDD49]\uFE0F?|[\uDE00\uDE03\uDE04\uDE01\uDE06\uDE05\uDE02\uDE42\uDE43\uDE09\uDE0A\uDE07\uDE0D\uDE18\uDE17\uDE1A\uDE19\uDE0B\uDE1B-\uDE1D\uDE10\uDE11\uDE36\uDE0F\uDE12\uDE44\uDE2C\uDE0C\uDE14\uDE2A\uDE34\uDE37\uDE35\uDE0E\uDE15\uDE1F\uDE41\uDE2E\uDE2F\uDE32\uDE33\uDE26-\uDE28\uDE30\uDE25\uDE22\uDE2D\uDE31\uDE16\uDE23\uDE1E\uDE13\uDE29\uDE2B\uDE24\uDE21\uDE20\uDE08\uDC7F\uDC80\uDCA9\uDC79-\uDC7B\uDC7D\uDC7E\uDE3A\uDE38\uDE39\uDE3B-\uDE3D\uDE40\uDE3F\uDE3E\uDE48-\uDE4A\uDC8B\uDC8C\uDC98\uDC9D\uDC96\uDC97\uDC93\uDC9E\uDC95\uDC9F\uDC94\uDC9B\uDC9A\uDC99\uDC9C\uDDA4\uDCAF\uDCA2\uDCA5\uDCAB\uDCA6\uDCA8\uDCA3\uDCAC\uDCAD\uDCA4\uDC40\uDC45\uDC44\uDC8F\uDC91\uDC6A\uDC64\uDC65\uDC63\uDC35\uDC12\uDC36\uDC29\uDC3A\uDC31\uDC08\uDC2F\uDC05\uDC06\uDC34\uDC0E\uDC2E\uDC02-\uDC04\uDC37\uDC16\uDC17\uDC3D\uDC0F\uDC11\uDC10\uDC2A\uDC2B\uDC18\uDC2D\uDC01\uDC00\uDC39\uDC30\uDC07\uDC3B\uDC28\uDC3C\uDC3E\uDC14\uDC13\uDC23-\uDC27\uDC38\uDC0A\uDC22\uDC0D\uDC32\uDC09\uDC33\uDC0B\uDC2C\uDC1F-\uDC21\uDC19\uDC1A\uDC0C\uDC1B-\uDC1E\uDC90\uDCAE\uDD2A\uDDFE\uDDFB\uDC92\uDDFC\uDDFD\uDD4C\uDED5\uDD4D\uDD4B\uDC88\uDE82-\uDE8A\uDE9D\uDE9E\uDE8B-\uDE8E\uDE90-\uDE9C\uDEF5\uDEFA\uDEB2\uDEF4\uDEF9\uDE8F\uDEA8\uDEA5\uDEA6\uDED1\uDEA7\uDEF6\uDEA4\uDEA2\uDEEB\uDEEC\uDCBA\uDE81\uDE9F-\uDEA1\uDE80\uDEF8\uDD5B\uDD67\uDD50\uDD5C\uDD51\uDD5D\uDD52\uDD5E\uDD53\uDD5F\uDD54\uDD60\uDD55\uDD61\uDD56\uDD62\uDD57\uDD63\uDD58\uDD64\uDD59\uDD65\uDD5A\uDD66\uDD25\uDCA7\uDEF7\uDD2E\uDC53-\uDC62\uDC51\uDC52\uDCFF\uDC84\uDC8D\uDC8E\uDD07-\uDD0A\uDCE2\uDCE3\uDCEF\uDD14\uDD15\uDCFB\uDCF1\uDCF2\uDCDE-\uDCE0\uDD0B\uDD0C\uDCBB\uDCBD-\uDCC0\uDCFA\uDCF7-\uDCF9\uDCFC\uDD0D\uDD0E\uDCA1\uDD26\uDCD4-\uDCDA\uDCD3\uDCD2\uDCC3\uDCDC\uDCC4\uDCF0\uDCD1\uDD16\uDCB0\uDCB4-\uDCB8\uDCB3\uDCB9\uDCB1\uDCB2\uDCE7-\uDCE9\uDCE4-\uDCE6\uDCEB\uDCEA\uDCEC-\uDCEE\uDCDD\uDCBC\uDCC1\uDCC2\uDCC5-\uDCD0\uDD12\uDD13\uDD0F-\uDD11\uDD28\uDD2B\uDD27\uDD29\uDD17\uDD2C\uDD2D\uDCE1\uDC89\uDC8A\uDEAA\uDEBD\uDEBF\uDEC1\uDED2\uDEAC\uDDFF\uDEAE\uDEB0\uDEB9-\uDEBC\uDEBE\uDEC2-\uDEC5\uDEB8\uDEAB\uDEB3\uDEAD\uDEAF\uDEB1\uDEB7\uDCF5\uDD1E\uDD03\uDD04\uDD19-\uDD1D\uDED0\uDD4E\uDD2F\uDD00-\uDD02\uDD3C\uDD3D\uDD05\uDD06\uDCF6\uDCF3\uDCF4\uDD31\uDCDB\uDD30\uDD1F-\uDD24\uDD34\uDFE0-\uDFE2\uDD35\uDFE3-\uDFE5\uDFE7-\uDFE9\uDFE6\uDFEA\uDFEB\uDD36-\uDD3B\uDCA0\uDD18\uDD33\uDD32\uDEA9])|\uD83E(?:[\uDD1A\uDD0F\uDD1E\uDD1F\uDD18\uDD19\uDD1B\uDD1C\uDD32\uDD33\uDDB5\uDDB6\uDDBB\uDDD2](?:\uD83C[\uDFFB-\uDFFF])?|\uDDD1(?:(?:\uD83C(?:[\uDFFB-\uDFFF](?:\u200D(?:\uD83E(?:\uDD1D\u200D\uD83E\uDDD1\uD83C[\uDFFB-\uDFFF]|[\uDDB0\uDDB1\uDDB3\uDDB2\uDDAF\uDDBC\uDDBD])|\u2695\uFE0F?|\uD83C[\uDF93\uDFEB\uDF3E\uDF73\uDFED\uDFA4\uDFA8]|\u2696\uFE0F?|\uD83D[\uDD27\uDCBC\uDD2C\uDCBB\uDE80\uDE92]|\u2708\uFE0F?))?)|\u200D(?:\uD83E(?:\uDD1D\u200D\uD83E\uDDD1|[\uDDB0\uDDB1\uDDB3\uDDB2\uDDAF\uDDBC\uDDBD])|\u2695\uFE0F?|\uD83C[\uDF93\uDFEB\uDF3E\uDF73\uDFED\uDFA4\uDFA8]|\u2696\uFE0F?|\uD83D[\uDD27\uDCBC\uDD2C\uDCBB\uDE80\uDE92]|\u2708\uFE0F?)))?|[\uDDD4\uDDD3](?:\uD83C[\uDFFB-\uDFFF])?|[\uDDCF\uDD26\uDD37](?:(?:\uD83C(?:[\uDFFB-\uDFFF](?:\u200D(?:[\u2642\u2640]\uFE0F?))?)|\u200D(?:[\u2642\u2640]\uFE0F?)))?|[\uDD34\uDDD5\uDD35\uDD30\uDD31\uDD36](?:\uD83C[\uDFFB-\uDFFF])?|[\uDDB8\uDDB9\uDDD9-\uDDDD](?:(?:\uD83C(?:[\uDFFB-\uDFFF](?:\u200D(?:[\u2642\u2640]\uFE0F?))?)|\u200D(?:[\u2642\u2640]\uFE0F?)))?|[\uDDDE\uDDDF](?:\u200D(?:[\u2642\u2640]\uFE0F?))?|[\uDDCD\uDDCE\uDDD6\uDDD7\uDD38](?:(?:\uD83C(?:[\uDFFB-\uDFFF](?:\u200D(?:[\u2642\u2640]\uFE0F?))?)|\u200D(?:[\u2642\u2640]\uFE0F?)))?|\uDD3C(?:\u200D(?:[\u2642\u2640]\uFE0F?))?|[\uDD3D\uDD3E\uDD39\uDDD8](?:(?:\uD83C(?:[\uDFFB-\uDFFF](?:\u200D(?:[\u2642\u2640]\uFE0F?))?)|\u200D(?:[\u2642\u2640]\uFE0F?)))?|[\uDD23\uDD70\uDD29\uDD2A\uDD11\uDD17\uDD2D\uDD2B\uDD14\uDD10\uDD28\uDD25\uDD24\uDD12\uDD15\uDD22\uDD2E\uDD27\uDD75\uDD76\uDD74\uDD2F\uDD20\uDD73\uDD13\uDDD0\uDD7A\uDD71\uDD2C\uDD21\uDD16\uDDE1\uDD0E\uDD0D\uDD1D\uDDBE\uDDBF\uDDE0\uDDB7\uDDB4\uDD3A\uDDB0\uDDB1\uDDB3\uDDB2\uDD8D\uDDA7\uDDAE\uDD8A\uDD9D\uDD81\uDD84\uDD93\uDD8C\uDD99\uDD92\uDD8F\uDD9B\uDD94\uDD87\uDDA5\uDDA6\uDDA8\uDD98\uDDA1\uDD83\uDD85\uDD86\uDDA2\uDD89\uDDA9\uDD9A\uDD9C\uDD8E\uDD95\uDD96\uDD88\uDD8B\uDD97\uDD82\uDD9F\uDDA0\uDD40\uDD6D\uDD5D\uDD65\uDD51\uDD54\uDD55\uDD52\uDD6C\uDD66\uDDC4\uDDC5\uDD5C\uDD50\uDD56\uDD68\uDD6F\uDD5E\uDDC7\uDDC0\uDD69\uDD53\uDD6A\uDD59\uDDC6\uDD5A\uDD58\uDD63\uDD57\uDDC8\uDDC2\uDD6B\uDD6E\uDD5F-\uDD61\uDD80\uDD9E\uDD90\uDD91\uDDAA\uDDC1\uDD67\uDD5B\uDD42\uDD43\uDD64\uDDC3\uDDC9\uDDCA\uDD62\uDD44\uDDED\uDDF1\uDDBD\uDDBC\uDE82\uDDF3\uDE90\uDDE8\uDDE7\uDD47-\uDD49\uDD4E\uDD4F\uDD4D\uDD4A\uDD4B\uDD45\uDD3F\uDD4C\uDE80\uDE81\uDDFF\uDDE9\uDDF8\uDDF5\uDDF6\uDD7D\uDD7C\uDDBA\uDDE3-\uDDE6\uDD7B\uDE71-\uDE73\uDD7E\uDD7F\uDE70\uDDE2\uDE95\uDD41\uDDEE\uDE94\uDDFE\uDE93\uDDAF\uDDF0\uDDF2\uDDEA-\uDDEC\uDE78-\uDE7A\uDE91\uDE92\uDDF4\uDDF7\uDDF9-\uDDFD\uDDEF])|[\u263A\u2639\u2620\u2763\u2764]\uFE0F?|\u270B(?:\uD83C[\uDFFB-\uDFFF])?|[\u270C\u261D](?:(?:\uD83C[\uDFFB-\uDFFF]|\uFE0F))?|\u270A(?:\uD83C[\uDFFB-\uDFFF])?|\u270D(?:(?:\uD83C[\uDFFB-\uDFFF]|\uFE0F))?|\uD83C(?:\uDF85(?:\uD83C[\uDFFB-\uDFFF])?|\uDFC3(?:(?:\uD83C(?:[\uDFFB-\uDFFF](?:\u200D(?:[\u2642\u2640]\uFE0F?))?)|\u200D(?:[\u2642\u2640]\uFE0F?)))?|[\uDFC7\uDFC2](?:\uD83C[\uDFFB-\uDFFF])?|\uDFCC(?:(?:\uFE0F(?:\u200D(?:[\u2642\u2640]\uFE0F?))?|\uD83C(?:[\uDFFB-\uDFFF](?:\u200D(?:[\u2642\u2640]\uFE0F?))?)|\u200D(?:[\u2642\u2640]\uFE0F?)))?|[\uDFC4\uDFCA](?:(?:\uD83C(?:[\uDFFB-\uDFFF](?:\u200D(?:[\u2642\u2640]\uFE0F?))?)|\u200D(?:[\u2642\u2640]\uFE0F?)))?|\uDFCB(?:(?:\uFE0F(?:\u200D(?:[\u2642\u2640]\uFE0F?))?|\uD83C(?:[\uDFFB-\uDFFF](?:\u200D(?:[\u2642\u2640]\uFE0F?))?)|\u200D(?:[\u2642\u2640]\uFE0F?)))?|[\uDFF5\uDF36\uDF7D\uDFD4-\uDFD6\uDFDC-\uDFDF\uDFDB\uDFD7\uDFD8\uDFDA\uDFD9\uDFCE\uDFCD\uDF21\uDF24-\uDF2C\uDF97\uDF9F\uDF96\uDF99-\uDF9B\uDF9E\uDFF7\uDD70\uDD71\uDD7E\uDD7F\uDE02\uDE37]\uFE0F?|\uDFF4(?:(?:\u200D\u2620\uFE0F?|\uDB40\uDC67\uDB40\uDC62\uDB40(?:\uDC65\uDB40\uDC6E\uDB40\uDC67\uDB40\uDC7F|\uDC73\uDB40\uDC63\uDB40\uDC74\uDB40\uDC7F|\uDC77\uDB40\uDC6C\uDB40\uDC73\uDB40\uDC7F)))?|\uDFF3(?:(?:\uFE0F(?:\u200D\uD83C\uDF08)?|\u200D\uD83C\uDF08))?|\uDDE6\uD83C[\uDDE8-\uDDEC\uDDEE\uDDF1\uDDF2\uDDF4\uDDF6-\uDDFA\uDDFC\uDDFD\uDDFF]|\uDDE7\uD83C[\uDDE6\uDDE7\uDDE9-\uDDEF\uDDF1-\uDDF4\uDDF6-\uDDF9\uDDFB\uDDFC\uDDFE\uDDFF]|\uDDE8\uD83C[\uDDE6\uDDE8\uDDE9\uDDEB-\uDDEE\uDDF0-\uDDF5\uDDF7\uDDFA-\uDDFF]|\uDDE9\uD83C[\uDDEA\uDDEC\uDDEF\uDDF0\uDDF2\uDDF4\uDDFF]|\uDDEA\uD83C[\uDDE6\uDDE8\uDDEA\uDDEC\uDDED\uDDF7-\uDDFA]|\uDDEB\uD83C[\uDDEE-\uDDF0\uDDF2\uDDF4\uDDF7]|\uDDEC\uD83C[\uDDE6\uDDE7\uDDE9-\uDDEE\uDDF1-\uDDF3\uDDF5-\uDDFA\uDDFC\uDDFE]|\uDDED\uD83C[\uDDF0\uDDF2\uDDF3\uDDF7\uDDF9\uDDFA]|\uDDEE\uD83C[\uDDE8-\uDDEA\uDDF1-\uDDF4\uDDF6-\uDDF9]|\uDDEF\uD83C[\uDDEA\uDDF2\uDDF4\uDDF5]|\uDDF0\uD83C[\uDDEA\uDDEC-\uDDEE\uDDF2\uDDF3\uDDF5\uDDF7\uDDFC\uDDFE\uDDFF]|\uDDF1\uD83C[\uDDE6-\uDDE8\uDDEE\uDDF0\uDDF7-\uDDFB\uDDFE]|\uDDF2\uD83C[\uDDE6\uDDE8-\uDDED\uDDF0-\uDDFF]|\uDDF3\uD83C[\uDDE6\uDDE8\uDDEA-\uDDEC\uDDEE\uDDF1\uDDF4\uDDF5\uDDF7\uDDFA\uDDFF]|\uDDF4\uD83C\uDDF2|\uDDF5\uD83C[\uDDE6\uDDEA-\uDDED\uDDF0-\uDDF3\uDDF7-\uDDF9\uDDFC\uDDFE]|\uDDF6\uD83C\uDDE6|\uDDF7\uD83C[\uDDEA\uDDF4\uDDF8\uDDFA\uDDFC]|\uDDF8\uD83C[\uDDE6-\uDDEA\uDDEC-\uDDF4\uDDF7-\uDDF9\uDDFB\uDDFD-\uDDFF]|\uDDF9\uD83C[\uDDE6\uDDE8\uDDE9\uDDEB-\uDDED\uDDEF-\uDDF4\uDDF7\uDDF9\uDDFB\uDDFC\uDDFF]|\uDDFA\uD83C[\uDDE6\uDDEC\uDDF2\uDDF3\uDDF8\uDDFE\uDDFF]|\uDDFB\uD83C[\uDDE6\uDDE8\uDDEA\uDDEC\uDDEE\uDDF3\uDDFA]|\uDDFC\uD83C[\uDDEB\uDDF8]|\uDDFD\uD83C\uDDF0|\uDDFE\uD83C[\uDDEA\uDDF9]|\uDDFF\uD83C[\uDDE6\uDDF2\uDDFC]|[\uDFFB-\uDFFF\uDF38-\uDF3C\uDF37\uDF31-\uDF35\uDF3E-\uDF43\uDF47-\uDF53\uDF45\uDF46\uDF3D\uDF44\uDF30\uDF5E\uDF56\uDF57\uDF54\uDF5F\uDF55\uDF2D-\uDF2F\uDF73\uDF72\uDF7F\uDF71\uDF58-\uDF5D\uDF60\uDF62-\uDF65\uDF61\uDF66-\uDF6A\uDF82\uDF70\uDF6B-\uDF6F\uDF7C\uDF75\uDF76\uDF7E\uDF77-\uDF7B\uDF74\uDFFA\uDF0D-\uDF10\uDF0B\uDFE0-\uDFE6\uDFE8-\uDFED\uDFEF\uDFF0\uDF01\uDF03-\uDF07\uDF09\uDFA0-\uDFA2\uDFAA\uDF11-\uDF20\uDF0C\uDF00\uDF08\uDF02\uDF0A\uDF83\uDF84\uDF86-\uDF8B\uDF8D-\uDF91\uDF80\uDF81\uDFAB\uDFC6\uDFC5\uDFC0\uDFD0\uDFC8\uDFC9\uDFBE\uDFB3\uDFCF\uDFD1-\uDFD3\uDFF8\uDFA3\uDFBD\uDFBF\uDFAF\uDFB1\uDFAE\uDFB0\uDFB2\uDCCF\uDC04\uDFB4\uDFAD\uDFA8\uDF92\uDFA9\uDF93\uDFBC\uDFB5\uDFB6\uDFA4\uDFA7\uDFB7-\uDFBB\uDFA5\uDFAC\uDFEE\uDFF9\uDFE7\uDFA6\uDD8E\uDD91-\uDD9A\uDE01\uDE36\uDE2F\uDE50\uDE39\uDE1A\uDE32\uDE51\uDE38\uDE34\uDE33\uDE3A\uDE35\uDFC1\uDF8C])|\u26F7\uFE0F?|\u26F9(?:(?:\uFE0F(?:\u200D(?:[\u2642\u2640]\uFE0F?))?|\uD83C(?:[\uDFFB-\uDFFF](?:\u200D(?:[\u2642\u2640]\uFE0F?))?)|\u200D(?:[\u2642\u2640]\uFE0F?)))?|[\u2618\u26F0\u26E9\u2668\u26F4\u2708\u23F1\u23F2\u2600\u2601\u26C8\u2602\u26F1\u2744\u2603\u2604\u26F8\u2660\u2665\u2666\u2663\u265F\u26D1\u260E\u2328\u2709\u270F\u2712\u2702\u26CF\u2692\u2694\u2699\u2696\u26D3\u2697\u26B0\u26B1\u26A0\u2622\u2623\u2B06\u2197\u27A1\u2198\u2B07\u2199\u2B05\u2196\u2195\u2194\u21A9\u21AA\u2934\u2935\u269B\u2721\u2638\u262F\u271D\u2626\u262A\u262E\u25B6\u23ED\u23EF\u25C0\u23EE\u23F8-\u23FA\u23CF\u2640\u2642\u2695\u267E\u267B\u269C\u2611\u2714\u2716\u303D\u2733\u2734\u2747\u203C\u2049\u3030\u00A9\u00AE\u2122]\uFE0F?|[\u0023\u002A\u0030-\u0039](?:\uFE0F\u20E3|\u20E3)|[\u2139\u24C2\u3297\u3299\u25FC\u25FB\u25AA\u25AB]\uFE0F?|[\u2615\u26EA\u26F2\u26FA\u26FD\u2693\u26F5\u231B\u23F3\u231A\u23F0\u2B50\u26C5\u2614\u26A1\u26C4\u2728\u26BD\u26BE\u26F3\u267F\u26D4\u2648-\u2653\u26CE\u23E9-\u23EC\u2B55\u2705\u274C\u274E\u2795-\u2797\u27B0\u27BF\u2753-\u2755\u2757\u26AB\u26AA\u2B1B\u2B1C\u25FE\u25FD])"; } } \ No newline at end of file From c7bf5f79f8a205a91bda0c8cf40e9486b51c81fa Mon Sep 17 00:00:00 2001 From: Nicolas Constant Date: Mon, 1 Feb 2021 21:48:47 -0500 Subject: [PATCH 05/14] testing new Hashtag regex --- .../Regexes/HashtagRegexes.cs | 3 +- .../Tools/StatusExtractor.cs | 46 ++++-- .../Tools/StatusExtractorTests.cs | 137 ++++++++++++++---- 3 files changed, 144 insertions(+), 42 deletions(-) diff --git a/src/BirdsiteLive.Common/Regexes/HashtagRegexes.cs b/src/BirdsiteLive.Common/Regexes/HashtagRegexes.cs index c5e8ed7..99b2f32 100644 --- a/src/BirdsiteLive.Common/Regexes/HashtagRegexes.cs +++ b/src/BirdsiteLive.Common/Regexes/HashtagRegexes.cs @@ -4,6 +4,7 @@ namespace BirdsiteLive.Common.Regexes { public class HashtagRegexes { - public static readonly Regex Hashtag = new Regex(@"(.)(#[a-zA-Z0-9]+)(\s|$|[.,;:!?/|-])"); + public static readonly Regex HashtagName = new Regex(@"^[a-zA-Z0-9_]+$"); + public static readonly Regex Hashtag = new Regex(@"(.?)#([a-zA-Z0-9_]+)(\s|$|[<.,;:!?/|-])"); } } \ No newline at end of file diff --git a/src/BirdsiteLive.Domain/Tools/StatusExtractor.cs b/src/BirdsiteLive.Domain/Tools/StatusExtractor.cs index a181ac2..9429096 100644 --- a/src/BirdsiteLive.Domain/Tools/StatusExtractor.cs +++ b/src/BirdsiteLive.Domain/Tools/StatusExtractor.cs @@ -4,6 +4,8 @@ using System.Text.RegularExpressions; using BirdsiteLive.ActivityPub.Models; using BirdsiteLive.Common.Regexes; using BirdsiteLive.Common.Settings; +using BirdsiteLive.Twitter; +using Microsoft.Extensions.Logging; namespace BirdsiteLive.Domain.Tools { @@ -14,7 +16,7 @@ namespace BirdsiteLive.Domain.Tools public class StatusExtractor : IStatusExtractor { - private readonly Regex _hastagRegex = new Regex(@"\W(\#[a-zA-Z0-9_ー]+\b)(?!;)"); + //private readonly Regex _hastagRegex = new Regex(@"\W(\#[a-zA-Z0-9_ー]+\b)(?!;)"); //private readonly Regex _hastagRegex = new Regex(@"#\w+"); //private readonly Regex _hastagRegex = new Regex(@"(?<=[\s>]|^)#(\w*[a-zA-Z0-9_ー]+\w*)\b(?!;)"); //private readonly Regex _hastagRegex = new Regex(@"(?<=[\s>]|^)#(\w*[a-zA-Z0-9_ー]+)\b(?!;)"); @@ -27,29 +29,31 @@ namespace BirdsiteLive.Domain.Tools private readonly Regex _urlRegex = new Regex(@"((http|ftp|https):\/\/[\w\-_]+(\.[\w\-_]+)+([\w\-\.,@?^=%&:/~\+#]*[\w\-\@?^=%&/~\+#])?)"); private readonly InstanceSettings _instanceSettings; + private readonly ILogger _logger; #region Ctor - public StatusExtractor(InstanceSettings instanceSettings) + public StatusExtractor(InstanceSettings instanceSettings, ILogger logger) { _instanceSettings = instanceSettings; + _logger = logger; } #endregion public (string content, Tag[] tags) Extract(string messageContent, bool extractMentions = true) { var tags = new List(); - messageContent = $" {messageContent} "; + //messageContent = $" {messageContent} "; // Replace return lines - messageContent = Regex.Replace(messageContent, @"\r\n\r\n?|\n\n", "

"); - messageContent = Regex.Replace(messageContent, @"\r\n?|\n", "
"); - messageContent = Regex.Replace(messageContent, @"\(@", "( @"); - messageContent = Regex.Replace(messageContent, @"\(#", "( #"); + messageContent = Regex.Replace(messageContent, @"\r\n\r\n?|\n\n", "

"); + messageContent = Regex.Replace(messageContent, @"\r\n?|\n", "
"); + //messageContent = Regex.Replace(messageContent, @"\(@", "( @"); + //messageContent = Regex.Replace(messageContent, @"\(#", "( #"); - // Secure emojis - var emojiMatch = EmojiRegexes.Emoji.Matches(messageContent); - foreach (Match m in emojiMatch) - messageContent = Regex.Replace(messageContent, m.ToString(), $" {m} "); + //// Secure emojis + //var emojiMatch = EmojiRegexes.Emoji.Matches(messageContent); + //foreach (Match m in emojiMatch) + // messageContent = Regex.Replace(messageContent, m.ToString(), $" {m} "); // Extract Urls var urlMatch = _urlRegex.Matches(messageContent); @@ -83,12 +87,19 @@ namespace BirdsiteLive.Domain.Tools } // Extract Hashtags - var hashtagMatch = OrderByLength(_hastagRegex.Matches(messageContent)); + var hashtagMatch = OrderByLength(HashtagRegexes.Hashtag.Matches(messageContent)); foreach (Match m in hashtagMatch.OrderByDescending(x => x.Length)) { - var tag = m.ToString().Replace("#", string.Empty).Replace("\n", string.Empty).Trim(); - var url = $"https://{_instanceSettings.Domain}/tags/{tag}"; + var tag = m.Groups[2].ToString(); + //var tag = m.ToString().Replace("#", string.Empty).Replace("\n", string.Empty).Trim(); + if (!HashtagRegexes.HashtagName.IsMatch(tag)) + { + _logger.LogError("Parsing Hashtag failed: {Tag} on {Content}", tag, messageContent); + continue; + } + + var url = $"https://{_instanceSettings.Domain}/tags/{tag}"; tags.Add(new Tag { name = $"#{tag}", @@ -96,8 +107,11 @@ namespace BirdsiteLive.Domain.Tools type = "Hashtag" }); - messageContent = Regex.Replace(messageContent, m.ToString(), - $@" #{tag}"); + //messageContent = Regex.Replace(messageContent, m.ToString(), + // $@" #{tag}"); + + messageContent = Regex.Replace(messageContent, m.Groups[0].ToString(), + $@"{m.Groups[1]}#{tag}{m.Groups[3]}"); } // Extract Mentions diff --git a/src/Tests/BirdsiteLive.Domain.Tests/Tools/StatusExtractorTests.cs b/src/Tests/BirdsiteLive.Domain.Tests/Tools/StatusExtractorTests.cs index f5dc91a..f126b12 100644 --- a/src/Tests/BirdsiteLive.Domain.Tests/Tools/StatusExtractorTests.cs +++ b/src/Tests/BirdsiteLive.Domain.Tests/Tools/StatusExtractorTests.cs @@ -3,7 +3,9 @@ using System.Linq; using BirdsiteLive.Common.Settings; using BirdsiteLive.Domain.Tools; using BirdsiteLive.Twitter.Models; +using Microsoft.Extensions.Logging; using Microsoft.VisualStudio.TestTools.UnitTesting; +using Moq; namespace BirdsiteLive.Domain.Tests.Tools { @@ -28,11 +30,16 @@ namespace BirdsiteLive.Domain.Tests.Tools #region Stubs var message = "Bla.\n\n@Mention blo. https://t.co/pgtrJi9600"; #endregion - - var service = new StatusExtractor(_settings); + + #region Mocks + var logger = new Mock>(); + #endregion + + var service = new StatusExtractor(_settings, logger.Object); var result = service.Extract(message); #region Validations + logger.VerifyAll(); Assert.IsTrue(result.content.Contains("Bla.")); Assert.IsTrue(result.content.Contains("

")); #endregion @@ -45,10 +52,15 @@ namespace BirdsiteLive.Domain.Tests.Tools var message = "Bla.\n@Mention blo. https://t.co/pgtrJi9600"; #endregion - var service = new StatusExtractor(_settings); + #region Mocks + var logger = new Mock>(); + #endregion + + var service = new StatusExtractor(_settings, logger.Object); var result = service.Extract(message); #region Validations + logger.VerifyAll(); Assert.IsTrue(result.content.Contains("Bla.")); Assert.IsTrue(result.content.Contains("
")); #endregion @@ -61,10 +73,15 @@ namespace BirdsiteLive.Domain.Tests.Tools var message = $"Bla!{Environment.NewLine}https://t.co/L8BpyHgg25"; #endregion - var service = new StatusExtractor(_settings); + #region Mocks + var logger = new Mock>(); + #endregion + + var service = new StatusExtractor(_settings, logger.Object); var result = service.Extract(message); #region Validations + logger.VerifyAll(); Assert.AreEqual(0, result.tags.Length); Assert.IsTrue(result.content.Contains("Bla!")); @@ -79,10 +96,15 @@ namespace BirdsiteLive.Domain.Tests.Tools var message = $"Bla!{Environment.NewLine}https://www.eff.org/deeplinks/2020/07/pact-act-not-solution-problem-harmful-online-content"; #endregion - var service = new StatusExtractor(_settings); + #region Mocks + var logger = new Mock>(); + #endregion + + var service = new StatusExtractor(_settings, logger.Object); var result = service.Extract(message); #region Validations + logger.VerifyAll(); Assert.AreEqual(0, result.tags.Length); Assert.IsTrue(result.content.Contains("Bla!")); @@ -97,10 +119,15 @@ namespace BirdsiteLive.Domain.Tests.Tools var message = $"Bla!{Environment.NewLine}https://www.eff.org/deeplinks/2020/07/pact"; #endregion - var service = new StatusExtractor(_settings); + #region Mocks + var logger = new Mock>(); + #endregion + + var service = new StatusExtractor(_settings, logger.Object); var result = service.Extract(message); #region Validations + logger.VerifyAll(); Assert.AreEqual(0, result.tags.Length); Assert.IsTrue(result.content.Contains("Bla!")); @@ -115,10 +142,15 @@ namespace BirdsiteLive.Domain.Tests.Tools var message = $"https://t.co/L8BpyHgg25 Bla!{Environment.NewLine}https://www.eff.org/deeplinks/2020/07/pact-act-not-solution-problem-harmful-online-content"; #endregion - var service = new StatusExtractor(_settings); + #region Mocks + var logger = new Mock>(); + #endregion + + var service = new StatusExtractor(_settings, logger.Object); var result = service.Extract(message); #region Validations + logger.VerifyAll(); Assert.AreEqual(0, result.tags.Length); Assert.IsTrue(result.content.Contains("Bla!")); @@ -132,13 +164,18 @@ namespace BirdsiteLive.Domain.Tests.Tools public void Extract_SingleHashTag_Test() { #region Stubs - var message = $"Bla!{Environment.NewLine}#mytag⁠"; + var message = $"Bla!{Environment.NewLine}#mytag"; #endregion - var service = new StatusExtractor(_settings); + #region Mocks + var logger = new Mock>(); + #endregion + + var service = new StatusExtractor(_settings, logger.Object); var result = service.Extract(message); #region Validations + logger.VerifyAll(); Assert.AreEqual(1, result.tags.Length); Assert.AreEqual("#mytag", result.tags.First().name); Assert.AreEqual("Hashtag", result.tags.First().type); @@ -153,13 +190,18 @@ namespace BirdsiteLive.Domain.Tests.Tools public void Extract_SingleHashTag_AtStart_Test() { #region Stubs - var message = $"#mytag⁠ Bla!"; + var message = "#mytag Bla!"; #endregion - var service = new StatusExtractor(_settings); + #region Mocks + var logger = new Mock>(); + #endregion + + var service = new StatusExtractor(_settings, logger.Object); var result = service.Extract(message); #region Validations + logger.VerifyAll(); Assert.AreEqual(1, result.tags.Length); Assert.AreEqual("#mytag", result.tags.First().name); Assert.AreEqual("Hashtag", result.tags.First().type); @@ -174,20 +216,25 @@ namespace BirdsiteLive.Domain.Tests.Tools public void Extract_SingleHashTag_SpecialChar_Test() { #region Stubs - var message = $"Bla!{Environment.NewLine}#COVIDー19⁠"; + var message = $"Bla!{Environment.NewLine}#COVID_19"; #endregion - var service = new StatusExtractor(_settings); + #region Mocks + var logger = new Mock>(); + #endregion + + var service = new StatusExtractor(_settings, logger.Object); var result = service.Extract(message); #region Validations + logger.VerifyAll(); Assert.AreEqual(1, result.tags.Length); - Assert.AreEqual("#COVIDー19", result.tags.First().name); + Assert.AreEqual("#COVID_19", result.tags.First().name); Assert.AreEqual("Hashtag", result.tags.First().type); - Assert.AreEqual("https://domain.name/tags/COVIDー19", result.tags.First().href); + Assert.AreEqual("https://domain.name/tags/COVID_19", result.tags.First().href); Assert.IsTrue(result.content.Contains("Bla!")); - Assert.IsTrue(result.content.Contains(@"#COVIDー19")); + Assert.IsTrue(result.content.Contains(@"#COVID_19")); #endregion } @@ -195,13 +242,18 @@ namespace BirdsiteLive.Domain.Tests.Tools public void Extract_MultiHashTags_Test() { #region Stubs - var message = $"Bla!{Environment.NewLine}#mytag #mytag2 #mytag3⁠{Environment.NewLine}Test #bal Test"; + var message = $"Bla!{Environment.NewLine}#mytag #mytag2 #mytag3{Environment.NewLine}Test #bal Test"; #endregion - var service = new StatusExtractor(_settings); + #region Mocks + var logger = new Mock>(); + #endregion + + var service = new StatusExtractor(_settings, logger.Object); var result = service.Extract(message); #region Validations + logger.VerifyAll(); Assert.AreEqual(4, result.tags.Length); Assert.IsTrue(result.content.Contains("Bla!")); Assert.IsTrue(result.content.Contains(@"#mytag")); @@ -218,10 +270,15 @@ namespace BirdsiteLive.Domain.Tests.Tools var message = $"Bla!{Environment.NewLine}@mynickname⁠"; #endregion - var service = new StatusExtractor(_settings); + #region Mocks + var logger = new Mock>(); + #endregion + + var service = new StatusExtractor(_settings, logger.Object); var result = service.Extract(message); #region Validations + logger.VerifyAll(); Assert.AreEqual(1, result.tags.Length); Assert.AreEqual("@mynickname@domain.name", result.tags.First().name); Assert.AreEqual("Mention", result.tags.First().type); @@ -239,10 +296,15 @@ namespace BirdsiteLive.Domain.Tests.Tools var message = $"Bla!{Environment.NewLine}@my___nickname⁠"; #endregion - var service = new StatusExtractor(_settings); + #region Mocks + var logger = new Mock>(); + #endregion + + var service = new StatusExtractor(_settings, logger.Object); var result = service.Extract(message); #region Validations + logger.VerifyAll(); Assert.AreEqual(1, result.tags.Length); Assert.AreEqual("@my___nickname@domain.name", result.tags.First().name); Assert.AreEqual("Mention", result.tags.First().type); @@ -260,10 +322,15 @@ namespace BirdsiteLive.Domain.Tests.Tools var message = $"@mynickname Bla!"; #endregion - var service = new StatusExtractor(_settings); + #region Mocks + var logger = new Mock>(); + #endregion + + var service = new StatusExtractor(_settings, logger.Object); var result = service.Extract(message); #region Validations + logger.VerifyAll(); Assert.AreEqual(1, result.tags.Length); Assert.AreEqual("@mynickname@domain.name", result.tags.First().name); Assert.AreEqual("Mention", result.tags.First().type); @@ -281,10 +348,15 @@ namespace BirdsiteLive.Domain.Tests.Tools var message = $"Bla!{Environment.NewLine}@mynickname⁠ @mynickname2 @mynickname3{Environment.NewLine}Test @dada Test"; #endregion - var service = new StatusExtractor(_settings); + #region Mocks + var logger = new Mock>(); + #endregion + + var service = new StatusExtractor(_settings, logger.Object); var result = service.Extract(message); #region Validations + logger.VerifyAll(); Assert.AreEqual(4, result.tags.Length); Assert.IsTrue(result.content.Contains("Bla!")); Assert.IsTrue(result.content.Contains(@"@mynickname")); @@ -301,10 +373,15 @@ namespace BirdsiteLive.Domain.Tests.Tools var message = $"Bla!{Environment.NewLine}@mynickname⁠ #mytag2 @mynickname3{Environment.NewLine}Test @dada #dada Test"; #endregion - var service = new StatusExtractor(_settings); + #region Mocks + var logger = new Mock>(); + #endregion + + var service = new StatusExtractor(_settings, logger.Object); var result = service.Extract(message); #region Validations + logger.VerifyAll(); Assert.AreEqual(5, result.tags.Length); Assert.IsTrue(result.content.Contains("Bla!")); Assert.IsTrue(result.content.Contains(@"@mynickname")); @@ -324,10 +401,15 @@ namespace BirdsiteLive.Domain.Tests.Tools //var message = $"tests@mynickname"; #endregion - var service = new StatusExtractor(_settings); + #region Mocks + var logger = new Mock>(); + #endregion + + var service = new StatusExtractor(_settings, logger.Object); var result = service.Extract(message); #region Validations + logger.VerifyAll(); Assert.AreEqual(1, result.tags.Length); Assert.IsTrue(result.content.Contains( @"😤 @mynickname")); @@ -344,10 +426,15 @@ namespace BirdsiteLive.Domain.Tests.Tools //var message = $"tests@mynickname"; #endregion - var service = new StatusExtractor(_settings); + #region Mocks + var logger = new Mock>(); + #endregion + + var service = new StatusExtractor(_settings, logger.Object); var result = service.Extract(message); #region Validations + logger.VerifyAll(); Assert.AreEqual(1, result.tags.Length); Assert.IsTrue(result.content.Equals(@"bla ( @mynickname test)")); #endregion From c409a93b185747a77c0d84eb872001a37dbee23c Mon Sep 17 00:00:00 2001 From: Nicolas Constant Date: Tue, 2 Feb 2021 00:24:33 -0500 Subject: [PATCH 06/14] testing new mention regex --- .../Regexes/UserRegexes.cs | 2 +- .../Tools/StatusExtractor.cs | 33 ++++++++++++------- .../Tools/StatusExtractorTests.cs | 13 ++++---- 3 files changed, 29 insertions(+), 19 deletions(-) diff --git a/src/BirdsiteLive.Common/Regexes/UserRegexes.cs b/src/BirdsiteLive.Common/Regexes/UserRegexes.cs index 74c8b2e..a9f1053 100644 --- a/src/BirdsiteLive.Common/Regexes/UserRegexes.cs +++ b/src/BirdsiteLive.Common/Regexes/UserRegexes.cs @@ -5,6 +5,6 @@ namespace BirdsiteLive.Common.Regexes public class UserRegexes { public static readonly Regex TwitterAccount = new Regex(@"^[a-zA-Z0-9_]+$"); - public static readonly Regex Mention = new Regex(@"(.)(@[a-zA-Z0-9_]+)(\s|$|[,;:!?/|-]|(. ))"); + public static readonly Regex Mention = new Regex(@"(.?)@([a-zA-Z0-9_]+)(\s|$|[<,;:!?/|-]|(. ))"); } } \ No newline at end of file diff --git a/src/BirdsiteLive.Domain/Tools/StatusExtractor.cs b/src/BirdsiteLive.Domain/Tools/StatusExtractor.cs index 9429096..0d4c1f3 100644 --- a/src/BirdsiteLive.Domain/Tools/StatusExtractor.cs +++ b/src/BirdsiteLive.Domain/Tools/StatusExtractor.cs @@ -21,7 +21,7 @@ namespace BirdsiteLive.Domain.Tools //private readonly Regex _hastagRegex = new Regex(@"(?<=[\s>]|^)#(\w*[a-zA-Z0-9_ー]+\w*)\b(?!;)"); //private readonly Regex _hastagRegex = new Regex(@"(?<=[\s>]|^)#(\w*[a-zA-Z0-9_ー]+)\b(?!;)"); - private readonly Regex _mentionRegex = new Regex(@"\W(\@[a-zA-Z0-9_ー]+\b)(?!;)"); + //private readonly Regex _mentionRegex = new Regex(@"\W(\@[a-zA-Z0-9_ー]+\b)(?!;)"); //private readonly Regex _mentionRegex = new Regex(@"@\w+"); //private readonly Regex _mentionRegex = new Regex(@"(?<=[\s>]|^)@(\w*[a-zA-Z0-9_ー]+\w*)\b(?!;)"); //private readonly Regex _mentionRegex = new Regex(@"(?<=[\s>]|^)@(\w*[a-zA-Z0-9_ー]+)\b(?!;)"); @@ -110,17 +110,25 @@ namespace BirdsiteLive.Domain.Tools //messageContent = Regex.Replace(messageContent, m.ToString(), // $@" #{tag}"); - messageContent = Regex.Replace(messageContent, m.Groups[0].ToString(), + messageContent = Regex.Replace(messageContent, Regex.Escape(m.Groups[0].ToString()), $@"{m.Groups[1]}#{tag}{m.Groups[3]}"); } // Extract Mentions if (extractMentions) { - var mentionMatch = OrderByLength(_mentionRegex.Matches(messageContent)); + var mentionMatch = OrderByLength(UserRegexes.Mention.Matches(messageContent)); foreach (Match m in mentionMatch.OrderByDescending(x => x.Length)) { - var mention = m.ToString().Replace("@", string.Empty).Replace("\n", string.Empty).Trim(); + var mention = m.Groups[2].ToString(); + //var mention = m.ToString().Replace("@", string.Empty).Replace("\n", string.Empty).Trim(); + + if (!UserRegexes.TwitterAccount.IsMatch(mention)) + { + _logger.LogError("Parsing Mention failed: {Mention} on {Content}", mention, messageContent); + continue; + } + var url = $"https://{_instanceSettings.Domain}/users/{mention}"; var name = $"@{mention}@{_instanceSettings.Domain}"; @@ -131,16 +139,19 @@ namespace BirdsiteLive.Domain.Tools type = "Mention" }); - messageContent = Regex.Replace(messageContent, m.ToString(), - $@" @{mention}"); + //messageContent = Regex.Replace(messageContent, m.ToString(), + // $@" @{mention}"); + + messageContent = Regex.Replace(messageContent, Regex.Escape(m.Groups[0].ToString()), + $@"{m.Groups[1]}@{mention}{m.Groups[3]}"); } } - // Clean up return lines - messageContent = Regex.Replace(messageContent, @"

", "

"); - messageContent = Regex.Replace(messageContent, @"
", "
"); - messageContent = Regex.Replace(messageContent, @" ", " "); - messageContent = Regex.Replace(messageContent, @" ", " "); + //// Clean up return lines + //messageContent = Regex.Replace(messageContent, @"

", "

"); + //messageContent = Regex.Replace(messageContent, @"
", "
"); + //messageContent = Regex.Replace(messageContent, @" ", " "); + //messageContent = Regex.Replace(messageContent, @" ", " "); return (messageContent.Trim(), tags.ToArray()); } diff --git a/src/Tests/BirdsiteLive.Domain.Tests/Tools/StatusExtractorTests.cs b/src/Tests/BirdsiteLive.Domain.Tests/Tools/StatusExtractorTests.cs index f126b12..4ed8ee8 100644 --- a/src/Tests/BirdsiteLive.Domain.Tests/Tools/StatusExtractorTests.cs +++ b/src/Tests/BirdsiteLive.Domain.Tests/Tools/StatusExtractorTests.cs @@ -267,7 +267,7 @@ namespace BirdsiteLive.Domain.Tests.Tools public void Extract_SingleMentionTag_Test() { #region Stubs - var message = $"Bla!{Environment.NewLine}@mynickname⁠"; + var message = $"Bla!{Environment.NewLine}@mynickname"; #endregion #region Mocks @@ -293,7 +293,7 @@ namespace BirdsiteLive.Domain.Tests.Tools public void Extract_SingleMentionTag_SpecialChar_Test() { #region Stubs - var message = $"Bla!{Environment.NewLine}@my___nickname⁠"; + var message = $"Bla!{Environment.NewLine}@my___nickname"; #endregion #region Mocks @@ -391,8 +391,7 @@ namespace BirdsiteLive.Domain.Tests.Tools Assert.IsTrue(result.content.Contains(@"#dada")); #endregion } - - + [TestMethod] public void Extract_Emoji_Test() { @@ -412,9 +411,9 @@ namespace BirdsiteLive.Domain.Tests.Tools logger.VerifyAll(); Assert.AreEqual(1, result.tags.Length); Assert.IsTrue(result.content.Contains( - @"😤 @mynickname")); + @"😤@mynickname")); - Assert.IsTrue(result.content.Contains(@"😎 😍 🤗 🤩 😘")); + Assert.IsTrue(result.content.Contains(@"😎😍🤗🤩😘")); #endregion } @@ -436,7 +435,7 @@ namespace BirdsiteLive.Domain.Tests.Tools #region Validations logger.VerifyAll(); Assert.AreEqual(1, result.tags.Length); - Assert.IsTrue(result.content.Equals(@"bla ( @mynickname test)")); + Assert.IsTrue(result.content.Equals(@"bla (@mynickname test)")); #endregion } } From 8daebbc8199bf617ba14c1b42744f3f56e5f7057 Mon Sep 17 00:00:00 2001 From: Nicolas Constant Date: Tue, 2 Feb 2021 00:25:36 -0500 Subject: [PATCH 07/14] clean up --- .../Tools/StatusExtractor.cs | 19 +------------------ 1 file changed, 1 insertion(+), 18 deletions(-) diff --git a/src/BirdsiteLive.Domain/Tools/StatusExtractor.cs b/src/BirdsiteLive.Domain/Tools/StatusExtractor.cs index 0d4c1f3..b6b571c 100644 --- a/src/BirdsiteLive.Domain/Tools/StatusExtractor.cs +++ b/src/BirdsiteLive.Domain/Tools/StatusExtractor.cs @@ -42,13 +42,10 @@ namespace BirdsiteLive.Domain.Tools public (string content, Tag[] tags) Extract(string messageContent, bool extractMentions = true) { var tags = new List(); - //messageContent = $" {messageContent} "; // Replace return lines messageContent = Regex.Replace(messageContent, @"\r\n\r\n?|\n\n", "

"); messageContent = Regex.Replace(messageContent, @"\r\n?|\n", "
"); - //messageContent = Regex.Replace(messageContent, @"\(@", "( @"); - //messageContent = Regex.Replace(messageContent, @"\(#", "( #"); //// Secure emojis //var emojiMatch = EmojiRegexes.Emoji.Matches(messageContent); @@ -91,7 +88,6 @@ namespace BirdsiteLive.Domain.Tools foreach (Match m in hashtagMatch.OrderByDescending(x => x.Length)) { var tag = m.Groups[2].ToString(); - //var tag = m.ToString().Replace("#", string.Empty).Replace("\n", string.Empty).Trim(); if (!HashtagRegexes.HashtagName.IsMatch(tag)) { @@ -107,9 +103,6 @@ namespace BirdsiteLive.Domain.Tools type = "Hashtag" }); - //messageContent = Regex.Replace(messageContent, m.ToString(), - // $@" #{tag}"); - messageContent = Regex.Replace(messageContent, Regex.Escape(m.Groups[0].ToString()), $@"{m.Groups[1]}#{tag}{m.Groups[3]}"); } @@ -121,7 +114,6 @@ namespace BirdsiteLive.Domain.Tools foreach (Match m in mentionMatch.OrderByDescending(x => x.Length)) { var mention = m.Groups[2].ToString(); - //var mention = m.ToString().Replace("@", string.Empty).Replace("\n", string.Empty).Trim(); if (!UserRegexes.TwitterAccount.IsMatch(mention)) { @@ -138,21 +130,12 @@ namespace BirdsiteLive.Domain.Tools href = url, type = "Mention" }); - - //messageContent = Regex.Replace(messageContent, m.ToString(), - // $@" @{mention}"); - + messageContent = Regex.Replace(messageContent, Regex.Escape(m.Groups[0].ToString()), $@"{m.Groups[1]}@{mention}{m.Groups[3]}"); } } - //// Clean up return lines - //messageContent = Regex.Replace(messageContent, @"

", "

"); - //messageContent = Regex.Replace(messageContent, @"
", "
"); - //messageContent = Regex.Replace(messageContent, @" ", " "); - //messageContent = Regex.Replace(messageContent, @" ", " "); - return (messageContent.Trim(), tags.ToArray()); } From 0c6ee3dd4decc80103508b976eaeea0961250e06 Mon Sep 17 00:00:00 2001 From: Nicolas Constant Date: Tue, 2 Feb 2021 00:26:26 -0500 Subject: [PATCH 08/14] clean up --- src/BirdsiteLive.Domain/Tools/StatusExtractor.cs | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/src/BirdsiteLive.Domain/Tools/StatusExtractor.cs b/src/BirdsiteLive.Domain/Tools/StatusExtractor.cs index b6b571c..a43ccb0 100644 --- a/src/BirdsiteLive.Domain/Tools/StatusExtractor.cs +++ b/src/BirdsiteLive.Domain/Tools/StatusExtractor.cs @@ -16,18 +16,6 @@ namespace BirdsiteLive.Domain.Tools public class StatusExtractor : IStatusExtractor { - //private readonly Regex _hastagRegex = new Regex(@"\W(\#[a-zA-Z0-9_ー]+\b)(?!;)"); - //private readonly Regex _hastagRegex = new Regex(@"#\w+"); - //private readonly Regex _hastagRegex = new Regex(@"(?<=[\s>]|^)#(\w*[a-zA-Z0-9_ー]+\w*)\b(?!;)"); - //private readonly Regex _hastagRegex = new Regex(@"(?<=[\s>]|^)#(\w*[a-zA-Z0-9_ー]+)\b(?!;)"); - - //private readonly Regex _mentionRegex = new Regex(@"\W(\@[a-zA-Z0-9_ー]+\b)(?!;)"); - //private readonly Regex _mentionRegex = new Regex(@"@\w+"); - //private readonly Regex _mentionRegex = new Regex(@"(?<=[\s>]|^)@(\w*[a-zA-Z0-9_ー]+\w*)\b(?!;)"); - //private readonly Regex _mentionRegex = new Regex(@"(?<=[\s>]|^)@(\w*[a-zA-Z0-9_ー]+)\b(?!;)"); - - private readonly Regex _urlRegex = new Regex(@"((http|ftp|https):\/\/[\w\-_]+(\.[\w\-_]+)+([\w\-\.,@?^=%&:/~\+#]*[\w\-\@?^=%&/~\+#])?)"); - private readonly InstanceSettings _instanceSettings; private readonly ILogger _logger; @@ -53,7 +41,7 @@ namespace BirdsiteLive.Domain.Tools // messageContent = Regex.Replace(messageContent, m.ToString(), $" {m} "); // Extract Urls - var urlMatch = _urlRegex.Matches(messageContent); + var urlMatch = UrlRegexes.Url.Matches(messageContent); foreach (Match m in urlMatch) { var url = m.ToString().Replace("\n", string.Empty).Trim(); From 25d1f360badcf9e048bc0f5a6d06ba0777e52254 Mon Sep 17 00:00:00 2001 From: Nicolas Constant Date: Tue, 2 Feb 2021 00:27:48 -0500 Subject: [PATCH 09/14] added test --- .../Tools/StatusExtractorTests.cs | 27 +++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/src/Tests/BirdsiteLive.Domain.Tests/Tools/StatusExtractorTests.cs b/src/Tests/BirdsiteLive.Domain.Tests/Tools/StatusExtractorTests.cs index 4ed8ee8..92ff130 100644 --- a/src/Tests/BirdsiteLive.Domain.Tests/Tools/StatusExtractorTests.cs +++ b/src/Tests/BirdsiteLive.Domain.Tests/Tools/StatusExtractorTests.cs @@ -289,6 +289,33 @@ namespace BirdsiteLive.Domain.Tests.Tools #endregion } + [TestMethod] + public void Extract_SingleMentionTag_Dot_Test() + { + #region Stubs + var message = $".@mynickname Bla!{Environment.NewLine}Blo"; + #endregion + + #region Mocks + var logger = new Mock>(); + #endregion + + var service = new StatusExtractor(_settings, logger.Object); + var result = service.Extract(message); + + #region Validations + logger.VerifyAll(); + Assert.AreEqual(1, result.tags.Length); + Assert.AreEqual("@mynickname@domain.name", result.tags.First().name); + Assert.AreEqual("Mention", result.tags.First().type); + Assert.AreEqual("https://domain.name/users/mynickname", result.tags.First().href); + + Assert.IsTrue(result.content.Contains("Bla!")); + Assert.IsTrue(result.content.Contains("Blo")); + Assert.IsTrue(result.content.Contains(@"@mynickname")); + #endregion + } + [TestMethod] public void Extract_SingleMentionTag_SpecialChar_Test() { From 717f69054269983fc286d3a7be983769e717c497 Mon Sep 17 00:00:00 2001 From: Nicolas Constant Date: Tue, 2 Feb 2021 00:38:48 -0500 Subject: [PATCH 10/14] fix tests --- .../RetrieveTweetsProcessorTests.cs | 26 ++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) diff --git a/src/Tests/BirdsiteLive.Pipeline.Tests/Processors/RetrieveTweetsProcessorTests.cs b/src/Tests/BirdsiteLive.Pipeline.Tests/Processors/RetrieveTweetsProcessorTests.cs index d66c2f7..b35a688 100644 --- a/src/Tests/BirdsiteLive.Pipeline.Tests/Processors/RetrieveTweetsProcessorTests.cs +++ b/src/Tests/BirdsiteLive.Pipeline.Tests/Processors/RetrieveTweetsProcessorTests.cs @@ -7,6 +7,7 @@ using BirdsiteLive.DAL.Models; using BirdsiteLive.Pipeline.Processors; using BirdsiteLive.Twitter; using BirdsiteLive.Twitter.Models; +using Microsoft.Extensions.Logging; using Microsoft.VisualStudio.TestTools.UnitTesting; using Moq; @@ -59,14 +60,20 @@ namespace BirdsiteLive.Pipeline.Tests.Processors It.IsAny() )) .Returns(Task.CompletedTask); + + var twitterUserServiceMock = new Mock(MockBehavior.Strict); + + var logger = new Mock>(MockBehavior.Strict); #endregion - var processor = new RetrieveTweetsProcessor(twitterServiceMock.Object, twitterUserDalMock.Object); + var processor = new RetrieveTweetsProcessor(twitterServiceMock.Object, twitterUserDalMock.Object, twitterUserServiceMock.Object, logger.Object); var usersResult = await processor.ProcessAsync(users, CancellationToken.None); #region Validations twitterServiceMock.VerifyAll(); twitterUserDalMock.VerifyAll(); + twitterUserServiceMock.VerifyAll(); + logger.VerifyAll(); Assert.AreEqual(0, usersResult.Length); #endregion @@ -117,14 +124,21 @@ namespace BirdsiteLive.Pipeline.Tests.Processors .Returns(tweets); var twitterUserDalMock = new Mock(MockBehavior.Strict); + + var twitterUserServiceMock = new Mock(MockBehavior.Strict); + + var logger = new Mock>(MockBehavior.Strict); #endregion - var processor = new RetrieveTweetsProcessor(twitterServiceMock.Object, twitterUserDalMock.Object); + var processor = new RetrieveTweetsProcessor(twitterServiceMock.Object, twitterUserDalMock.Object, twitterUserServiceMock.Object, logger.Object); var usersResult = await processor.ProcessAsync(users, CancellationToken.None); #region Validations twitterServiceMock.VerifyAll(); twitterUserDalMock.VerifyAll(); + twitterUserServiceMock.VerifyAll(); + logger.VerifyAll(); + Assert.AreEqual(users.Length, usersResult.Length); Assert.AreEqual(users[0].Acct, usersResult[0].User.Acct); @@ -177,14 +191,20 @@ namespace BirdsiteLive.Pipeline.Tests.Processors .Returns(tweets); var twitterUserDalMock = new Mock(MockBehavior.Strict); + + var twitterUserServiceMock = new Mock(MockBehavior.Strict); + + var logger = new Mock>(MockBehavior.Strict); #endregion - var processor = new RetrieveTweetsProcessor(twitterServiceMock.Object, twitterUserDalMock.Object); + var processor = new RetrieveTweetsProcessor(twitterServiceMock.Object, twitterUserDalMock.Object, twitterUserServiceMock.Object, logger.Object); var usersResult = await processor.ProcessAsync(users, CancellationToken.None); #region Validations twitterServiceMock.VerifyAll(); twitterUserDalMock.VerifyAll(); + twitterUserServiceMock.VerifyAll(); + logger.VerifyAll(); Assert.AreEqual(users.Length, usersResult.Length); Assert.AreEqual(users[0].Acct, usersResult[0].User.Acct); From 32b53e09e2dd63477e6e3c40bfea92f21eb4eea8 Mon Sep 17 00:00:00 2001 From: Nicolas Constant Date: Tue, 2 Feb 2021 19:25:12 -0500 Subject: [PATCH 11/14] fine-tuning regex --- .../Regexes/HashtagRegexes.cs | 2 +- .../Regexes/UserRegexes.cs | 2 +- .../Tools/StatusExtractorTests.cs | 26 +++++++++++++++++++ 3 files changed, 28 insertions(+), 2 deletions(-) diff --git a/src/BirdsiteLive.Common/Regexes/HashtagRegexes.cs b/src/BirdsiteLive.Common/Regexes/HashtagRegexes.cs index 99b2f32..b6d9d00 100644 --- a/src/BirdsiteLive.Common/Regexes/HashtagRegexes.cs +++ b/src/BirdsiteLive.Common/Regexes/HashtagRegexes.cs @@ -5,6 +5,6 @@ namespace BirdsiteLive.Common.Regexes public class HashtagRegexes { public static readonly Regex HashtagName = new Regex(@"^[a-zA-Z0-9_]+$"); - public static readonly Regex Hashtag = new Regex(@"(.?)#([a-zA-Z0-9_]+)(\s|$|[<.,;:!?/|-])"); + public static readonly Regex Hashtag = new Regex(@"(.?)#([a-zA-Z0-9_]+)(\s|$|[\[\]<>.,;:!?/|-])"); } } \ No newline at end of file diff --git a/src/BirdsiteLive.Common/Regexes/UserRegexes.cs b/src/BirdsiteLive.Common/Regexes/UserRegexes.cs index a9f1053..c20a544 100644 --- a/src/BirdsiteLive.Common/Regexes/UserRegexes.cs +++ b/src/BirdsiteLive.Common/Regexes/UserRegexes.cs @@ -5,6 +5,6 @@ namespace BirdsiteLive.Common.Regexes public class UserRegexes { public static readonly Regex TwitterAccount = new Regex(@"^[a-zA-Z0-9_]+$"); - public static readonly Regex Mention = new Regex(@"(.?)@([a-zA-Z0-9_]+)(\s|$|[<,;:!?/|-]|(. ))"); + public static readonly Regex Mention = new Regex(@"(.?)@([a-zA-Z0-9_]+)(\s|$|[\[\]<>,;:!?/|-]|(. ))"); } } \ No newline at end of file diff --git a/src/Tests/BirdsiteLive.Domain.Tests/Tools/StatusExtractorTests.cs b/src/Tests/BirdsiteLive.Domain.Tests/Tools/StatusExtractorTests.cs index 92ff130..924461f 100644 --- a/src/Tests/BirdsiteLive.Domain.Tests/Tools/StatusExtractorTests.cs +++ b/src/Tests/BirdsiteLive.Domain.Tests/Tools/StatusExtractorTests.cs @@ -289,6 +289,32 @@ namespace BirdsiteLive.Domain.Tests.Tools #endregion } + [TestMethod] + public void Extract_SingleMentionTag_RT_Test() + { + #region Stubs + var message = $"[RT @mynickname]{Environment.NewLine}Bla!"; + #endregion + + #region Mocks + var logger = new Mock>(); + #endregion + + var service = new StatusExtractor(_settings, logger.Object); + var result = service.Extract(message); + + #region Validations + logger.VerifyAll(); + Assert.AreEqual(1, result.tags.Length); + Assert.AreEqual("@mynickname@domain.name", result.tags.First().name); + Assert.AreEqual("Mention", result.tags.First().type); + Assert.AreEqual("https://domain.name/users/mynickname", result.tags.First().href); + + Assert.IsTrue(result.content.Contains("Bla!")); + Assert.IsTrue(result.content.Contains(@"@mynickname")); + #endregion + } + [TestMethod] public void Extract_SingleMentionTag_Dot_Test() { From 7ddda8d18c4ec473c236410ff57699e8312182ce Mon Sep 17 00:00:00 2001 From: Nicolas Constant Date: Tue, 2 Feb 2021 20:05:59 -0500 Subject: [PATCH 12/14] better RT extraction --- .../Extractors/TweetExtractor.cs | 35 ++++++++++++++----- 1 file changed, 27 insertions(+), 8 deletions(-) diff --git a/src/BirdsiteLive.Twitter/Extractors/TweetExtractor.cs b/src/BirdsiteLive.Twitter/Extractors/TweetExtractor.cs index c432a35..7fce30c 100644 --- a/src/BirdsiteLive.Twitter/Extractors/TweetExtractor.cs +++ b/src/BirdsiteLive.Twitter/Extractors/TweetExtractor.cs @@ -23,7 +23,7 @@ namespace BirdsiteLive.Twitter.Extractors InReplyToStatusId = tweet.InReplyToStatusId, InReplyToAccount = tweet.InReplyToScreenName, MessageContent = ExtractMessage(tweet), - Media = ExtractMedia(tweet.Media), + Media = ExtractMedia(tweet), CreatedAt = tweet.CreatedAt.ToUniversalTime(), IsReply = tweet.InReplyToUserId != null, IsThread = tweet.InReplyToUserId != null && tweet.InReplyToUserId == tweet.CreatedBy.Id, @@ -36,10 +36,17 @@ namespace BirdsiteLive.Twitter.Extractors private string ExtractRetweetUrl(ITweet tweet) { - if (tweet.IsRetweet && tweet.FullText.Contains("https://t.co/")) + if (tweet.IsRetweet) { - var retweetId = tweet.FullText.Split(new[] { "https://t.co/" }, StringSplitOptions.RemoveEmptyEntries).Last(); - return $"https://t.co/{retweetId}"; + if (tweet.RetweetedTweet != null) + { + return tweet.RetweetedTweet.Url; + } + if (tweet.FullText.Contains("https://t.co/")) + { + var retweetId = tweet.FullText.Split(new[] { "https://t.co/" }, StringSplitOptions.RemoveEmptyEntries).Last(); + return $"https://t.co/{retweetId}"; + } } return null; @@ -47,8 +54,15 @@ namespace BirdsiteLive.Twitter.Extractors public string ExtractMessage(ITweet tweet) { - var tweetUrls = tweet.Media.Select(x => x.URL).Distinct(); var message = tweet.FullText; + var tweetUrls = tweet.Media.Select(x => x.URL).Distinct(); + + if (tweet.IsRetweet && tweet.QuotedStatusId == null && message.StartsWith("RT") && tweet.RetweetedTweet != null) + { + message = tweet.RetweetedTweet.FullText; + tweetUrls = tweet.RetweetedTweet.Media.Select(x => x.URL).Distinct(); + } + foreach (var tweetUrl in tweetUrls) { if(tweet.IsRetweet) @@ -60,8 +74,10 @@ namespace BirdsiteLive.Twitter.Extractors if (tweet.QuotedTweet != null) message = $"[Quote {{RT}}]{Environment.NewLine}{message}"; if (tweet.IsRetweet) { - if (tweet.RetweetedTweet != null) + if (tweet.RetweetedTweet != null && !message.StartsWith("RT")) message = $"[{{RT}} @{tweet.RetweetedTweet.CreatedBy.ScreenName}]{Environment.NewLine}{message}"; + else if (tweet.RetweetedTweet != null && message.StartsWith($"RT @{tweet.RetweetedTweet.CreatedBy.ScreenName}:")) + message = message.Replace($"RT @{tweet.RetweetedTweet.CreatedBy.ScreenName}:", $"[{{RT}} @{tweet.RetweetedTweet.CreatedBy.ScreenName}]{Environment.NewLine}"); else message = message.Replace("RT", "[{{RT}}]"); } @@ -73,10 +89,13 @@ namespace BirdsiteLive.Twitter.Extractors return message; } - public ExtractedMedia[] ExtractMedia(List media) + public ExtractedMedia[] ExtractMedia(ITweet tweet) { - var result = new List(); + var media = tweet.Media; + if (tweet.IsRetweet && tweet.RetweetedTweet != null) + media = tweet.RetweetedTweet.Media; + var result = new List(); foreach (var m in media) { var mediaUrl = GetMediaUrl(m); From 299ad64269dfabd23a50abea1c39cd80b5ad5dac Mon Sep 17 00:00:00 2001 From: Nicolas Constant Date: Tue, 2 Feb 2021 20:50:28 -0500 Subject: [PATCH 13/14] road to 0.13.0 --- src/BirdsiteLive/BirdsiteLive.csproj | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/BirdsiteLive/BirdsiteLive.csproj b/src/BirdsiteLive/BirdsiteLive.csproj index cef4948..607a195 100644 --- a/src/BirdsiteLive/BirdsiteLive.csproj +++ b/src/BirdsiteLive/BirdsiteLive.csproj @@ -4,7 +4,7 @@ netcoreapp3.1 d21486de-a812-47eb-a419-05682bb68856 Linux - 0.12.2 + 0.13.0 From 2e8313301b4f790530d33b352af9e5b5f973ac2c Mon Sep 17 00:00:00 2001 From: Nicolas Constant Date: Tue, 2 Feb 2021 22:49:37 -0500 Subject: [PATCH 14/14] better DI --- .../Processors/RetrieveTweetsProcessor.cs | 13 ++++--------- src/BirdsiteLive.Twitter/CachedTwitterService.cs | 7 ++++++- .../BirdsiteLive.Pipeline.Tests.csproj | 10 ++++++---- .../Processors/RetrieveTweetsProcessorTests.cs | 6 +++--- 4 files changed, 19 insertions(+), 17 deletions(-) diff --git a/src/BirdsiteLive.Pipeline/Processors/RetrieveTweetsProcessor.cs b/src/BirdsiteLive.Pipeline/Processors/RetrieveTweetsProcessor.cs index c0976c5..e60c121 100644 --- a/src/BirdsiteLive.Pipeline/Processors/RetrieveTweetsProcessor.cs +++ b/src/BirdsiteLive.Pipeline/Processors/RetrieveTweetsProcessor.cs @@ -17,12 +17,12 @@ namespace BirdsiteLive.Pipeline.Processors public class RetrieveTweetsProcessor : IRetrieveTweetsProcessor { private readonly ITwitterTweetsService _twitterTweetsService; - private readonly ITwitterUserService _twitterUserService; + private readonly ICachedTwitterUserService _twitterUserService; private readonly ITwitterUserDal _twitterUserDal; private readonly ILogger _logger; #region Ctor - public RetrieveTweetsProcessor(ITwitterTweetsService twitterTweetsService, ITwitterUserDal twitterUserDal, ITwitterUserService twitterUserService, ILogger logger) + public RetrieveTweetsProcessor(ITwitterTweetsService twitterTweetsService, ITwitterUserDal twitterUserDal, ICachedTwitterUserService twitterUserService, ILogger logger) { _twitterTweetsService = twitterTweetsService; _twitterUserDal = twitterUserDal; @@ -77,13 +77,8 @@ namespace BirdsiteLive.Pipeline.Processors } catch (Exception e) { - _logger.LogError(e, "Error retrieving TL of {Username} from {LastTweetPostedId}", user.Acct, user.LastTweetPostedId); - - if (_twitterUserService is CachedTwitterUserService service) - { - _logger.LogInformation("Purge {Username} from cache", user.Acct); - service.PurgeUser(user.Acct); - } + _logger.LogError(e, "Error retrieving TL of {Username} from {LastTweetPostedId}, purging user from cache", user.Acct, user.LastTweetPostedId); + _twitterUserService.PurgeUser(user.Acct); } return tweets; diff --git a/src/BirdsiteLive.Twitter/CachedTwitterService.cs b/src/BirdsiteLive.Twitter/CachedTwitterService.cs index d8ca1fb..d9b90d2 100644 --- a/src/BirdsiteLive.Twitter/CachedTwitterService.cs +++ b/src/BirdsiteLive.Twitter/CachedTwitterService.cs @@ -4,7 +4,12 @@ using Microsoft.Extensions.Caching.Memory; namespace BirdsiteLive.Twitter { - public class CachedTwitterUserService : ITwitterUserService + public interface ICachedTwitterUserService : ITwitterUserService + { + void PurgeUser(string username); + } + + public class CachedTwitterUserService : ICachedTwitterUserService { private readonly ITwitterUserService _twitterService; diff --git a/src/Tests/BirdsiteLive.Pipeline.Tests/BirdsiteLive.Pipeline.Tests.csproj b/src/Tests/BirdsiteLive.Pipeline.Tests/BirdsiteLive.Pipeline.Tests.csproj index aa7750b..d1cfd06 100644 --- a/src/Tests/BirdsiteLive.Pipeline.Tests/BirdsiteLive.Pipeline.Tests.csproj +++ b/src/Tests/BirdsiteLive.Pipeline.Tests/BirdsiteLive.Pipeline.Tests.csproj @@ -14,12 +14,14 @@ - - - - + + + + + + diff --git a/src/Tests/BirdsiteLive.Pipeline.Tests/Processors/RetrieveTweetsProcessorTests.cs b/src/Tests/BirdsiteLive.Pipeline.Tests/Processors/RetrieveTweetsProcessorTests.cs index b35a688..02fd7bd 100644 --- a/src/Tests/BirdsiteLive.Pipeline.Tests/Processors/RetrieveTweetsProcessorTests.cs +++ b/src/Tests/BirdsiteLive.Pipeline.Tests/Processors/RetrieveTweetsProcessorTests.cs @@ -61,7 +61,7 @@ namespace BirdsiteLive.Pipeline.Tests.Processors )) .Returns(Task.CompletedTask); - var twitterUserServiceMock = new Mock(MockBehavior.Strict); + var twitterUserServiceMock = new Mock(MockBehavior.Strict); var logger = new Mock>(MockBehavior.Strict); #endregion @@ -125,7 +125,7 @@ namespace BirdsiteLive.Pipeline.Tests.Processors var twitterUserDalMock = new Mock(MockBehavior.Strict); - var twitterUserServiceMock = new Mock(MockBehavior.Strict); + var twitterUserServiceMock = new Mock(MockBehavior.Strict); var logger = new Mock>(MockBehavior.Strict); #endregion @@ -192,7 +192,7 @@ namespace BirdsiteLive.Pipeline.Tests.Processors var twitterUserDalMock = new Mock(MockBehavior.Strict); - var twitterUserServiceMock = new Mock(MockBehavior.Strict); + var twitterUserServiceMock = new Mock(MockBehavior.Strict); var logger = new Mock>(MockBehavior.Strict); #endregion