// Patch overview (three files; the hunks for the third file continue after this line).
//
// UrlRegexes.Url
//   The pattern gains a leading capture `(.?)` and the scheme is wrapped in its own group.
//   Capture groups of the new pattern, numbered by opening parenthesis:
//     1 = optional single character immediately before the URL
//     2 = the whole URL
//     3 = the scheme including "://" (http, ftp or https)
//
// StatusExtractor (URL loop)
//   `url` and `protocol` are now read from groups 2 and 3 instead of being derived from the
//   trimmed match text with StartsWith checks. The replacement text now begins with group 1
//   where it previously began with a literal space.
//
// NOTE(review): `Regex.Replace(messageContent, m.ToString(), ...)` passes the matched text as a
// regex PATTERN. Characters the URL pattern accepts (`.`, `?`, `+`, `^`) are metacharacters, and
// the match text now also includes the group-1 character, which can be anything `.` matches
// (for example `(` or `[`). Looks like this needs Regex.Escape(...) or an index-based
// replacement - confirm against the full method, which is only partly visible here.
// NOTE(review): `url.Replace(protocol, string.Empty)` removes every occurrence of the scheme
// text, not only the leading one - confirm that is intended.
diff --git a/src/BirdsiteLive.Common/Regexes/UrlRegexes.cs b/src/BirdsiteLive.Common/Regexes/UrlRegexes.cs index ea3e5c2..1f2b279 100644 --- a/src/BirdsiteLive.Common/Regexes/UrlRegexes.cs +++ b/src/BirdsiteLive.Common/Regexes/UrlRegexes.cs @@ -4,6 +4,6 @@ namespace BirdsiteLive.Common.Regexes { public class UrlRegexes { - public static readonly Regex Url = new Regex(@"((http|ftp|https):\/\/[\w\-_]+(\.[\w\-_]+)+([\w\-\.,@?^=%&:/~\+#]*[\w\-\@?^=%&/~\+#])?)"); + public static readonly Regex Url = new Regex(@"(.?)(((http|ftp|https):\/\/)[\w\-_]+(\.[\w\-_]+)+([\w\-\.,@?^=%&:/~\+#]*[\w\-\@?^=%&/~\+#])?)"); } } \ No newline at end of file diff --git a/src/BirdsiteLive.Domain/Tools/StatusExtractor.cs b/src/BirdsiteLive.Domain/Tools/StatusExtractor.cs index a43ccb0..7df4c0e 100644 --- a/src/BirdsiteLive.Domain/Tools/StatusExtractor.cs +++ b/src/BirdsiteLive.Domain/Tools/StatusExtractor.cs @@ -44,11 +44,8 @@ namespace BirdsiteLive.Domain.Tools var urlMatch = UrlRegexes.Url.Matches(messageContent); foreach (Match m in urlMatch) { - var url = m.ToString().Replace("\n", string.Empty).Trim(); - - var protocol = "https://"; - if (url.StartsWith("http://")) protocol = "http://"; - else if (url.StartsWith("ftp://")) protocol = "ftp://"; + var url = m.Groups[2].ToString(); + var protocol = m.Groups[3].ToString(); var truncatedUrl = url.Replace(protocol, string.Empty); @@ -68,7 +65,7 @@ namespace BirdsiteLive.Domain.Tools } messageContent = Regex.Replace(messageContent, m.ToString(), - $@" {protocol}{firstPart}{secondPart}"); + $@"{m.Groups[1]}{protocol}{firstPart}{secondPart}"); } // Extract Hashtags diff --git a/src/Tests/BirdsiteLive.Domain.Tests/Tools/StatusExtractorTests.cs b/src/Tests/BirdsiteLive.Domain.Tests/Tools/StatusExtractorTests.cs index 924461f..06d8f9a 100644 --- a/src/Tests/BirdsiteLive.Domain.Tests/Tools/StatusExtractorTests.cs +++ b/src/Tests/BirdsiteLive.Domain.Tests/Tools/StatusExtractorTests.cs @@ -136,7 +136,7 @@ namespace BirdsiteLive.Domain.Tests.Tools } 
[TestMethod] - public void Extract_MultiUrls__Test() + public void Extract_MultiUrls_Test() { #region Stubs var message = $"https://t.co/L8BpyHgg25 Bla!{Environment.NewLine}https://www.eff.org/deeplinks/2020/07/pact-act-not-solution-problem-harmful-online-content"; @@ -160,6 +160,63 @@ namespace BirdsiteLive.Domain.Tests.Tools #endregion } + [TestMethod] + public void Extract_SmallUrl_Test() + { + #region Stubs + var message = @"🚀 test http://GOV.UK date 🎉 data http://GOV.UK woopsi."; + #endregion + + #region Mocks + var logger = new Mock<ILogger<StatusExtractor>>(); + #endregion + + var service = new StatusExtractor(_settings, logger.Object); + var result = service.Extract(message); + + #region Validations + Assert.AreEqual(@"🚀 test http://GOV.UK date 🎉 data http://GOV.UK woopsi.", result.content); + #endregion + } + + [TestMethod] + public void Extract_SmallUrl_2_Test() + { + #region Stubs + var message = @"🚀http://GOV.UK"; + #endregion + + #region Mocks + var logger = new Mock<ILogger<StatusExtractor>>(); + #endregion + + var service = new StatusExtractor(_settings, logger.Object); + var result = service.Extract(message); + + #region Validations + Assert.AreEqual(@"🚀http://GOV.UK", result.content); + #endregion + } + + [TestMethod] + public void Extract_SmallUrl_3_Test() + { + #region Stubs + var message = @"🚀http://GOV.UK."; + #endregion + + #region Mocks + var logger = new Mock<ILogger<StatusExtractor>>(); + #endregion + + var service = new StatusExtractor(_settings, logger.Object); + var result = service.Extract(message); + + #region Validations + Assert.AreEqual(@"🚀http://GOV.UK.", result.content); + #endregion + } + [TestMethod] public void Extract_SingleHashTag_Test() {