testing hashtag extraction

This commit is contained in:
Nicolas Constant 2020-07-31 22:49:00 -04:00
parent bfc4dcb4fd
commit 83507614a4
No known key found for this signature in database
GPG key ID: 1E9F677FB01A5688
4 changed files with 134 additions and 52 deletions

View file

@ -6,6 +6,7 @@ using System.Text.RegularExpressions;
using BirdsiteLive.ActivityPub; using BirdsiteLive.ActivityPub;
using BirdsiteLive.ActivityPub.Models; using BirdsiteLive.ActivityPub.Models;
using BirdsiteLive.Common.Settings; using BirdsiteLive.Common.Settings;
using BirdsiteLive.Domain.Tools;
using BirdsiteLive.Twitter.Models; using BirdsiteLive.Twitter.Models;
using Tweetinvi.Models; using Tweetinvi.Models;
using Tweetinvi.Models.Entities; using Tweetinvi.Models.Entities;
@ -20,11 +21,13 @@ namespace BirdsiteLive.Domain
public class StatusService : IStatusService public class StatusService : IStatusService
{ {
private readonly InstanceSettings _instanceSettings; private readonly InstanceSettings _instanceSettings;
private readonly IStatusExtractor _statusExtractor;
#region Ctor #region Ctor
public StatusService(InstanceSettings instanceSettings) public StatusService(InstanceSettings instanceSettings, IStatusExtractor statusExtractor)
{ {
_instanceSettings = instanceSettings; _instanceSettings = instanceSettings;
_statusExtractor = statusExtractor;
} }
#endregion #endregion
@ -37,7 +40,7 @@ namespace BirdsiteLive.Domain
var to = $"{actorUrl}/followers"; var to = $"{actorUrl}/followers";
var apPublic = "https://www.w3.org/ns/activitystreams#Public"; var apPublic = "https://www.w3.org/ns/activitystreams#Public";
var extractedTags = ExtractTags(tweet.MessageContent); var extractedTags = _statusExtractor.ExtractTags(tweet.MessageContent);
var note = new Note var note = new Note
{ {
@ -64,32 +67,6 @@ namespace BirdsiteLive.Domain
return note; return note;
} }
/// <summary>
/// Finds the hashtags in <paramref name="messageContent"/>, rewrites each one as an
/// HTML link to this instance's tag page and returns the matching <see cref="Tag"/> entries.
/// </summary>
/// <param name="messageContent">Text of the status to scan; may be null or empty.</param>
/// <returns>
/// The content with every hashtag replaced by its link, and one <see cref="Tag"/> per
/// distinct hashtag in order of first appearance. Null or empty input is returned
/// unchanged with an empty tag array.
/// </returns>
private (string content, Tag[] tags) ExtractTags(string messageContent)
{
    if (string.IsNullOrEmpty(messageContent))
    {
        return (messageContent, new Tag[0]);
    }

    // '#' followed by ASCII letters/digits. The lookbehind (instead of a consumed \W)
    // lets a hashtag at the very start of the text match and keeps the separator out of
    // the match; (?!;) skips numeric HTML entities such as "&#39;".
    var regex = new Regex(@"(?<!\w)\#([a-zA-Z0-9]+)\b(?!;)");

    var tags = new List<Tag>();
    var seenTags = new HashSet<string>();

    // Single-pass replacement: each match is rewritten exactly where it was found, so a
    // short tag (#tag) can never corrupt a longer one sharing its prefix (#tag2).
    var content = regex.Replace(messageContent, m =>
    {
        var tag = m.Groups[1].Value;
        var url = $"https://{_instanceSettings.Domain}/tags/{tag}";

        // Report each distinct hashtag once even if it occurs several times.
        if (seenTags.Add(tag))
        {
            tags.Add(new Tag
            {
                name = $"#{tag}",
                href = url,
                type = "Hashtag"
            });
        }

        return $@"<a href=""{url}"" class=""mention hashtag"" rel=""tag"">#<span>{tag}</span></a>";
    });

    return (content, tags.ToArray());
}
private Attachment[] Convert(ExtractedMedia[] media) private Attachment[] Convert(ExtractedMedia[] media)
{ {
if(media == null) return new Attachment[0]; if(media == null) return new Attachment[0];

View file

@ -1,7 +1,53 @@
namespace BirdsiteLive.Domain.Tools using System.Collections.Generic;
{ using System.Text.RegularExpressions;
public class StatusExtractor using BirdsiteLive.ActivityPub.Models;
{ using BirdsiteLive.Common.Settings;
namespace BirdsiteLive.Domain.Tools
{
/// <summary>
/// Abstraction over hashtag extraction from the text of a status, so that consumers
/// can receive the extractor through their constructor.
/// </summary>
public interface IStatusExtractor
{
/// <summary>
/// Scans <paramref name="messageContent"/> for hashtags.
/// </summary>
/// <param name="messageContent">Text of the status to process.</param>
/// <returns>
/// A tuple whose <c>content</c> is the processed message text (the
/// <c>StatusExtractor</c> implementation rewrites each hashtag as an HTML link)
/// and whose <c>tags</c> is the accompanying <see cref="Tag"/> array.
/// </returns>
(string content, Tag[] tags) ExtractTags(string messageContent);
}
public class StatusExtractor : IStatusExtractor
{
private readonly InstanceSettings _instanceSettings;
#region Ctor
/// <summary>
/// Creates an extractor bound to the given instance settings; the settings'
/// <c>Domain</c> is used when building hashtag URLs.
/// </summary>
/// <param name="instanceSettings">Settings of the current instance.</param>
public StatusExtractor(InstanceSettings instanceSettings) => _instanceSettings = instanceSettings;
#endregion
// '#' followed by ASCII letters/digits. The lookbehind (instead of a consumed \W) lets a
// hashtag at the very start of the text match and keeps the preceding separator out of
// the match; (?!;) skips numeric HTML entities such as "&#39;".
private static readonly Regex HashtagRegex =
    new Regex(@"(?<!\w)\#([a-zA-Z0-9]+)\b(?!;)", RegexOptions.Compiled);

/// <summary>
/// Finds the hashtags in <paramref name="messageContent"/>, rewrites each one as an
/// HTML link to this instance's tag page and returns the matching <see cref="Tag"/> entries.
/// </summary>
/// <param name="messageContent">Text of the status to scan; may be null or empty.</param>
/// <returns>
/// The content with every hashtag replaced by its link (surrounding whitespace and line
/// breaks are left untouched), and one <see cref="Tag"/> per distinct hashtag in order of
/// first appearance. Null or empty input is returned unchanged with an empty tag array.
/// </returns>
public (string content, Tag[] tags) ExtractTags(string messageContent)
{
    if (string.IsNullOrEmpty(messageContent))
    {
        return (messageContent, new Tag[0]);
    }

    var tags = new List<Tag>();
    var seenTags = new HashSet<string>();

    // Single-pass replacement through a match evaluator: each hashtag is rewritten exactly
    // where it was found. This avoids feeding matched text back in as a regex pattern
    // (which breaks on metacharacters) and prevents a short tag (#tag) from corrupting a
    // longer one sharing its prefix (#tag2).
    var content = HashtagRegex.Replace(messageContent, m =>
    {
        var tag = m.Groups[1].Value;
        var url = $"https://{_instanceSettings.Domain}/tags/{tag}";

        // Report each distinct hashtag once even if it occurs several times.
        if (seenTags.Add(tag))
        {
            tags.Add(new Tag
            {
                name = $"#{tag}",
                href = url,
                type = "Hashtag"
            });
        }

        return $@"<a href=""{url}"" class=""mention hashtag"" rel=""tag"">#<span>{tag}</span></a>";
    });

    return (content, tags.ToArray());
}
} }
} }

View file

@ -20,29 +20,29 @@ namespace BirdsiteLive.Domain.Tests
} }
#endregion #endregion
[TestMethod] // [TestMethod]
public void ExtractMentionsTest() // public void ExtractMentionsTest()
{ // {
#region Stubs // #region Stubs
var username = "MyUserName"; // var username = "MyUserName";
var extractedTweet = new ExtractedTweet // var extractedTweet = new ExtractedTweet
{ // {
Id = 124L, // Id = 124L,
CreatedAt = DateTime.UtcNow, // CreatedAt = DateTime.UtcNow,
MessageContent = @"Getting ready for the weekend...have a great one everyone! // MessageContent = @"Getting ready for the weekend...have a great one everyone!
//
Photo by Tim Tronckoe | @timtronckoe //Photo by Tim Tronckoe | @timtronckoe
//
#archenemy #michaelamott #alissawhitegluz #jeffloomis #danielerlandsson #sharleedangelo" //#archenemy #michaelamott #alissawhitegluz #jeffloomis #danielerlandsson #sharleedangelo"
}; // };
#endregion // #endregion
var service = new StatusService(_settings); // var service = new StatusService(_settings);
var result = service.GetStatus(username, extractedTweet); // var result = service.GetStatus(username, extractedTweet);
#region Validations // #region Validations
#endregion // #endregion
} // }
} }
} }

View file

@ -0,0 +1,59 @@
using System;
using BirdsiteLive.Common.Settings;
using BirdsiteLive.Domain.Tools;
using BirdsiteLive.Twitter.Models;
using Microsoft.VisualStudio.TestTools.UnitTesting;
namespace BirdsiteLive.Domain.Tests.Tools
{
/// <summary>
/// Unit tests for the hashtag handling of <see cref="StatusExtractor"/>: each test checks
/// that the returned content keeps the plain text and contains the expected tag links.
/// </summary>
[TestClass]
public class StatusExtractorTests
{
    private readonly InstanceSettings _settings;

    #region Ctor
    public StatusExtractorTests()
    {
        _settings = new InstanceSettings { Domain = "domain.name" };
    }
    #endregion

    /// <summary>
    /// A single hashtag on its own line is turned into a link and the text before it is kept.
    /// </summary>
    [TestMethod]
    public void Extract_SingleTag_Test()
    {
        // Arrange
        var message = $"Bla!{Environment.NewLine}#mytag";
        var extractor = new StatusExtractor(_settings);

        // Act
        var extraction = extractor.ExtractTags(message);

        // Assert
        Assert.IsTrue(extraction.content.Contains("Bla!"));
        Assert.IsTrue(extraction.content.Contains(ExpectedLink("mytag")));
    }

    /// <summary>
    /// Several hashtags, spread over two lines and mixed with plain words, are all turned into links.
    /// </summary>
    [TestMethod]
    public void Extract_MultiTags_Test()
    {
        // Arrange
        var message = $"Bla!{Environment.NewLine}#mytag #mytag2 #mytag3{Environment.NewLine}Test #bal Test";
        var extractor = new StatusExtractor(_settings);

        // Act
        var extraction = extractor.ExtractTags(message);

        // Assert
        Assert.IsTrue(extraction.content.Contains("Bla!"));
        foreach (var tag in new[] { "mytag", "mytag2", "mytag3", "bal" })
        {
            Assert.IsTrue(extraction.content.Contains(ExpectedLink(tag)));
        }
    }

    // Builds the anchor markup expected in the content for the given tag name.
    private static string ExpectedLink(string tag)
    {
        return $@"<a href=""https://domain.name/tags/{tag}"" class=""mention hashtag"" rel=""tag"">#<span>{tag}</span></a>";
    }
}
}