testing hashtag extraction
This commit is contained in:
parent
bfc4dcb4fd
commit
83507614a4
4 changed files with 134 additions and 52 deletions
|
@ -6,6 +6,7 @@ using System.Text.RegularExpressions;
|
||||||
using BirdsiteLive.ActivityPub;
|
using BirdsiteLive.ActivityPub;
|
||||||
using BirdsiteLive.ActivityPub.Models;
|
using BirdsiteLive.ActivityPub.Models;
|
||||||
using BirdsiteLive.Common.Settings;
|
using BirdsiteLive.Common.Settings;
|
||||||
|
using BirdsiteLive.Domain.Tools;
|
||||||
using BirdsiteLive.Twitter.Models;
|
using BirdsiteLive.Twitter.Models;
|
||||||
using Tweetinvi.Models;
|
using Tweetinvi.Models;
|
||||||
using Tweetinvi.Models.Entities;
|
using Tweetinvi.Models.Entities;
|
||||||
|
@ -20,11 +21,13 @@ namespace BirdsiteLive.Domain
|
||||||
public class StatusService : IStatusService
|
public class StatusService : IStatusService
|
||||||
{
|
{
|
||||||
private readonly InstanceSettings _instanceSettings;
|
private readonly InstanceSettings _instanceSettings;
|
||||||
|
private readonly IStatusExtractor _statusExtractor;
|
||||||
|
|
||||||
#region Ctor
|
#region Ctor
|
||||||
public StatusService(InstanceSettings instanceSettings)
|
public StatusService(InstanceSettings instanceSettings, IStatusExtractor statusExtractor)
|
||||||
{
|
{
|
||||||
_instanceSettings = instanceSettings;
|
_instanceSettings = instanceSettings;
|
||||||
|
_statusExtractor = statusExtractor;
|
||||||
}
|
}
|
||||||
#endregion
|
#endregion
|
||||||
|
|
||||||
|
@ -37,7 +40,7 @@ namespace BirdsiteLive.Domain
|
||||||
var to = $"{actorUrl}/followers";
|
var to = $"{actorUrl}/followers";
|
||||||
var apPublic = "https://www.w3.org/ns/activitystreams#Public";
|
var apPublic = "https://www.w3.org/ns/activitystreams#Public";
|
||||||
|
|
||||||
var extractedTags = ExtractTags(tweet.MessageContent);
|
var extractedTags = _statusExtractor.ExtractTags(tweet.MessageContent);
|
||||||
|
|
||||||
var note = new Note
|
var note = new Note
|
||||||
{
|
{
|
||||||
|
@ -64,32 +67,6 @@ namespace BirdsiteLive.Domain
|
||||||
return note;
|
return note;
|
||||||
}
|
}
|
||||||
|
|
||||||
private (string content, Tag[] tags) ExtractTags(string messageContent)
|
|
||||||
{
|
|
||||||
var regex = new Regex(@"\W(\#[a-zA-Z0-9]+\b)(?!;)");
|
|
||||||
var match = regex.Matches(messageContent);
|
|
||||||
|
|
||||||
var tags = new List<Tag>();
|
|
||||||
foreach (var m in match)
|
|
||||||
{
|
|
||||||
var tag = m.ToString().Replace("#", string.Empty).Replace("\n", string.Empty).Trim();
|
|
||||||
var url = $"https://{_instanceSettings.Domain}/tags/{tag}";
|
|
||||||
|
|
||||||
tags.Add(new Tag
|
|
||||||
{
|
|
||||||
name = $"#{tag}",
|
|
||||||
href = url,
|
|
||||||
type = "Hashtag"
|
|
||||||
});
|
|
||||||
|
|
||||||
messageContent = messageContent.Replace(
|
|
||||||
$"#{tag}",
|
|
||||||
$@"<a href=""{url}"" class=""mention hashtag"" rel=""tag"">#<span>{tag}</span></a>");
|
|
||||||
}
|
|
||||||
|
|
||||||
return (messageContent, new Tag[0]);
|
|
||||||
}
|
|
||||||
|
|
||||||
private Attachment[] Convert(ExtractedMedia[] media)
|
private Attachment[] Convert(ExtractedMedia[] media)
|
||||||
{
|
{
|
||||||
if(media == null) return new Attachment[0];
|
if(media == null) return new Attachment[0];
|
||||||
|
|
|
@ -1,7 +1,53 @@
|
||||||
namespace BirdsiteLive.Domain.Tools
|
using System.Collections.Generic;
|
||||||
|
using System.Text.RegularExpressions;
|
||||||
|
using BirdsiteLive.ActivityPub.Models;
|
||||||
|
using BirdsiteLive.Common.Settings;
|
||||||
|
|
||||||
|
namespace BirdsiteLive.Domain.Tools
|
||||||
{
|
{
|
||||||
public class StatusExtractor
|
public interface IStatusExtractor
|
||||||
{
|
{
|
||||||
|
/// <summary>
/// Extracts hashtags from a status message.
/// </summary>
/// <param name="messageContent">Raw text of the status.</param>
/// <returns>
/// A tuple holding the message content after hashtag processing and the tags associated with it.
/// </returns>
(string content, Tag[] tags) ExtractTags(string messageContent);
|
||||||
|
}
|
||||||
|
|
||||||
|
public class StatusExtractor : IStatusExtractor
|
||||||
|
{
|
||||||
|
private readonly InstanceSettings _instanceSettings;
|
||||||
|
|
||||||
|
#region Ctor
|
||||||
|
/// <summary>
/// Creates an extractor that uses the given instance settings.
/// </summary>
/// <param name="instanceSettings">Settings of the current instance; stored for later use by the extractor.</param>
public StatusExtractor(InstanceSettings instanceSettings) => _instanceSettings = instanceSettings;
|
||||||
|
#endregion
|
||||||
|
|
||||||
|
// A hashtag is '#' followed by ASCII letters/digits, preceded by a non-word character.
// The trailing (?!;) rejects candidates immediately followed by ';'
// (presumably to skip numeric HTML entities such as "&#39;" -- confirm against real tweet payloads).
private static readonly Regex HashtagRegex = new Regex(@"\W(\#[a-zA-Z0-9]+\b)(?!;)");

/// <summary>
/// Finds the hashtags in <paramref name="messageContent"/>, replaces each one with an HTML
/// anchor pointing at this instance's tag page, and returns the collected tags.
/// </summary>
/// <param name="messageContent">Raw status text. May be null or empty.</param>
/// <returns>
/// <c>content</c>: the message with every matched hashtag wrapped in an anchor element;
/// <c>tags</c>: one <see cref="Tag"/> per matched hashtag occurrence, in order of appearance.
/// For a null or empty message the input is returned unchanged with an empty tag array.
/// </returns>
public (string content, Tag[] tags) ExtractTags(string messageContent)
{
    if (string.IsNullOrEmpty(messageContent))
    {
        return (messageContent, new Tag[0]);
    }

    var tags = new List<Tag>();

    // A single-pass replace with an evaluator rewrites each hashtag at its own position.
    // This avoids re-using the matched text as a regex pattern (which throws when the
    // preceding character is a metacharacter such as '(') and avoids rescanning the
    // anchors that were already inserted.
    var content = HashtagRegex.Replace(messageContent, match =>
    {
        var hashtag = match.Groups[1];

        // Strip the leading '#': only the capture group is used, so the preceding
        // non-word character never leaks into the tag name or the URL.
        var tag = hashtag.Value.Substring(1);
        var url = $"https://{_instanceSettings.Domain}/tags/{tag}";

        tags.Add(new Tag
        {
            name = $"#{tag}",
            href = url,
            type = "Hashtag"
        });

        // Keep the character(s) matched before the hashtag (space, newline, punctuation)
        // so the surrounding text layout is preserved.
        var prefix = match.Value.Substring(0, hashtag.Index - match.Index);

        return $@"{prefix}<a href=""{url}"" class=""mention hashtag"" rel=""tag"">#<span>{tag}</span></a>";
    });

    return (content, tags.ToArray());
}
|
||||||
}
|
}
|
||||||
}
|
}
|
|
@ -20,29 +20,29 @@ namespace BirdsiteLive.Domain.Tests
|
||||||
}
|
}
|
||||||
#endregion
|
#endregion
|
||||||
|
|
||||||
[TestMethod]
|
// [TestMethod]
|
||||||
public void ExtractMentionsTest()
|
// public void ExtractMentionsTest()
|
||||||
{
|
// {
|
||||||
#region Stubs
|
// #region Stubs
|
||||||
var username = "MyUserName";
|
// var username = "MyUserName";
|
||||||
var extractedTweet = new ExtractedTweet
|
// var extractedTweet = new ExtractedTweet
|
||||||
{
|
// {
|
||||||
Id = 124L,
|
// Id = 124L,
|
||||||
CreatedAt = DateTime.UtcNow,
|
// CreatedAt = DateTime.UtcNow,
|
||||||
MessageContent = @"Getting ready for the weekend...have a great one everyone!
|
// MessageContent = @"Getting ready for the weekend...have a great one everyone!
|
||||||
|
//
|
||||||
Photo by Tim Tronckoe | @timtronckoe
|
//Photo by Tim Tronckoe | @timtronckoe
|
||||||
|
//
|
||||||
#archenemy #michaelamott #alissawhitegluz #jeffloomis #danielerlandsson #sharleedangelo"
|
//#archenemy #michaelamott #alissawhitegluz #jeffloomis #danielerlandsson #sharleedangelo"
|
||||||
};
|
// };
|
||||||
#endregion
|
// #endregion
|
||||||
|
|
||||||
var service = new StatusService(_settings);
|
// var service = new StatusService(_settings);
|
||||||
var result = service.GetStatus(username, extractedTweet);
|
// var result = service.GetStatus(username, extractedTweet);
|
||||||
|
|
||||||
#region Validations
|
// #region Validations
|
||||||
|
|
||||||
#endregion
|
// #endregion
|
||||||
}
|
// }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,59 @@
|
||||||
|
using System;
|
||||||
|
using BirdsiteLive.Common.Settings;
|
||||||
|
using BirdsiteLive.Domain.Tools;
|
||||||
|
using BirdsiteLive.Twitter.Models;
|
||||||
|
using Microsoft.VisualStudio.TestTools.UnitTesting;
|
||||||
|
|
||||||
|
namespace BirdsiteLive.Domain.Tests.Tools
|
||||||
|
{
|
||||||
|
[TestClass]
|
||||||
|
public class StatusExtractorTests
|
||||||
|
{
|
||||||
|
private readonly InstanceSettings _settings;
|
||||||
|
|
||||||
|
#region Ctor
|
||||||
|
/// <summary>
/// Builds the instance settings shared by all tests in this class.
/// </summary>
public StatusExtractorTests() => _settings = new InstanceSettings { Domain = "domain.name" };
|
||||||
|
#endregion
|
||||||
|
|
||||||
|
/// <summary>
/// A message with one hashtag on its own line keeps its text and gets the hashtag
/// rendered as an anchor to the instance's tag page.
/// </summary>
[TestMethod]
public void Extract_SingleTag_Test()
{
    #region Stubs
    var input = "Bla!" + Environment.NewLine + "#mytag";
    const string expectedAnchor = @"<a href=""https://domain.name/tags/mytag"" class=""mention hashtag"" rel=""tag"">#<span>mytag</span></a>";
    #endregion

    var extractor = new StatusExtractor(_settings);
    var extracted = extractor.ExtractTags(input);

    #region Validations
    var content = extracted.content;
    Assert.IsTrue(content.Contains("Bla!"));
    Assert.IsTrue(content.Contains(expectedAnchor));
    #endregion
}
|
||||||
|
|
||||||
|
/// <summary>
/// A message with several hashtags spread over multiple lines keeps its text and gets
/// every hashtag rendered as an anchor to the instance's tag page.
/// </summary>
[TestMethod]
public void Extract_MultiTags_Test()
{
    #region Stubs
    var input = "Bla!" + Environment.NewLine
        + "#mytag #mytag2 #mytag3" + Environment.NewLine
        + "Test #bal Test";

    var expectedAnchors = new[]
    {
        @"<a href=""https://domain.name/tags/mytag"" class=""mention hashtag"" rel=""tag"">#<span>mytag</span></a>",
        @"<a href=""https://domain.name/tags/mytag2"" class=""mention hashtag"" rel=""tag"">#<span>mytag2</span></a>",
        @"<a href=""https://domain.name/tags/mytag3"" class=""mention hashtag"" rel=""tag"">#<span>mytag3</span></a>",
        @"<a href=""https://domain.name/tags/bal"" class=""mention hashtag"" rel=""tag"">#<span>bal</span></a>"
    };
    #endregion

    var extractor = new StatusExtractor(_settings);
    var extracted = extractor.ExtractTags(input);

    #region Validations
    var content = extracted.content;
    Assert.IsTrue(content.Contains("Bla!"));

    // Same checks, in the same order, as one assertion per expected anchor.
    foreach (var anchor in expectedAnchors)
    {
        Assert.IsTrue(content.Contains(anchor));
    }
    #endregion
}
|
||||||
|
}
|
||||||
|
}
|
Loading…
Add table
Reference in a new issue