Unity 提取字符串中的emoji
在Unity中使用TextMeshPro加载Emoji时,普通的Emoji(如😀)可以正常显示,但组合型Emoji(如👨🏫或👧🏿)会被分开显示,无法正确识别。为了解决这一问题,可以参考GitHub上的emoji.wpf项目,通过提取组合型Emoji并替换为<sprite>富文本或其他方式来实现正确显示。文章提供了相关代码示例,展示了如何通过正则表达式匹配和提取Emoji。
·
参考这篇文章《Unity使用TextMeshPro加载Emoji | 星光与路人》(https://www.starloong.top/2024/06/01/Unity%E4%BD%BF%E7%94%A8TextMeshPro%E5%8A%A0%E8%BD%BDEmoji/index.html),在Unity里添加emoji。
普通的emoji😀只对应一个Unicode
像这种2个emoji组合的👨🏫,还有带肤色的👧🏿,等其他组合型。Unity里无法识别,会分开显示。
参考这个项目,成功将组合型emoji提取出,后续可以自己替换成<sprite>富文本 或其他方式
GitHub: https://github.com/samhocevar/emoji.wpf
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using UnityEngine;
/// <summary>
/// Demo component that runs the emoji extraction over a sample string when it starts.
/// </summary>
public class EmojiTest : MonoBehaviour
{
    void Start()
    {
        Do("ully-qualified # 👨🏫 man teacher 1F3F3 FE0F ; fully-qualified # 🏳️ white flag");
    }

    /// <summary>
    /// Walks <paramref name="text"/> with <c>EmojiData.MatchOne</c>, logs every emoji that is
    /// found, and finally logs the remaining text with all matched emoji removed.
    /// </summary>
    /// <param name="text">Text to scan; a null or empty string is ignored.</param>
    public void Do(string text)
    {
        if (string.IsNullOrEmpty(text))
        {
            return;
        }

        var plain = new StringBuilder();
        int cursor = 0;

        for (Match hit = EmojiData.MatchOne.Match(text); hit.Success; hit = hit.NextMatch())
        {
            // Copy the non-emoji text that sits between the previous match and this one.
            int gap = hit.Index - cursor;
            if (gap != 0)
            {
                plain.Append(text, cursor, gap);
            }

            string emoji = hit.Value;
            // The emoji has been isolated at this point; it could be replaced with <sprite>
            // rich text, for example:
            // plain.Append("<sprite name=\"emoji\">");
            Debug.Log(emoji); // set a breakpoint here to inspect the emoji

            cursor = hit.Index + hit.Length;
        }

        // Copy whatever follows the last match.
        if (cursor < text.Length)
        {
            plain.Append(text, cursor, text.Length - cursor);
        }

        Debug.Log(plain.ToString());
    }
}
/// <summary>
/// Static emoji database built from the "emoji-test" text asset. It exposes lookups by emoji
/// text and by name, plus <see cref="MatchOne"/>, a regex that matches a single emoji
/// (including multi-code-point sequences) inside arbitrary text.
/// </summary>
public static class EmojiData
{
    /// <summary>Maps an emoji string, as first listed in the data file, to its entry.</summary>
    public static IDictionary<string, Emoji> LookupByText { get; private set; }
        = new Dictionary<string, Emoji>();

    /// <summary>Maps a lower-case, dash-separated emoji name to its entry.</summary>
    public static IDictionary<string, Emoji> LookupByName { get; private set; }
        = new Dictionary<string, Emoji>();

    /// <summary>Regex matching any one emoji; rebuilt by every call to <see cref="Load"/>.</summary>
    public static Regex MatchOne { get; private set; }

    // FIXME: should we lazy load this? If the user calls Load() later, then
    // this first Load() call will have been for nothing.
    static EmojiData() => Load();

    /// <summary>(Re)builds the lookup tables and <see cref="MatchOne"/> from the data file.</summary>
    public static void Load() => ParseEmojiList();

    /// <summary>One emoji entry parsed from the data file.</summary>
    public class Emoji
    {
        /// <summary>Descriptive name taken from the data file line.</summary>
        public string Name { get; set; }

        /// <summary>The emoji itself as a string.</summary>
        public string Text { get; set; }

        public bool HasVariations => VariationList.Count > 0;

        /// <summary>
        /// Variations of this emoji, e.g. skin tones.
        /// </summary>
        public IList<Emoji> VariationList { get; } = new List<Emoji>();

        /// <summary>Space-separated hexadecimal code points, as written in the data file.</summary>
        public string Unicode { get; set; }
    }

    private static string m_match_one_string;

    // FIXME: this could be read directly from emoji-test.txt.gz
    private static readonly List<string> SkinToneComponents = new List<string>
    {
        "🏻", // light skin tone
        "🏼", // medium-light skin tone
        "🏽", // medium skin tone
        "🏾", // medium-dark skin tone
        "🏿", // dark skin tone
    };

    private static readonly List<string> HairStyleComponents = new List<string>
    {
        "🦰", // red hair
        "🦱", // curly hair
        "🦳", // white hair
        "🦲", // bald
    };

    /// <summary>Lower-cases a name and collapses every run of non-alphanumerics into "-".</summary>
    private static string ToColonSyntax(string s)
        => Regex.Replace(s.Trim().ToLowerInvariant(), "[^a-z0-9]+", "-");

    /// <summary>
    /// Parses every emoji description line, fills the lookup tables and builds
    /// <see cref="MatchOne"/>.
    /// </summary>
    private static void ParseEmojiList()
    {
        // Start from a clean slate so a repeated Load() cannot attach variations to stale
        // entries left over from a previous load.
        LookupByText.Clear();
        LookupByName.Clear();

        var matchSequence = new Regex(@"^([0-9a-fA-F ]+[0-9a-fA-F]).*; *([-a-z]*) *# [^ ]* (E[0-9.]* )?(.*)");
        var matchSkinTone = new Regex($"({string.Join("|", SkinToneComponents)})");
        var matchHairStyle = new Regex($"({string.Join("|", HairStyleComponents)})");
        var adult = "(👨|👩)(🏻|🏼|🏽|🏾|🏿)?";
        var child = "(👦|👧|👶)(🏻|🏼|🏽|🏾|🏿)?";
        var matchFamily = new Regex($"{adult}(\u200d{adult})*(\u200d{child})+");

        // Texts (with U+FE0F stripped) that already produced an entry.
        var seenUnqualified = new HashSet<string>();
        var allText = new List<string>();
        var codePointSeparator = new[] { ' ' };

        foreach (var line in EmojiDescriptionLines())
        {
            var m = matchSequence.Match(line);
            if (!m.Success)
            {
                continue;
            }

            string sequence = m.Groups[1].ToString();
            string name = m.Groups[4].ToString();

            // Drop empty tokens so doubled spaces between code points cannot make
            // Convert.ToInt32 throw.
            string text = string.Concat(
                sequence.Split(codePointSeparator, StringSplitOptions.RemoveEmptyEntries)
                        .Select(n => char.ConvertFromUtf32(Convert.ToInt32(n, 16))));

            // If this is a family emoji, no need to add it to our big matching
            // regex, since the family regex is already included.
            if (!matchFamily.IsMatch(text))
            {
                // Construct a regex to replace e.g. "🏻" with "(🏻|🏼|🏽|🏾|🏿)" in a big
                // regex so that we can match all variations of this Emoji even if they are
                // not in the standard.
                bool hasModifier = false;
                bool hasNonFirstModifier = false;

                string withHairStyles = matchHairStyle.Replace(text, x =>
                {
                    hasModifier = true;
                    hasNonFirstModifier |= x.Value != HairStyleComponents[0];
                    return matchHairStyle.ToString();
                });
                string regexText = matchSkinTone.Replace(withHairStyles, x =>
                {
                    hasModifier = true;
                    hasNonFirstModifier |= x.Value != SkinToneComponents[0];
                    return matchSkinTone.ToString();
                });

                // Only the variant using the first modifier of each kind contributes a
                // pattern; that pattern already covers all the other modifiers.
                if (!hasNonFirstModifier)
                {
                    allText.Add(hasModifier ? regexText : text);
                }
            }

            // If there is already a differently-qualified version of this character, skip it.
            // FIXME: this only works well if fully-qualified appears first in the list.
            var unqualified = text.Replace("\ufe0f", "");
            if (!seenUnqualified.Add(unqualified))
            {
                continue;
            }

            var emoji = new Emoji
            {
                Name = name,
                Text = text,
                Unicode = sequence,
                // TODO: apply any custom per-emoji processing here for later use.
            };

            // FIXME: this prevents LookupByText from working with the unqualified version
            LookupByText[text] = emoji;
            LookupByName[ToColonSyntax(name)] = emoji;

            // Get the left part of the name and check whether we're a variation of an existing
            // emoji. If so, append to that emoji. Otherwise, add to current subgroup.
            // FIXME: does not work properly because variations can appear before the generic emoji
            if (name.Contains(":") && LookupByName.TryGetValue(ToColonSyntax(name.Split(':')[0]), out var parentEmoji))
            {
                if (parentEmoji.VariationList.Count == 0)
                {
                    parentEmoji.VariationList.Add(parentEmoji);
                }

                parentEmoji.VariationList.Add(emoji);
            }
        }

        // Make U+fe0f optional in the regex so that we can match any combination.
        // FIXME: this is the starting point to implement variation selectors.
        // Longest alternatives go first so the regex prefers the longest sequence.
        var sortedText = allText.OrderByDescending(x => x.Length);
        var matchOther = string.Join("|", sortedText)
            .Replace("*", "[*]")
            .Replace("\ufe0f", "\ufe0f?");

        // Build a regex that matches any Emoji. When nothing was parsed, leave out the
        // trailing "|": an empty alternative would match the empty string at every position.
        m_match_one_string = matchOther.Length == 0
            ? matchFamily.ToString()
            : matchFamily.ToString() + "|" + matchOther;
        MatchOne = new Regex("(" + m_match_one_string + ")");
    }

    /// <summary>
    /// Reads the lines of the "emoji-test" text asset from a Resources folder.
    /// Data file: https://github.com/mattzheng/py-yanwenzi/blob/master/data/emoji-test.txt
    /// </summary>
    /// <returns>The lines of the asset, or an empty sequence when the asset is missing.</returns>
    private static IEnumerable<string> EmojiDescriptionLines()
    {
        var txt = Resources.Load<TextAsset>("emoji-test");

        // Resources.Load yields null for a missing asset. Dereferencing it here would throw
        // inside the static constructor and leave the whole type unusable, so report the
        // problem and continue with no data. Unity objects are compared with "==" on purpose.
        if (txt == null)
        {
            Debug.LogError("EmojiData: text asset 'emoji-test' was not found in a Resources folder; only family emoji sequences will be matched.");
            return Array.Empty<string>();
        }

        return txt.text.Split('\r', '\n');
    }
}
可以从这里下载
更多推荐
所有评论(0)