Unity 提取字符串中的emoji

在Unity中使用TextMeshPro加载Emoji时，普通的Emoji（如😀）可以正常显示，但组合型Emoji（如👨‍🏫或👧🏿）会被分开显示，无法正确识别。为了解决这一问题，可以参考GitHub上的emoji.wpf项目，通过提取组合型Emoji并替换为<sprite>富文本或其他方式来实现正确显示。文章提供了相关代码示例，展示了如何通过正则表达式匹配和提取Emoji，并

qq_24218843

314人浏览 · 2025-05-15 16:27:03

qq_24218843 · 2025-05-15 16:27:03 发布

参考这篇文章《Unity使用TextMeshPro加载Emoji | 星光与路人》（https://www.starloong.top/2024/06/01/Unity%E4%BD%BF%E7%94%A8TextMeshPro%E5%8A%A0%E8%BD%BDEmoji/index.html ），在Unity里添加emoji。

普通的emoji（如😀）只对应一个Unicode码点。

而像👨‍🏫这种由两个emoji组合而成的，或者像👧🏿这种带肤色修饰的，以及其他组合型emoji，Unity无法将其识别为一个整体，会拆开显示。

参考这个项目，成功将组合型emoji提取出，后续可以自己替换成<sprite>富文本或其他方式

GitHub 项目 emoji.wpf：https://github.com/samhocevar/emoji.wpf

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using UnityEngine;

/// <summary>
/// Demo component: on start-up it runs a sample string through
/// <see cref="EmojiData.MatchOne"/>, logs every emoji match, and logs the
/// remaining text with those matches removed.
/// </summary>
public class EmojiTest : MonoBehaviour
{
    void Start()
    {
        Do("ully-qualified     # 👨‍🏫 man teacher  1F3F3 FE0F    ; fully-qualified     # 🏳️ white flag");
    }

    /// <summary>
    /// Scans <paramref name="text"/> for emoji, logs each one, then logs the
    /// text that lies between and around the matches.
    /// </summary>
    /// <param name="text">Input string; null or empty input is ignored.</param>
    public void Do(string text)
    {
        if (string.IsNullOrEmpty(text))
        {
            return;
        }

        var plainText = new StringBuilder();
        var cursor = 0;
        MatchCollection hits = EmojiData.MatchOne.Matches(text);

        for (var i = 0; i < hits.Count; i++)
        {
            Match hit = hits[i];

            // Copy the non-emoji run that precedes this match.
            var gap = hit.Index - cursor;
            if (gap != 0)
            {
                plainText.Append(text, cursor, gap);
            }

            // The emoji is extracted here; this is the place to substitute a
            // <sprite> rich-text tag instead, e.g.
            //   plainText.Append("<sprite name=\"emoji\">");
            var emoji = hit.Value;
            Debug.Log(emoji); // set a breakpoint here to inspect the emoji

            cursor = hit.Index + hit.Length;
        }

        // Copy whatever follows the last match.
        if (cursor != text.Length)
        {
            plainText.Append(text, cursor, text.Length - cursor);
        }

        Debug.Log(plainText.ToString());
    }
}

/// <summary>
/// Emoji table parsed from the <c>emoji-test</c> text asset. Parsing fills the
/// two lookup dictionaries and builds <see cref="MatchOne"/>, a regex that
/// matches a single emoji (including combined sequences).
/// </summary>
/// <remarks>
/// The data is loaded by the static constructor, so a missing
/// <c>emoji-test</c> resource makes the first access to this type fail.
/// </remarks>
public static class EmojiData
{
    /// <summary>Emoji keyed by their text (the string built from the code points).</summary>
    public static IDictionary<string, Emoji> LookupByText { get; private set; }
        = new Dictionary<string, Emoji>();

    /// <summary>
    /// Emoji keyed by their name after <see cref="ToColonSyntax"/> normalisation
    /// (lower-cased, runs of non-alphanumeric characters replaced by <c>-</c>).
    /// </summary>
    public static IDictionary<string, Emoji> LookupByName { get; private set; }
        = new Dictionary<string, Emoji>();

    /// <summary>Regex matching one emoji; assigned at the end of parsing.</summary>
    public static Regex MatchOne { get; private set; }


    // FIXME: should we lazy load this? If the user calls Load() later, then
    // this first Load() call will have been for nothing.
    static EmojiData() => Load();

    /// <summary>(Re)parses the emoji list and rebuilds <see cref="MatchOne"/>.</summary>
    /// <exception cref="InvalidOperationException">
    /// The <c>emoji-test</c> text asset could not be loaded.
    /// </exception>
    public static void Load() => ParseEmojiList();


    /// <summary>One entry of the emoji list.</summary>
    public class Emoji
    {
        /// <summary>Name taken from the description line.</summary>
        public string Name { get; set; }

        /// <summary>The emoji itself, built from the line's code points.</summary>
        public string Text { get; set; }

        /// <summary>True when <see cref="VariationList"/> is non-empty.</summary>
        public bool HasVariations => VariationList.Count > 0;

        /// <summary>
        /// Variations of this emoji, e.g. skin tones. When populated, the
        /// first element is this emoji itself.
        /// </summary>
        public IList<Emoji> VariationList { get; } = new List<Emoji>();

        /// <summary>The hexadecimal code point sequence exactly as written in the list.</summary>
        public string Unicode { get; set; }
    }

    // Last pattern used to build MatchOne (without the outer capture group).
    private static string m_match_one_string;

    // FIXME: this could be read directly from emoji-test.txt.gz
    private static readonly List<string> SkinToneComponents = new List<string>
    {
        "🏻", // light skin tone
        "🏼", // medium-light skin tone
        "🏽", // medium skin tone
        "🏾", // medium-dark skin tone
        "🏿", // dark skin tone
    };

    private static readonly List<string> HairStyleComponents = new List<string>
    {
        "🦰", // red hair
        "🦱", // curly hair
        "🦳", // white hair
        "🦲", // bald
    };

    /// <summary>
    /// Normalises a name: trim, lower-case, and replace every run of
    /// characters outside <c>[a-z0-9]</c> with a single <c>-</c>.
    /// </summary>
    private static string ToColonSyntax(string s)
        => Regex.Replace(s.Trim().ToLowerInvariant(), "[^a-z0-9]+", "-");

    /// <summary>
    /// Reads every description line, registers the emoji in the lookup
    /// tables, and assembles the <see cref="MatchOne"/> regex.
    /// </summary>
    private static void ParseEmojiList()
    {
        // Groups: 1 = hex code point sequence, 2 = qualification, 4 = name.
        var match_sequence = new Regex(@"^([0-9a-fA-F ]+[0-9a-fA-F]).*; *([-a-z]*) *# [^ ]* (E[0-9.]* )?(.*)");
        var match_skin_tone = new Regex($"({string.Join("|", SkinToneComponents)})");
        var match_hair_style = new Regex($"({string.Join("|", HairStyleComponents)})");

        var adult = "(👨|👩)(🏻|🏼|🏽|🏾|🏿)?";
        var child = "(👦|👧|👶)(🏻|🏼|🏽|🏾|🏿)?";
        var match_family = new Regex($"{adult}(\u200d{adult})*(\u200d{child})+");

        var qualified_lut = new Dictionary<string, string>();
        var alltext = new List<string>();
        var separators = new[] { ' ' };

        foreach (var line in EmojiDescriptionLines())
        {
            var m = match_sequence.Match(line);
            if (!m.Success)
                continue;

            string sequence = m.Groups[1].Value;
            string name = m.Groups[4].Value;

            // The sequence pattern permits runs of spaces; drop the empty
            // tokens so that Convert.ToInt32 never sees an empty string.
            var code_points = sequence.Split(separators, StringSplitOptions.RemoveEmptyEntries);
            string text = string.Concat(
                code_points.Select(n => char.ConvertFromUtf32(Convert.ToInt32(n, 16))));
            bool has_modifier = false;

            if (match_family.IsMatch(text))
            {
                // If this is a family emoji, no need to add it to our big matching
                // regex, since the match_family regex is already included.
            }
            else
            {
                // Construct a regex to replace e.g. "🏻" with "(🏻|🏼|🏽|🏾|🏿)" in a big
                // regex so that we can match all variations of this Emoji even if they are
                // not in the standard.
                bool has_nonfirst_modifier = false;
                var regex_text = match_skin_tone.Replace(
                    match_hair_style.Replace(text, (x) =>
                    {
                        has_modifier = true;
                        has_nonfirst_modifier |= x.Value != HairStyleComponents[0];
                        return match_hair_style.ToString();
                    }), (x) =>
                    {
                        has_modifier = true;
                        has_nonfirst_modifier |= x.Value != SkinToneComponents[0];
                        return match_skin_tone.ToString();
                    });

                // Only the entry using the first modifier of each family is kept:
                // its generalised pattern already covers the other modifiers.
                if (!has_nonfirst_modifier)
                    alltext.Add(has_modifier ? regex_text : text);
            }

            // If there is already a differently-qualified version of this character, skip it.
            // FIXME: this only works well if fully-qualified appears first in the list.
            var unqualified = text.Replace("\ufe0f", "");
            if (qualified_lut.ContainsKey(unqualified))
                continue;

            qualified_lut[unqualified] = text;

            var emoji = new Emoji
            {
                Name = name,
                Text = text,
                Unicode = sequence,
                // TODO: apply any custom per-emoji processing here for later use
            };
            // FIXME: this prevents LookupByText from working with the unqualified version
            LookupByText[text] = emoji;
            LookupByName[ToColonSyntax(name)] = emoji;

            // Get the left part of the name and check whether we’re a variation of an existing
            // emoji. If so, append to that emoji. Otherwise, add to current subgroup.
            // FIXME: does not work properly because variations can appear before the generic emoji
            if (name.Contains(":") && LookupByName.TryGetValue(ToColonSyntax(name.Split(':')[0]), out var parent_emoji))
            {
                if (parent_emoji.VariationList.Count == 0)
                    parent_emoji.VariationList.Add(parent_emoji);
                parent_emoji.VariationList.Add(emoji);
            }
        }

        // Longest alternatives first so that the regex prefers the longest emoji.
        // Make U+fe0f optional in the regex so that we can match any combination.
        // FIXME: this is the starting point to implement variation selectors.
        var sortedtext = alltext.OrderByDescending(x => x.Length);
        var match_other = string.Join("|", sortedtext)
            .Replace("*", "[*]")
            .Replace("\ufe0f", "\ufe0f?");

        // Build a regex that matches any Emoji. An empty trailing alternative
        // would match the empty string at every position, so leave it out
        // when the list produced no patterns.
        m_match_one_string = match_other.Length == 0
            ? match_family.ToString()
            : match_family.ToString() + "|" + match_other;
        MatchOne = new Regex("(" + m_match_one_string + ")");
    }

    /// <summary>
    /// Returns the lines of the <c>emoji-test</c> text asset loaded through
    /// <c>Resources.Load</c>, split on CR and LF (empty lines included).
    /// https://github.com/mattzheng/py-yanwenzi/blob/master/data/emoji-test.txt
    /// </summary>
    /// <returns>The raw description lines.</returns>
    /// <exception cref="InvalidOperationException">
    /// The text asset was not found.
    /// </exception>
    private static IEnumerable<string> EmojiDescriptionLines()
    {
        var txt = Resources.Load<TextAsset>("emoji-test");
        if (txt == null)
        {
            // Fail with an actionable message instead of a NullReferenceException.
            throw new InvalidOperationException(
                "EmojiData: TextAsset 'emoji-test' was not found in any Resources folder. " +
                "Add emoji-test.txt under Assets/Resources.");
        }

        return txt.text.Split('\r', '\n');
    }
}

可以从这里下载

https://download.csdn.net/download/qq_24218843/90839613

Unity官方开发者社区

分享前沿Unity技术干货和开发经验，精彩的Unity活动和社区相关信息

更多推荐

【unity游戏开发——热更新】YooAsset+HybridCLR代码热更新实践

Unity官方开发者社区

【unity游戏开发——热更新】HybridCLR代码热更新的使用

Unity官方开发者社区

Unity AR动物科普

在Unity中导入制作好的动物模型，同时使用Unity的地形工具或导入外部场景资源（如从Asset Store获取），搭建与动物习性相符的虚拟场景，如草原、森林、海洋；添加环境元素，如树木、岩石、水体、天空盒，调整光照和阴影效果，营造逼真的氛围。明确要科普的动物种类（如濒危物种、常见动物等），规划需展示的知识点（生活习性、身体结构、生存环境），确定应用面向的用户群体（儿童、学生、大众），根据不同群