improve detection of emojis when deleting

2025-06-10 16:39:35 +00:00 · 2025-02-03 00:11:34 +01:00 · 2025-02-03 00:11:34 +01:00 · 73988394a9
commit 73988394a9
parent ad3086183d
2 changed files with 26 additions and 4 deletions
--- a/app/src/main/java/helium314/keyboard/latin/common/StringUtils.kt
+++ b/app/src/main/java/helium314/keyboard/latin/common/StringUtils.kt
@ -62,21 +62,32 @@ fun getFullEmojiAtEnd(s: CharSequence): String {
    while (offset > 0) {
        val codepoint = text.codePointBefore(offset)
        // stop if codepoint can't be emoji
-        if (!mightBeEmoji(codepoint)) return ""
+        if (!mightBeEmoji(codepoint))
            return text.substring(offset)
        offset -= Character.charCount(codepoint)
        // todo: if codepoint in 0x1F3FB..0x1F3FF -> combine with other emojis in front, but only if they actually combine
        //  why isn't this done with zwj like everything else? skin tones can be emojis by themselves...
        if (offset > 0 && text[offset - 1].code == KeyCode.ZWJ) {
            // todo: this keeps a leading ZWJ in the result in unusual cases like text, ZWJ, emoji
            //  and detects a single ZWJ as an emoji (at least this is irrelevant for the current use of getFullEmojiAtEnd)
            offset -= 1
            continue
        }
        if (codepoint in 0x1F3FB..0x1F3FF) {
            // Skin tones are not added with ZWJ, but just appended. This is not nice as they can be emojis on their own,
            // but that's how it is done. Assume that an emoji before the skin tone will get merged (usually correct in practice)
            val codepointBefore = text.codePointBefore(offset)
            if (isEmoji(codepointBefore)) {
                offset -= Character.charCount(codepointBefore)
                continue
            }
        }
        // check the whole text after offset
        val textToCheck = text.substring(offset)
        if (isEmoji(textToCheck)) {
            return textToCheck
        }
    }
-    return ""
+    return text.substring(offset)
 }
 /** split the string on the first of consecutive space only, further consecutive spaces are added to the next split */
--- a/app/src/test/java/helium314/keyboard/latin/StringUtilsTest.kt
+++ b/app/src/test/java/helium314/keyboard/latin/StringUtilsTest.kt
@ -42,6 +42,8 @@ class StringUtilsTest {
    }
    @Test fun detectEmojisAtEnd() {
        assertEquals("", getFullEmojiAtEnd("\uD83C\uDF83 "))
        assertEquals("", getFullEmojiAtEnd("a"))
        assertEquals("\uD83C\uDF83", getFullEmojiAtEnd("\uD83C\uDF83"))
        assertEquals("ℹ️", getFullEmojiAtEnd("ℹ️"))
        assertEquals("ℹ️", getFullEmojiAtEnd("ℹ️ℹ️"))
@ -51,6 +53,15 @@ class StringUtilsTest {
        assertEquals("\uD83C\uDFF3️\u200D\uD83C\uDF08", getFullEmojiAtEnd("\uD83C\uDFF3️\u200D\uD83C\uDF08"))
        assertEquals("\uD83C\uDFF3️\u200D\uD83C\uDF08", getFullEmojiAtEnd("\uD83C\uDFF4\u200D☠️\uD83C\uDFF3️\u200D\uD83C\uDF08"))
        assertEquals("\uD83C\uDFF3️\u200D⚧️", getFullEmojiAtEnd("hello there🏳️‍⚧️"))
        assertEquals("\uD83D\uDD75\uD83C\uDFFC", getFullEmojiAtEnd(" 🕵🏼"))
        assertEquals("\uD83D\uDD75\uD83C\uDFFC", getFullEmojiAtEnd("🕵🏼"))
        assertEquals("\uD83C\uDFFC", getFullEmojiAtEnd(" \uD83C\uDFFC"))
        // fails, but unlikely enough that we leave it unfixed
        //assertEquals("\uD83C\uDFFC", getFullEmojiAtEnd("\uD83C\uDF84\uD83C\uDFFC"))
        // below also fail, because ZWJ handling is not suitable for some unusual cases
        //assertEquals("", getFullEmojiAtEnd("\u200D"))
        //assertEquals("", getFullEmojiAtEnd("a\u200D"))
        //assertEquals("\uD83D\uDE22", getFullEmojiAtEnd(" \u200D\uD83D\uDE22"))
    }
    // todo: add tests for emoji detection?