diff --git a/app/src/main/java/helium314/keyboard/latin/common/StringUtils.kt b/app/src/main/java/helium314/keyboard/latin/common/StringUtils.kt index 69e8243db..98e5bebab 100644 --- a/app/src/main/java/helium314/keyboard/latin/common/StringUtils.kt +++ b/app/src/main/java/helium314/keyboard/latin/common/StringUtils.kt @@ -62,21 +62,32 @@ fun getFullEmojiAtEnd(s: CharSequence): String { while (offset > 0) { val codepoint = text.codePointBefore(offset) // stop if codepoint can't be emoji - if (!mightBeEmoji(codepoint)) return "" + if (!mightBeEmoji(codepoint)) + return text.substring(offset) offset -= Character.charCount(codepoint) - // todo: if codepoint in 0x1F3FB..0x1F3FF -> combine with other emojis in front, but only if they actually combine - // why isn't this done with zwj like everything else? skin tones can be emojis by themselves... if (offset > 0 && text[offset - 1].code == KeyCode.ZWJ) { + // todo: this appends ZWJ in weird cases like text, ZWJ, emoji + // and detects single ZWJ as emoji (at least irrelevant for current use of getFullEmojiAtEnd) offset -= 1 continue } + + if (codepoint in 0x1F3FB..0x1F3FF) { + // Skin tones are not added with ZWJ, but just appended. This is not nice as they can be emojis on their own, + // but that's how it is done. Assume that an emoji before the skin tone will get merged (usually correct in practice) + val codepointBefore = text.codePointBefore(offset) + if (isEmoji(codepointBefore)) { + offset -= Character.charCount(codepointBefore) + continue + } + } // check the whole text after offset val textToCheck = text.substring(offset) if (isEmoji(textToCheck)) { return textToCheck } } - return "" + return text.substring(offset) } /** split the string on the first of consecutive space only, further consecutive spaces are added to the next split */ diff --git a/app/src/test/java/helium314/keyboard/latin/StringUtilsTest.kt b/app/src/test/java/helium314/keyboard/latin/StringUtilsTest.kt index 518b6ffe1..e2bf6adbf 100644 --- a/app/src/test/java/helium314/keyboard/latin/StringUtilsTest.kt +++ b/app/src/test/java/helium314/keyboard/latin/StringUtilsTest.kt @@ -42,6 +42,8 @@ class StringUtilsTest { } @Test fun detectEmojisAtEnd() { + assertEquals("", getFullEmojiAtEnd("\uD83C\uDF83 ")) + assertEquals("", getFullEmojiAtEnd("a")) assertEquals("\uD83C\uDF83", getFullEmojiAtEnd("\uD83C\uDF83")) assertEquals("ℹ️", getFullEmojiAtEnd("ℹ️")) assertEquals("ℹ️", getFullEmojiAtEnd("ℹ️ℹ️")) @@ -51,6 +53,15 @@ class StringUtilsTest { assertEquals("\uD83C\uDFF3️\u200D\uD83C\uDF08", getFullEmojiAtEnd("\uD83C\uDFF3️\u200D\uD83C\uDF08")) assertEquals("\uD83C\uDFF3️\u200D\uD83C\uDF08", getFullEmojiAtEnd("\uD83C\uDFF4\u200D☠️\uD83C\uDFF3️\u200D\uD83C\uDF08")) assertEquals("\uD83C\uDFF3️\u200D⚧️", getFullEmojiAtEnd("hello there🏳️‍⚧️")) + assertEquals("\uD83D\uDD75\uD83C\uDFFC", getFullEmojiAtEnd(" 🕵🏼")) + assertEquals("\uD83D\uDD75\uD83C\uDFFC", getFullEmojiAtEnd("🕵🏼")) + assertEquals("\uD83C\uDFFC", getFullEmojiAtEnd(" \uD83C\uDFFC")) + // fails, but unlikely enough that we leave it unfixed + //assertEquals("\uD83C\uDFFC", getFullEmojiAtEnd("\uD83C\uDF84\uD83C\uDFFC")) + // below also fail, because ZWJ handling is not suitable for some unusual cases + //assertEquals("", getFullEmojiAtEnd("\u200D")) + //assertEquals("", getFullEmojiAtEnd("a\u200D")) + //assertEquals("\uD83D\uDE22", getFullEmojiAtEnd(" \u200D\uD83D\uDE22")) } // todo: add tests for emoji detection?