move some code from RichInputConnection to StringUtils

so we can easily add unit tests and maybe improve the awkward behavior
2025-06-25 10:30:53 +00:00 · 2025-06-09 20:20:27 +02:00 · 2025-06-09 20:20:27 +02:00 · 80ba394b95
commit 80ba394b95
parent 52744b7427
4 changed files with 198 additions and 109 deletions
--- a/app/src/main/java/helium314/keyboard/latin/RichInputConnection.java
+++ b/app/src/main/java/helium314/keyboard/latin/RichInputConnection.java
@ -40,8 +40,6 @@ import helium314.keyboard.latin.settings.SpacingAndPunctuations;
 import helium314.keyboard.latin.utils.CapsModeUtils;
 import helium314.keyboard.latin.utils.DebugLogUtils;
 import helium314.keyboard.latin.utils.NgramContextUtils;
 import helium314.keyboard.latin.utils.ScriptUtils;
 import helium314.keyboard.latin.utils.SpannableStringUtils;
 import helium314.keyboard.latin.utils.StatsUtils;
 import helium314.keyboard.latin.utils.TextRange;
@ -825,15 +823,6 @@ public final class RichInputConnection implements PrivateCommandPerformer {
        return NgramContextUtils.getNgramContextFromNthPreviousWord(prev, spacingAndPunctuations, n);
    }
    /**
     * Tells whether a code point counts as part of a composition (word) for the given script.
     *
     * @param codePoint the code point to classify
     * @param spacingAndPunctuations provides the word connector / word separator classification
     * @param script the script the code point is checked against
     * @return true for word connectors, and for non-separator letters of the script
     */
    private static boolean isPartOfCompositionForScript(final int codePoint,
            final SpacingAndPunctuations spacingAndPunctuations, final String script) {
        // Word connectors are always part of a composition.
        if (spacingAndPunctuations.isWordConnector(codePoint)) {
            return true;
        }
        // A separator never is.
        if (spacingAndPunctuations.isWordSeparator(codePoint)) {
            return false;
        }
        // Everything else depends on whether it is a letter of the script.
        return ScriptUtils.isLetterPartOfScript(codePoint, script);
    }
    /**
     * Returns the text surrounding the cursor.
     *
@ -860,90 +849,7 @@ public final class RichInputConnection implements PrivateCommandPerformer {
        if (before == null || after == null) {
            return null;
        }
-
+        return StringUtilsKt.getTouchedWordRange(before, after, script, spacingAndPunctuations);
        // Going backward, find the first breaking point (separator)
        int startIndexInBefore = before.length();
        int endIndexInAfter = -1;
        while (startIndexInBefore > 0) {
            final int codePoint = Character.codePointBefore(before, startIndexInBefore);
            if (!isPartOfCompositionForScript(codePoint, spacingAndPunctuations, script)) {
                if (Character.isWhitespace(codePoint) || !spacingAndPunctuations.mCurrentLanguageHasSpaces)
                    break;
                // continue to the next whitespace and see whether this contains a sometimesWordConnector
                for (int i = startIndexInBefore - 1; i >= 0; i--) {
                    final char c = before.charAt(i);
                    if (spacingAndPunctuations.isSometimesWordConnector(c)) {
                        // if yes -> whitespace is the index
                        startIndexInBefore = Math.max(StringUtils.charIndexOfLastWhitespace(before), 0);
                        final int firstSpaceAfter = StringUtils.charIndexOfFirstWhitespace(after);
                        endIndexInAfter = firstSpaceAfter == -1 ? after.length() : firstSpaceAfter -1;
                        break;
                    } else if (Character.isWhitespace(c)) {
                        // if no, just break normally
                        break;
                    }
                }
                break;
            }
            --startIndexInBefore;
            if (Character.isSupplementaryCodePoint(codePoint)) {
                --startIndexInBefore;
            }
        }
        // Find last word separator after the cursor
        if (endIndexInAfter == -1) {
            while (++endIndexInAfter < after.length()) {
                final int codePoint = Character.codePointAt(after, endIndexInAfter);
                if (!isPartOfCompositionForScript(codePoint, spacingAndPunctuations, script)) {
                    if (Character.isWhitespace(codePoint) || !spacingAndPunctuations.mCurrentLanguageHasSpaces)
                        break;
                    // continue to the next whitespace and see whether this contains a sometimesWordConnector
                    for (int i = endIndexInAfter; i < after.length(); i++) {
                        final char c = after.charAt(i);
                        if (spacingAndPunctuations.isSometimesWordConnector(c)) {
                            // if yes -> whitespace is next to the index
                            startIndexInBefore = Math.max(StringUtils.charIndexOfLastWhitespace(before), 0);
                            final int firstSpaceAfter = StringUtils.charIndexOfFirstWhitespace(after);
                            endIndexInAfter = firstSpaceAfter == -1 ? after.length() : firstSpaceAfter - 1;
                            break;
                        } else if (Character.isWhitespace(c)) {
                            // if no, just break normally
                            break;
                        }
                    }
                    break;
                }
                if (Character.isSupplementaryCodePoint(codePoint)) {
                    ++endIndexInAfter;
                }
            }
        }
        // strip stuff before "//" (i.e. ignore http and other protocols)
        final String beforeConsideringStart = before.subSequence(startIndexInBefore, before.length()).toString();
        final int protocolEnd = beforeConsideringStart.lastIndexOf("//");
        if (protocolEnd != -1)
            startIndexInBefore += protocolEnd + 1;
        // we don't want the end characters to be word separators
        while (endIndexInAfter > 0 && spacingAndPunctuations.isWordSeparator(after.charAt(endIndexInAfter - 1))) {
            --endIndexInAfter;
        }
        while (startIndexInBefore < before.length() && spacingAndPunctuations.isWordSeparator(before.charAt(startIndexInBefore))) {
            ++startIndexInBefore;
        }
        final boolean hasUrlSpans =
                SpannableStringUtils.hasUrlSpans(before, startIndexInBefore, before.length())
                || SpannableStringUtils.hasUrlSpans(after, 0, endIndexInAfter);
        // We don't use TextUtils#concat because it copies all spans without respect to their
        // nature. If the text includes a PARAGRAPH span and it has been split, then
        // TextUtils#concat will crash when it tries to concat both sides of it.
        return new TextRange(
                SpannableStringUtils.concatWithNonParagraphSuggestionSpansOnly(before, after),
                        startIndexInBefore, before.length() + endIndexInAfter, before.length(),
                        hasUrlSpans);
    }
    public boolean isCursorTouchingWord(final SpacingAndPunctuations spacingAndPunctuations,
@ -956,19 +862,7 @@ public final class RichInputConnection implements PrivateCommandPerformer {
            // a composing region should always count as a word
            return true;
        }
-        final String textBeforeCursor = mCommittedTextBeforeComposingText.toString();
+        return StringUtilsKt.endsWithWordCodepoint(mCommittedTextBeforeComposingText.toString(), spacingAndPunctuations);
        int indexOfCodePointInJavaChars = textBeforeCursor.length();
        int consideredCodePoint = 0 == indexOfCodePointInJavaChars ? Constants.NOT_A_CODE
                : textBeforeCursor.codePointBefore(indexOfCodePointInJavaChars);
        // Search for the first non word-connector char
        if (spacingAndPunctuations.isWordConnector(consideredCodePoint)) {
            indexOfCodePointInJavaChars -= Character.charCount(consideredCodePoint);
            consideredCodePoint = 0 == indexOfCodePointInJavaChars ? Constants.NOT_A_CODE
                    : textBeforeCursor.codePointBefore(indexOfCodePointInJavaChars);
        }
        return !(Constants.NOT_A_CODE == consideredCodePoint
                || spacingAndPunctuations.isWordSeparator(consideredCodePoint)
                || spacingAndPunctuations.isWordConnector(consideredCodePoint));
    }
    public boolean isCursorFollowedByWordCharacter(
--- a/app/src/main/java/helium314/keyboard/latin/common/StringUtils.kt
+++ b/app/src/main/java/helium314/keyboard/latin/common/StringUtils.kt
@ -6,13 +6,18 @@ import helium314.keyboard.keyboard.internal.keyboard_parser.floris.KeyCode
 import helium314.keyboard.latin.common.StringUtils.mightBeEmoji
 import helium314.keyboard.latin.common.StringUtils.newSingleCodePointString
 import helium314.keyboard.latin.settings.SpacingAndPunctuations
 import helium314.keyboard.latin.utils.ScriptUtils
 import helium314.keyboard.latin.utils.SpacedTokens
 import helium314.keyboard.latin.utils.SpannableStringUtils
 import helium314.keyboard.latin.utils.TextRange
 import java.math.BigInteger
 import java.util.Locale
 import kotlin.math.max
 /** Returns the code point starting at char index [offset], see [Character.codePointAt]. */
 fun CharSequence.codePointAt(offset: Int): Int {
    return Character.codePointAt(this, offset)
 }
 /** Returns the code point ending right before char index [offset], see [Character.codePointBefore]. */
 fun CharSequence.codePointBefore(offset: Int): Int {
    return Character.codePointBefore(this, offset)
 }
 /** Loops over the codepoints in [text]. Exits when [loop] returns true */
 inline fun loopOverCodePoints(text: CharSequence, loop: (cp: Int, charCount: Int) -> Boolean) {
    var offset = 0
    while (offset < text.length) {
@ -23,6 +28,7 @@ inline fun loopOverCodePoints(text: CharSequence, loop: (cp: Int, charCount: Int
    }
 }
 /** Loops backwards over the codepoints in [text]. Exits when [loop] returns true */
 inline fun loopOverCodePointsBackwards(text: CharSequence, loop: (cp: Int, charCount: Int) -> Boolean) {
    var offset = text.length
    while (offset > 0) {
@ -88,6 +94,111 @@ fun getFullEmojiAtEnd(text: CharSequence): String {
    return s.substring(offset)
 }
 /**
 *  Checks whether the last codepoint of [text] that is not a word connector is also not a
 *  word separator.
 *  Returns false if [text] is empty or contains nothing but word connectors.
 */
 // todo: this returns true on numbers, why isn't Character.isLetter(code) used?
 fun endsWithWordCodepoint(text: String, spacingAndPunctuations: SpacingAndPunctuations): Boolean {
    // stays false when the loop never reaches a codepoint that is not a word connector
    var endsWithWord = false
    loopOverCodePointsBackwards(text) { cp, _ ->
        val isConnector = spacingAndPunctuations.isWordConnector(cp)
        if (!isConnector) {
            // first codepoint (from the end) that is not a connector decides the result
            endsWithWord = !spacingAndPunctuations.isWordSeparator(cp)
        }
        !isConnector // stop as soon as the deciding codepoint was found
    }
    return endsWithWord
 }
 /**
 *  Determines the word touched by the cursor from the text [before] and [after] the cursor.
 *
 *  Starting at the cursor, the range grows in both directions as long as the codepoints are
 *  word connectors or non-separator letters of [script]. When growth stops at a codepoint that
 *  is not whitespace, the language has spaces, and a "sometimes word connector" occurs between
 *  that codepoint and the next whitespace, both ends are instead taken from
 *  [StringUtils.charIndexOfLastWhitespace] of [before] and [StringUtils.charIndexOfFirstWhitespace]
 *  of [after].
 *  Afterwards the start is moved to the second slash of the last "//" in the considered part of
 *  [before] (to ignore protocols like http), and word separators at both ends are removed.
 *
 *  @return a [TextRange] over the concatenation of [before] and [after], where the cursor index
 *   is the length of [before] and the word indices are relative to the concatenated text
 */
 // todo: simplify... maybe compare with original code?
 fun getTouchedWordRange(before: CharSequence, after: CharSequence, script: String, spacingAndPunctuations: SpacingAndPunctuations): TextRange {
    // Going backward, find the first breaking point (separator)
    var startIndexInBefore = before.length
    var endIndexInAfter = -1 // todo: clarify why might we want to set it when checking before
    loopOverCodePointsBackwards(before) { codePoint, cpLength ->
        if (!isPartOfCompositionForScript(codePoint, spacingAndPunctuations, script)) {
            if (Character.isWhitespace(codePoint) || !spacingAndPunctuations.mCurrentLanguageHasSpaces)
                return@loopOverCodePointsBackwards true
            // continue to the next whitespace and see whether this contains a sometimesWordConnector
            for (i in startIndexInBefore - 1 downTo 0) {
                val c = before[i]
                if (spacingAndPunctuations.isSometimesWordConnector(c.code)) {
                    // if yes -> whitespace is the index
                    // same Int computation as in the loop over the text after the cursor below
                    startIndexInBefore = max(StringUtils.charIndexOfLastWhitespace(before), 0)
                    val firstSpaceAfter = StringUtils.charIndexOfFirstWhitespace(after)
                    endIndexInAfter = if (firstSpaceAfter == -1) after.length else firstSpaceAfter - 1
                    return@loopOverCodePointsBackwards true
                } else if (Character.isWhitespace(c)) {
                    // if no, just break normally
                    return@loopOverCodePointsBackwards true
                }
            }
            return@loopOverCodePointsBackwards true
        }
        startIndexInBefore -= cpLength
        false
    }

    // Find last word separator after the cursor
    // (skipped if the end was already set while looking at the text before the cursor)
    if (endIndexInAfter == -1) {
        endIndexInAfter = 0
        loopOverCodePoints(after) { codePoint, cpLength ->
            if (!isPartOfCompositionForScript(codePoint, spacingAndPunctuations, script)) {
                if (Character.isWhitespace(codePoint) || !spacingAndPunctuations.mCurrentLanguageHasSpaces)
                    return@loopOverCodePoints true
                // continue to the next whitespace and see whether this contains a sometimesWordConnector
                for (i in endIndexInAfter..<after.length) {
                    val c = after[i]
                    if (spacingAndPunctuations.isSometimesWordConnector(c.code)) {
                        // if yes -> whitespace is next to the index
                        startIndexInBefore = max(StringUtils.charIndexOfLastWhitespace(before), 0)
                        val firstSpaceAfter = StringUtils.charIndexOfFirstWhitespace(after)
                        endIndexInAfter = if (firstSpaceAfter == -1) after.length else firstSpaceAfter - 1
                        return@loopOverCodePoints true
                    } else if (Character.isWhitespace(c)) {
                        // if no, just break normally
                        return@loopOverCodePoints true
                    }
                }
                return@loopOverCodePoints true
            }
            endIndexInAfter += cpLength
            false
        }
    }

    // strip text before "//" (i.e. ignore http and other protocols)
    val beforeConsideringStart = before.substring(startIndexInBefore, before.length)
    val protocolEnd = beforeConsideringStart.lastIndexOf("//")
    // lands on the second slash, which the separator skipping below may move past
    if (protocolEnd != -1) startIndexInBefore += protocolEnd + 1

    // we don't want the end characters to be word separators
    while (endIndexInAfter > 0 && spacingAndPunctuations.isWordSeparator(after[endIndexInAfter - 1].code)) {
        --endIndexInAfter
    }
    while (startIndexInBefore < before.length && spacingAndPunctuations.isWordSeparator(before[startIndexInBefore].code)) {
        ++startIndexInBefore
    }

    val hasUrlSpans = SpannableStringUtils.hasUrlSpans(before, startIndexInBefore, before.length)
        || SpannableStringUtils.hasUrlSpans(after, 0, endIndexInAfter)

    // We don't use TextUtils#concat because it copies all spans without respect to their
    // nature. If the text includes a PARAGRAPH span and it has been split, then
    // TextUtils#concat will crash when it tries to concat both sides of it.
    return TextRange(
        SpannableStringUtils.concatWithNonParagraphSuggestionSpansOnly(before, after),
        startIndexInBefore, before.length + endIndexInAfter, before.length,
        hasUrlSpans
    )
 }
 // Not really a string utility, but kept here because it is only used for getTouchedWordRange.
 private fun isPartOfCompositionForScript(codePoint: Int, spacingAndPunctuations: SpacingAndPunctuations, script: String): Boolean {
    // We always consider word connectors part of compositions.
    if (spacingAndPunctuations.isWordConnector(codePoint)) return true
    // Otherwise, it's part of composition if it's part of script and not a separator.
    if (spacingAndPunctuations.isWordSeparator(codePoint)) return false
    return ScriptUtils.isLetterPartOfScript(codePoint, script)
 }
 /** split the string on the first of consecutive space only, further consecutive spaces are added to the next split */
 fun String.splitOnFirstSpacesOnly(): List<String> {
    val out = mutableListOf<String>()
--- a/app/src/main/java/helium314/keyboard/latin/utils/TextRange.java
+++ b/app/src/main/java/helium314/keyboard/latin/utils/TextRange.java
@ -7,9 +7,13 @@
 package helium314.keyboard.latin.utils;
 import android.text.Spanned;
 import android.text.TextUtils;
 import android.text.style.SuggestionSpan;
 import androidx.annotation.NonNull;
 import java.util.Arrays;
 import java.util.Objects;
 /**
 * Represents a range of text, relative to the current cursor position.
@ -95,6 +99,28 @@ public final class TextRange {
        return writeIndex == readIndex ? spans : Arrays.copyOfRange(spans, 0, writeIndex);
    }
    /**
     * Two ranges are equal when their word start/end indices, cursor index and URL span flag
     * match, and both text fields are equal according to {@link TextUtils#equals}.
     */
    @Override
    public boolean equals(Object other) {
        if (other instanceof TextRange that) {
            final boolean sameIndices = mWordAtCursorStartIndex == that.mWordAtCursorStartIndex
                    && mWordAtCursorEndIndex == that.mWordAtCursorEndIndex
                    && mCursorIndex == that.mCursorIndex;
            if (!sameIndices || mHasUrlSpans != that.mHasUrlSpans) {
                return false;
            }
            // cheap comparisons passed, now compare the texts
            return TextUtils.equals(mTextAtCursor, that.mTextAtCursor)
                    && TextUtils.equals(mWord, that.mWord);
        }
        return false;
    }
    /**
     * Hash code that is consistent with {@link #equals(Object)}.
     *
     * @return a hash over the indices, the URL span flag and the string content of both texts
     */
    @Override
    public int hashCode() {
        // equals() compares the texts via TextUtils.equals, so the hash must depend on the
        // characters only. CharSequence implementations are not required to hash by content,
        // thus hashing the objects directly could give different hashes for equal ranges.
        // NOTE(review): assumes both fields are CharSequences, as their use in equals() suggests.
        final String textAtCursor = mTextAtCursor == null ? null : mTextAtCursor.toString();
        final String word = mWord == null ? null : mWord.toString();
        return Objects.hash(textAtCursor, mWordAtCursorStartIndex, mWordAtCursorEndIndex, mCursorIndex, word, mHasUrlSpans);
    }
    /** Returns the text at cursor, the word and the cursor index, separated by ", ". */
    @NonNull
    @Override
    public String toString() {
        return String.join(", ",
                String.valueOf(mTextAtCursor),
                String.valueOf(mWord),
                String.valueOf(mCursorIndex));
    }
    public TextRange(final CharSequence textAtCursor, final int wordAtCursorStartIndex,
            final int wordAtCursorEndIndex, final int cursorIndex, final boolean hasUrlSpans) {
        if (wordAtCursorStartIndex < 0 || cursorIndex < wordAtCursorStartIndex
--- a/app/src/test/java/helium314/keyboard/latin/StringUtilsTest.kt
+++ b/app/src/test/java/helium314/keyboard/latin/StringUtilsTest.kt
@ -4,9 +4,13 @@ package helium314.keyboard.latin
 import androidx.test.core.app.ApplicationProvider
 import helium314.keyboard.ShadowInputMethodManager2
 import helium314.keyboard.latin.common.StringUtils
 import helium314.keyboard.latin.common.endsWithWordCodepoint
 import helium314.keyboard.latin.common.getFullEmojiAtEnd
 import helium314.keyboard.latin.common.getTouchedWordRange
 import helium314.keyboard.latin.common.nonWordCodePointAndNoSpaceBeforeCursor
 import helium314.keyboard.latin.settings.SpacingAndPunctuations
 import helium314.keyboard.latin.utils.ScriptUtils
 import helium314.keyboard.latin.utils.TextRange
 import org.junit.runner.RunWith
 import org.robolectric.RobolectricTestRunner
 import org.robolectric.annotation.Config
@ -60,6 +64,54 @@ class StringUtilsTest {
        assert(nonWordCodePointAndNoSpaceBeforeCursor("th.is", sp))
    }
    @Test fun `is word-like at end`() {
        val sp = SpacingAndPunctuations(ApplicationProvider.getApplicationContext<App>().resources, false)
        // text to expected result of endsWithWordCodepoint
        val expectations = listOf(
            "" to false,
            "don'" to true,
            "hello!" to false,
            "when " to false,
            "3-" to true, // todo: this seems wrong
            "5'" to true, // todo: this seems wrong
            "1" to true, // todo: this seems wrong
            "a-" to true,
            "--" to false,
        )
        for ((text, expected) in expectations) {
            assert(endsWithWordCodepoint(text, sp) == expected)
        }
    }
    // Checks getTouchedWordRange for cursor positions inside plain words, mail addresses and URLs.
    // Each checkTextRange call gets the text before and after the cursor, and the expected
    // word start and end index in the concatenated text.
    @Test fun `get touched text range`() {
        // sp and spUrl differ only in the second constructor argument
        // NOTE(review): presumably this flag enables URL detection, see the todo below - confirm
        val sp = SpacingAndPunctuations(ApplicationProvider.getApplicationContext<App>().resources, false)
        val spUrl = SpacingAndPunctuations(ApplicationProvider.getApplicationContext<App>().resources, true)
        val script = ScriptUtils.SCRIPT_LATIN
        checkTextRange("blabla this is v", "ery good", sp, script, 15, 19)
        checkTextRange(".hel", "lo...", sp, script, 1, 6)
        checkTextRange("(hi", ")", sp, script, 1, 3)
        checkTextRange("", "word", sp, script, 0, 4)
        // mail addresses: same text with both settings and different cursor positions
        checkTextRange("mail: blorb@", "florb.com or", sp, script, 12, 17)
        checkTextRange("mail: blorb@", "florb.com or", spUrl, script, 6, 21)
        checkTextRange("mail: blor", "b@florb.com or", sp, script, 6, 11)
        checkTextRange("mail: blor", "b@florb.com or", spUrl, script, 6, 21)
        checkTextRange("mail: blorb@f", "lorb.com or", sp, script, 12, 17)
        checkTextRange("mail: blorb@f", "lorb.com or", spUrl, script, 6, 21)
        // URLs with protocol: same text with both settings and different cursor positions
        checkTextRange("http://exam", "ple.com", sp, script, 7, 14)
        checkTextRange("http://exam", "ple.com", spUrl, script, 7, 18)
        checkTextRange("http://example.", "com", sp, script, 15, 18)
        checkTextRange("http://example.", "com", spUrl, script, 7, 18)
        checkTextRange("htt", "p://example.com", sp, script, 0, 4)
        checkTextRange("htt", "p://example.com", spUrl, script, 0, 18)
        checkTextRange("http:/", "/example.com", sp, script, 6, 6)
        checkTextRange("http:/", "/example.com", spUrl, script, 0, 18)
        // text consisting of dots only: expected range is empty
        checkTextRange("..", ".", spUrl, script, 2, 2)
        checkTextRange("...", "", spUrl, script, 3, 3)
        // todo: these are bad cases of url detection
        //  also: sometimesWordConnectors are for URL and should be named accordingly
        checkTextRange("@@@", "@@@", spUrl, script, 0, 6)
        checkTextRange("a...", "", spUrl, script, 0, 4)
        checkTextRange("@@@", "", spUrl, script, 0, 3)
    }
    @Test fun detectEmojisAtEnd() {
        assertEquals("", getFullEmojiAtEnd("\uD83C\uDF83 "))
        assertEquals("", getFullEmojiAtEnd("a"))
@ -87,4 +139,10 @@ class StringUtilsTest {
    //  could help towards fully fixing https://github.com/Helium314/HeliBoard/issues/22
    //  though this might be tricky, as some emojis will show as one on new Android versions, and
    //  as two on older versions
    // Asserts that getTouchedWordRange returns a range without url spans from wordStart to
    // WordEnd, for the cursor being between before and after.
    private fun checkTextRange(before: String, after: String, sp: SpacingAndPunctuations, script: String, wordStart: Int, WordEnd: Int) {
        val actual = getTouchedWordRange(before, after, script, sp)
        val cursorIndex = before.length
        val expected = TextRange("$before$after", wordStart, WordEnd, cursorIndex, false)
        assertEquals(expected, actual)
    }
 }