diff --git a/app/src/main/java/helium314/keyboard/latin/RichInputConnection.java b/app/src/main/java/helium314/keyboard/latin/RichInputConnection.java index 3d81249b1..95beb8683 100644 --- a/app/src/main/java/helium314/keyboard/latin/RichInputConnection.java +++ b/app/src/main/java/helium314/keyboard/latin/RichInputConnection.java @@ -40,8 +40,6 @@ import helium314.keyboard.latin.settings.SpacingAndPunctuations; import helium314.keyboard.latin.utils.CapsModeUtils; import helium314.keyboard.latin.utils.DebugLogUtils; import helium314.keyboard.latin.utils.NgramContextUtils; -import helium314.keyboard.latin.utils.ScriptUtils; -import helium314.keyboard.latin.utils.SpannableStringUtils; import helium314.keyboard.latin.utils.StatsUtils; import helium314.keyboard.latin.utils.TextRange; @@ -825,15 +823,6 @@ public final class RichInputConnection implements PrivateCommandPerformer { return NgramContextUtils.getNgramContextFromNthPreviousWord(prev, spacingAndPunctuations, n); } - private static boolean isPartOfCompositionForScript(final int codePoint, - final SpacingAndPunctuations spacingAndPunctuations, final String script) { - // We always consider word connectors part of compositions. - return spacingAndPunctuations.isWordConnector(codePoint) - // Otherwise, it's part of composition if it's part of script and not a separator. - || (!spacingAndPunctuations.isWordSeparator(codePoint) - && ScriptUtils.isLetterPartOfScript(codePoint, script)); - } - /** * Returns the text surrounding the cursor. 
* @@ -860,90 +849,7 @@ public final class RichInputConnection implements PrivateCommandPerformer { if (before == null || after == null) { return null; } - - // Going backward, find the first breaking point (separator) - int startIndexInBefore = before.length(); - int endIndexInAfter = -1; - while (startIndexInBefore > 0) { - final int codePoint = Character.codePointBefore(before, startIndexInBefore); - if (!isPartOfCompositionForScript(codePoint, spacingAndPunctuations, script)) { - if (Character.isWhitespace(codePoint) || !spacingAndPunctuations.mCurrentLanguageHasSpaces) - break; - // continue to the next whitespace and see whether this contains a sometimesWordConnector - for (int i = startIndexInBefore - 1; i >= 0; i--) { - final char c = before.charAt(i); - if (spacingAndPunctuations.isSometimesWordConnector(c)) { - // if yes -> whitespace is the index - startIndexInBefore = Math.max(StringUtils.charIndexOfLastWhitespace(before), 0); - final int firstSpaceAfter = StringUtils.charIndexOfFirstWhitespace(after); - endIndexInAfter = firstSpaceAfter == -1 ? 
after.length() : firstSpaceAfter -1; - break; - } else if (Character.isWhitespace(c)) { - // if no, just break normally - break; - } - } - break; - } - --startIndexInBefore; - if (Character.isSupplementaryCodePoint(codePoint)) { - --startIndexInBefore; - } - } - - // Find last word separator after the cursor - if (endIndexInAfter == -1) { - while (++endIndexInAfter < after.length()) { - final int codePoint = Character.codePointAt(after, endIndexInAfter); - if (!isPartOfCompositionForScript(codePoint, spacingAndPunctuations, script)) { - if (Character.isWhitespace(codePoint) || !spacingAndPunctuations.mCurrentLanguageHasSpaces) - break; - // continue to the next whitespace and see whether this contains a sometimesWordConnector - for (int i = endIndexInAfter; i < after.length(); i++) { - final char c = after.charAt(i); - if (spacingAndPunctuations.isSometimesWordConnector(c)) { - // if yes -> whitespace is next to the index - startIndexInBefore = Math.max(StringUtils.charIndexOfLastWhitespace(before), 0); - final int firstSpaceAfter = StringUtils.charIndexOfFirstWhitespace(after); - endIndexInAfter = firstSpaceAfter == -1 ? after.length() : firstSpaceAfter - 1; - break; - } else if (Character.isWhitespace(c)) { - // if no, just break normally - break; - } - } - break; - } - if (Character.isSupplementaryCodePoint(codePoint)) { - ++endIndexInAfter; - } - } - } - - // strip stuff before "//" (i.e. 
ignore http and other protocols) - final String beforeConsideringStart = before.subSequence(startIndexInBefore, before.length()).toString(); - final int protocolEnd = beforeConsideringStart.lastIndexOf("//"); - if (protocolEnd != -1) - startIndexInBefore += protocolEnd + 1; - - // we don't want the end characters to be word separators - while (endIndexInAfter > 0 && spacingAndPunctuations.isWordSeparator(after.charAt(endIndexInAfter - 1))) { - --endIndexInAfter; - } - while (startIndexInBefore < before.length() && spacingAndPunctuations.isWordSeparator(before.charAt(startIndexInBefore))) { - ++startIndexInBefore; - } - - final boolean hasUrlSpans = - SpannableStringUtils.hasUrlSpans(before, startIndexInBefore, before.length()) - || SpannableStringUtils.hasUrlSpans(after, 0, endIndexInAfter); - // We don't use TextUtils#concat because it copies all spans without respect to their - // nature. If the text includes a PARAGRAPH span and it has been split, then - // TextUtils#concat will crash when it tries to concat both sides of it. - return new TextRange( - SpannableStringUtils.concatWithNonParagraphSuggestionSpansOnly(before, after), - startIndexInBefore, before.length() + endIndexInAfter, before.length(), - hasUrlSpans); + return StringUtilsKt.getTouchedWordRange(before, after, script, spacingAndPunctuations); } public boolean isCursorTouchingWord(final SpacingAndPunctuations spacingAndPunctuations, @@ -956,19 +862,7 @@ public final class RichInputConnection implements PrivateCommandPerformer { // a composing region should always count as a word return true; } - final String textBeforeCursor = mCommittedTextBeforeComposingText.toString(); - int indexOfCodePointInJavaChars = textBeforeCursor.length(); - int consideredCodePoint = 0 == indexOfCodePointInJavaChars ? 
Constants.NOT_A_CODE - : textBeforeCursor.codePointBefore(indexOfCodePointInJavaChars); - // Search for the first non word-connector char - if (spacingAndPunctuations.isWordConnector(consideredCodePoint)) { - indexOfCodePointInJavaChars -= Character.charCount(consideredCodePoint); - consideredCodePoint = 0 == indexOfCodePointInJavaChars ? Constants.NOT_A_CODE - : textBeforeCursor.codePointBefore(indexOfCodePointInJavaChars); - } - return !(Constants.NOT_A_CODE == consideredCodePoint - || spacingAndPunctuations.isWordSeparator(consideredCodePoint) - || spacingAndPunctuations.isWordConnector(consideredCodePoint)); + return StringUtilsKt.endsWithWordCodepoint(mCommittedTextBeforeComposingText.toString(), spacingAndPunctuations); } public boolean isCursorFollowedByWordCharacter( diff --git a/app/src/main/java/helium314/keyboard/latin/common/StringUtils.kt b/app/src/main/java/helium314/keyboard/latin/common/StringUtils.kt index 982be6919..5a501aaad 100644 --- a/app/src/main/java/helium314/keyboard/latin/common/StringUtils.kt +++ b/app/src/main/java/helium314/keyboard/latin/common/StringUtils.kt @@ -6,13 +6,18 @@ import helium314.keyboard.keyboard.internal.keyboard_parser.floris.KeyCode import helium314.keyboard.latin.common.StringUtils.mightBeEmoji import helium314.keyboard.latin.common.StringUtils.newSingleCodePointString import helium314.keyboard.latin.settings.SpacingAndPunctuations +import helium314.keyboard.latin.utils.ScriptUtils import helium314.keyboard.latin.utils.SpacedTokens +import helium314.keyboard.latin.utils.SpannableStringUtils +import helium314.keyboard.latin.utils.TextRange import java.math.BigInteger import java.util.Locale +import kotlin.math.max fun CharSequence.codePointAt(offset: Int) = Character.codePointAt(this, offset) fun CharSequence.codePointBefore(offset: Int) = Character.codePointBefore(this, offset) +/** Loops over the codepoints in [text]. 
Exits when [loop] returns true */
 inline fun loopOverCodePoints(text: CharSequence, loop: (cp: Int, charCount: Int) -> Boolean) {
     var offset = 0
     while (offset < text.length) {
@@ -23,6 +28,7 @@ inline fun loopOverCodePoints(text: CharSequence, loop: (cp: Int, charCount: Int
     }
 }
 
+/** Loops backwards over the codepoints in [text]. Exits when [loop] returns true */
 inline fun loopOverCodePointsBackwards(text: CharSequence, loop: (cp: Int, charCount: Int) -> Boolean) {
     var offset = text.length
     while (offset > 0) {
@@ -88,6 +94,130 @@ fun getFullEmojiAtEnd(text: CharSequence): String {
     return s.substring(offset)
 }
 
+/**
+ * Returns whether the [text] does not end with word separator, ignoring all word connectors.
+ * If the [text] is empty (after ignoring word connectors), the method returns false.
+ */
+// todo: this returns true on numbers, why isn't Character.isLetter(code) used?
+fun endsWithWordCodepoint(text: String, spacingAndPunctuations: SpacingAndPunctuations): Boolean {
+    if (text.isEmpty()) return false
+    var codePoint = 0 // initial value irrelevant since length is always > 0
+    loopOverCodePointsBackwards(text) { cp, _ ->
+        codePoint = cp
+        !spacingAndPunctuations.isWordConnector(cp)
+    }
+    // codePoint might still be a wordConnector (if text consists of wordConnectors)
+    return !spacingAndPunctuations.isWordConnector(codePoint) && !spacingAndPunctuations.isWordSeparator(codePoint)
+}
+
+/**
+ * Determines the word touched by the cursor, where [before] is the text before the cursor and
+ * [after] the text after it.
+ *
+ * The start is searched backwards in [before] and the end forwards in [after]; each search stops
+ * at the first code point that is not part of a composition for [script]
+ * (see [isPartOfCompositionForScript]). If that code point is not whitespace and the current
+ * language has spaces, the characters up to the next whitespace are checked for a
+ * "sometimes word connector". When one is found, start and end are taken from
+ * [StringUtils.charIndexOfLastWhitespace] on [before] and
+ * [StringUtils.charIndexOfFirstWhitespace] on [after] instead.
+ *
+ * Finally the start is moved to the second slash of the last "//" in the considered part of
+ * [before] (to skip protocols like "http://"), and word separators are trimmed from the end
+ * of the part in [after] and from the start of the part in [before].
+ *
+ * @return a [TextRange] over the concatenated text, with the cursor index at `before.length`,
+ *  flagged as having URL spans if [SpannableStringUtils.hasUrlSpans] reports any in the word
+ */
+// todo: simplify... maybe compare with original code?
+fun getTouchedWordRange(before: CharSequence, after: CharSequence, script: String, spacingAndPunctuations: SpacingAndPunctuations): TextRange {
+    // Going backward, find the first breaking point (separator)
+    var startIndexInBefore = before.length
+    var endIndexInAfter = -1 // todo: clarify why might we want to set it when checking before
+    loopOverCodePointsBackwards(before) { codePoint, cpLength ->
+        if (!isPartOfCompositionForScript(codePoint, spacingAndPunctuations, script)) {
+            if (Character.isWhitespace(codePoint) || !spacingAndPunctuations.mCurrentLanguageHasSpaces)
+                return@loopOverCodePointsBackwards true
+            // continue to the next whitespace and see whether this contains a sometimesWordConnector
+            for (i in startIndexInBefore - 1 downTo 0) {
+                val c = before[i]
+                if (spacingAndPunctuations.isSometimesWordConnector(c.code)) {
+                    // if yes -> whitespace is the index
+                    startIndexInBefore = max(StringUtils.charIndexOfLastWhitespace(before), 0)
+                    val firstSpaceAfter = StringUtils.charIndexOfFirstWhitespace(after)
+                    endIndexInAfter = if (firstSpaceAfter == -1) after.length else firstSpaceAfter - 1
+                    return@loopOverCodePointsBackwards true
+                } else if (Character.isWhitespace(c)) {
+                    // if no, just break normally
+                    return@loopOverCodePointsBackwards true
+                }
+            }
+            return@loopOverCodePointsBackwards true
+        }
+        startIndexInBefore -= cpLength
+        false
+    }
+
+    // Find last word separator after the cursor
+    if (endIndexInAfter == -1) {
+        endIndexInAfter = 0
+        loopOverCodePoints(after) { codePoint, cpLength ->
+            if (!isPartOfCompositionForScript(codePoint, spacingAndPunctuations, script)) {
+                if (Character.isWhitespace(codePoint) || !spacingAndPunctuations.mCurrentLanguageHasSpaces)
+                    return@loopOverCodePoints true
+                // continue to the next whitespace and see whether this contains a sometimesWordConnector
+                for (i in endIndexInAfter..<after.length) {
+                    val c = after[i]
+                    if (spacingAndPunctuations.isSometimesWordConnector(c.code)) {
+                        // if yes -> whitespace is next to the index
+                        startIndexInBefore = max(StringUtils.charIndexOfLastWhitespace(before), 0)
+                        val firstSpaceAfter = StringUtils.charIndexOfFirstWhitespace(after)
+                        endIndexInAfter = if (firstSpaceAfter == -1) after.length else firstSpaceAfter - 1
+                        return@loopOverCodePoints true
+                    } else if (Character.isWhitespace(c)) {
+                        // if no, just break normally
+                        return@loopOverCodePoints true
+                    }
+                }
+                return@loopOverCodePoints true
+            }
+            endIndexInAfter += cpLength
+            false
+        }
+    }
+
+    // strip text before "//" (i.e. ignore http and other protocols)
+    val beforeConsideringStart = before.substring(startIndexInBefore, before.length)
+    val protocolEnd = beforeConsideringStart.lastIndexOf("//")
+    if (protocolEnd != -1) startIndexInBefore += protocolEnd + 1
+
+    // we don't want the end characters to be word separators
+    while (endIndexInAfter > 0 && spacingAndPunctuations.isWordSeparator(after[endIndexInAfter - 1].code)) {
+        --endIndexInAfter
+    }
+    while (startIndexInBefore < before.length && spacingAndPunctuations.isWordSeparator(before[startIndexInBefore].code)) {
+        ++startIndexInBefore
+    }
+
+    val hasUrlSpans = SpannableStringUtils.hasUrlSpans(before, startIndexInBefore, before.length)
+            || SpannableStringUtils.hasUrlSpans(after, 0, endIndexInAfter)
+
+    // We don't use TextUtils#concat because it copies all spans without respect to their
+    // nature. If the text includes a PARAGRAPH span and it has been split, then
+    // TextUtils#concat will crash when it tries to concat both sides of it.
+    return TextRange(
+        SpannableStringUtils.concatWithNonParagraphSuggestionSpansOnly(before, after),
+        startIndexInBefore, before.length + endIndexInAfter, before.length,
+        hasUrlSpans
+    )
+}
+
+// actually this should not be in STRING Utils, but only used for getTouchedWordRange
+private fun isPartOfCompositionForScript(codePoint: Int, spacingAndPunctuations: SpacingAndPunctuations, script: String) =
+    spacingAndPunctuations.isWordConnector(codePoint) // We always consider word connectors part of compositions.
+        // Otherwise, it's part of composition if it's part of script and not a separator.
+        || (!spacingAndPunctuations.isWordSeparator(codePoint) && ScriptUtils.isLetterPartOfScript(codePoint, script))
+
 /** split the string on the first of consecutive space only, further consecutive spaces are added to the next split */
 fun String.splitOnFirstSpacesOnly(): List {
     val out = mutableListOf()
diff --git a/app/src/main/java/helium314/keyboard/latin/utils/TextRange.java b/app/src/main/java/helium314/keyboard/latin/utils/TextRange.java
index 659e01a95..1a0e81fdf 100644
--- a/app/src/main/java/helium314/keyboard/latin/utils/TextRange.java
+++ b/app/src/main/java/helium314/keyboard/latin/utils/TextRange.java
@@ -7,9 +7,13 @@
 package helium314.keyboard.latin.utils;
 
 import android.text.Spanned;
+import android.text.TextUtils;
 import android.text.style.SuggestionSpan;
 
+import androidx.annotation.NonNull;
+
 import java.util.Arrays;
+import java.util.Objects;
 
 /**
  * Represents a range of text, relative to the current cursor position.
@@ -95,6 +99,28 @@ public final class TextRange {
         return writeIndex == readIndex ? 
spans : Arrays.copyOfRange(spans, 0, writeIndex);
     }
 
+    @Override
+    public boolean equals(Object other) { // text is compared by its characters, see TextUtils.equals
+        if (!(other instanceof TextRange textRange)) return false;
+        return mWordAtCursorStartIndex == textRange.mWordAtCursorStartIndex
+                && mWordAtCursorEndIndex == textRange.mWordAtCursorEndIndex
+                && mCursorIndex == textRange.mCursorIndex
+                && mHasUrlSpans == textRange.mHasUrlSpans
+                && TextUtils.equals(mTextAtCursor, textRange.mTextAtCursor)
+                && TextUtils.equals(mWord, textRange.mWord);
+    }
+
+    @Override
+    public int hashCode() { // hash the characters, not the CharSequence objects, to stay consistent with equals
+        return Objects.hash(mTextAtCursor.toString(), mWordAtCursorStartIndex, mWordAtCursorEndIndex, mCursorIndex, mWord.toString(), mHasUrlSpans);
+    }
+
+    @NonNull
+    @Override
+    public String toString() {
+        return mTextAtCursor + ", " + mWord + ", " + mCursorIndex;
+    }
+
     public TextRange(final CharSequence textAtCursor, final int wordAtCursorStartIndex,
             final int wordAtCursorEndIndex, final int cursorIndex, final boolean hasUrlSpans) {
         if (wordAtCursorStartIndex < 0 || cursorIndex < wordAtCursorStartIndex
@@ -109,4 +135,4 @@ public final class TextRange {
         mHasUrlSpans = hasUrlSpans;
         mWord = mTextAtCursor.subSequence(mWordAtCursorStartIndex, mWordAtCursorEndIndex);
     }
-}
\ No newline at end of file
+}
diff --git a/app/src/test/java/helium314/keyboard/latin/StringUtilsTest.kt b/app/src/test/java/helium314/keyboard/latin/StringUtilsTest.kt
index c7339d047..1971f41c6 100644
--- a/app/src/test/java/helium314/keyboard/latin/StringUtilsTest.kt
+++ b/app/src/test/java/helium314/keyboard/latin/StringUtilsTest.kt
@@ -4,9 +4,13 @@ package helium314.keyboard.latin
 import androidx.test.core.app.ApplicationProvider
 import helium314.keyboard.ShadowInputMethodManager2
 import helium314.keyboard.latin.common.StringUtils
+import helium314.keyboard.latin.common.endsWithWordCodepoint
 import helium314.keyboard.latin.common.getFullEmojiAtEnd
+import helium314.keyboard.latin.common.getTouchedWordRange
 import helium314.keyboard.latin.common.nonWordCodePointAndNoSpaceBeforeCursor
 import 
helium314.keyboard.latin.settings.SpacingAndPunctuations
+import helium314.keyboard.latin.utils.ScriptUtils
+import helium314.keyboard.latin.utils.TextRange
 import org.junit.runner.RunWith
 import org.robolectric.RobolectricTestRunner
 import org.robolectric.annotation.Config
@@ -60,6 +64,54 @@ class StringUtilsTest {
         assert(nonWordCodePointAndNoSpaceBeforeCursor("th.is", sp))
     }
 
+    @Test fun `is word-like at end`() {
+        val sp = SpacingAndPunctuations(ApplicationProvider.getApplicationContext().resources, false)
+        assert(!endsWithWordCodepoint("", sp))
+        assert(endsWithWordCodepoint("don'", sp))
+        assert(!endsWithWordCodepoint("hello!", sp))
+        assert(!endsWithWordCodepoint("when ", sp))
+        assert(endsWithWordCodepoint("3-", sp)) // todo: this seems wrong
+        assert(endsWithWordCodepoint("5'", sp)) // todo: this seems wrong
+        assert(endsWithWordCodepoint("1", sp)) // todo: this seems wrong
+        assert(endsWithWordCodepoint("a-", sp))
+        assert(!endsWithWordCodepoint("--", sp))
+    }
+
+    @Test fun `get touched text range`() {
+        val sp = SpacingAndPunctuations(ApplicationProvider.getApplicationContext().resources, false)
+        val spUrl = SpacingAndPunctuations(ApplicationProvider.getApplicationContext().resources, true)
+        val script = ScriptUtils.SCRIPT_LATIN
+        checkTextRange("blabla this is v", "ery good", sp, script, 15, 19)
+        checkTextRange(".hel", "lo...", sp, script, 1, 6)
+        checkTextRange("(hi", ")", sp, script, 1, 3)
+        checkTextRange("", "word", sp, script, 0, 4)
+
+        checkTextRange("mail: blorb@", "florb.com or", sp, script, 12, 17)
+        checkTextRange("mail: blorb@", "florb.com or", spUrl, script, 6, 21)
+        checkTextRange("mail: blor", "b@florb.com or", sp, script, 6, 11)
+        checkTextRange("mail: blor", "b@florb.com or", spUrl, script, 6, 21)
+        checkTextRange("mail: blorb@f", "lorb.com or", sp, script, 12, 17)
+        checkTextRange("mail: blorb@f", "lorb.com or", spUrl, script, 6, 21)
+
+        checkTextRange("http://exam", "ple.com", sp, script, 7, 14)
+        checkTextRange("http://exam", "ple.com", spUrl, script, 7, 18)
+        checkTextRange("http://example.", "com", sp, script, 15, 18)
+        checkTextRange("http://example.", "com", spUrl, script, 7, 18)
+        checkTextRange("htt", "p://example.com", sp, script, 0, 4)
+        checkTextRange("htt", "p://example.com", spUrl, script, 0, 18)
+        checkTextRange("http:/", "/example.com", sp, script, 6, 6)
+        checkTextRange("http:/", "/example.com", spUrl, script, 0, 18)
+
+        checkTextRange("..", ".", spUrl, script, 2, 2)
+        checkTextRange("...", "", spUrl, script, 3, 3)
+
+        // todo: these are bad cases of url detection
+        //  also: sometimesWordConnectors are for URL and should be named accordingly
+        checkTextRange("@@@", "@@@", spUrl, script, 0, 6)
+        checkTextRange("a...", "", spUrl, script, 0, 4)
+        checkTextRange("@@@", "", spUrl, script, 0, 3)
+    }
+
     @Test fun detectEmojisAtEnd() {
         assertEquals("", getFullEmojiAtEnd("\uD83C\uDF83 "))
         assertEquals("", getFullEmojiAtEnd("a"))
@@ -87,4 +139,14 @@ class StringUtilsTest {
     // could help towards fully fixing https://github.com/Helium314/HeliBoard/issues/22
     // though this might be tricky, as some emojis will show as one on new Android versions, and
     // as two on older versions
+
+    /**
+     * Asserts that [getTouchedWordRange] for a cursor between [before] and [after] returns the word
+     * range from [wordStart] to [wordEnd] (indices into `before + after`) with no URL spans.
+     */
+    private fun checkTextRange(before: String, after: String, sp: SpacingAndPunctuations, script: String, wordStart: Int, wordEnd: Int) {
+        val got = getTouchedWordRange(before, after, script, sp)
+        val wanted = TextRange(before + after, wordStart, wordEnd, before.length, false)
+        assertEquals(wanted, got)
+    }
 }