fix some issues with url detection, add tests for some more issues to be fixed

This commit is contained in:
Helium314 2023-09-25 10:10:56 +02:00
parent cf261bf5ef
commit bc1557cc69
6 changed files with 137 additions and 50 deletions

View file

@ -745,7 +745,6 @@ public final class RichInputConnection implements PrivateCommandPerformer {
}
// Going backward, find the first breaking point (separator)
// todo: break if there are 2 consecutive sometimesWordConnectors (more complicated once again, great...)
int startIndexInBefore = before.length();
int endIndexInAfter = -1;
while (startIndexInBefore > 0) {
@ -758,9 +757,9 @@ public final class RichInputConnection implements PrivateCommandPerformer {
final char c = before.charAt(i);
if (spacingAndPunctuations.isSometimesWordConnector(c)) {
// if yes -> whitespace is the index
startIndexInBefore = Math.max(StringUtils.charIndexOfLastWhitespace(before), 0);;
startIndexInBefore = Math.max(StringUtils.charIndexOfLastWhitespace(before), 0);
final int firstSpaceAfter = StringUtils.charIndexOfFirstWhitespace(after);
endIndexInAfter = firstSpaceAfter == -1 ? (after.length() - 1) : firstSpaceAfter -1;
endIndexInAfter = firstSpaceAfter == -1 ? after.length() : firstSpaceAfter -1;
break;
} else if (Character.isWhitespace(c)) {
// if no, just break normally
@ -789,7 +788,7 @@ public final class RichInputConnection implements PrivateCommandPerformer {
// if yes -> whitespace is next to the index
startIndexInBefore = Math.max(StringUtils.charIndexOfLastWhitespace(before), 0);;
final int firstSpaceAfter = StringUtils.charIndexOfFirstWhitespace(after);
endIndexInAfter = firstSpaceAfter == -1 ? (after.length() - 1) : firstSpaceAfter - 1;
endIndexInAfter = firstSpaceAfter == -1 ? after.length() : firstSpaceAfter - 1;
break;
} else if (Character.isWhitespace(c)) {
// if no, just break normally
@ -804,6 +803,12 @@ public final class RichInputConnection implements PrivateCommandPerformer {
}
}
// strip stuff before "//" (i.e. ignore http and other protocols)
final String beforeConsideringStart = before.subSequence(startIndexInBefore, before.length()).toString();
final int protocolEnd = beforeConsideringStart.lastIndexOf("//");
if (protocolEnd != -1)
startIndexInBefore += protocolEnd + 1;
// we don't want the end characters to be word separators
while (endIndexInAfter > 0 && spacingAndPunctuations.isWordSeparator(after.charAt(endIndexInAfter - 1))) {
--endIndexInAfter;
@ -1004,16 +1009,26 @@ public final class RichInputConnection implements PrivateCommandPerformer {
return mCommittedTextBeforeComposingText.lastIndexOf(" ") < mCommittedTextBeforeComposingText.lastIndexOf("@");
}
public CharSequence textBeforeCursorUntilLastWhitespace() {
int afterLastSpace = 0;
public CharSequence textBeforeCursorUntilLastWhitespaceOrDoubleSlash() {
int startIndex = 0;
boolean previousWasSlash = false;
for (int i = mCommittedTextBeforeComposingText.length() - 1; i >= 0; i--) {
final char c = mCommittedTextBeforeComposingText.charAt(i);
if (Character.isWhitespace(c)) {
afterLastSpace = i + 1;
startIndex = i + 1;
break;
}
if (c == '/') {
if (previousWasSlash) {
startIndex = i + 2;
break;
}
return mCommittedTextBeforeComposingText.subSequence(afterLastSpace, mCommittedTextBeforeComposingText.length());
previousWasSlash = true;
} else {
previousWasSlash = false;
}
}
return mCommittedTextBeforeComposingText.subSequence(startIndex, mCommittedTextBeforeComposingText.length());
}
/**

View file

@ -895,7 +895,7 @@ public final class InputLogic {
// but not if there are two consecutive sometimesWordConnectors (e.g. "...bla")
&& !settingsValues.mSpacingAndPunctuations.isSometimesWordConnector(mConnection.getCharBeforeBeforeCursor())
) {
final CharSequence text = mConnection.textBeforeCursorUntilLastWhitespace();
final CharSequence text = mConnection.textBeforeCursorUntilLastWhitespaceOrDoubleSlash();
final TextRange range = new TextRange(text, 0, text.length(), text.length(), false);
isComposingWord = true;
restartSuggestions(range, mConnection.mExpectedSelStart);

View file

@ -138,8 +138,6 @@ public class SettingsValues {
public SettingsValues(final Context context, final SharedPreferences prefs, final Resources res,
@NonNull final InputAttributes inputAttributes) {
mLocale = res.getConfiguration().locale;
// Get the resources
mSpacingAndPunctuations = new SpacingAndPunctuations(res);
// Store the input attributes
mInputAttributes = inputAttributes;
@ -221,7 +219,6 @@ public class SettingsValues {
final InputMethodSubtype selectedSubtype = SubtypeSettingsKt.getSelectedSubtype(prefs);
mSecondaryLocales = Settings.getSecondaryLocales(prefs, selectedSubtype.getLocale());
mShowAllMoreKeys = selectedSubtype.isAsciiCapable() && prefs.getBoolean(Settings.PREF_SHOW_ALL_MORE_KEYS, false);
mColors = Settings.getColorsForCurrentTheme(context, prefs);
mAddToPersonalDictionary = prefs.getBoolean(Settings.PREF_ADD_TO_PERSONAL_DICTIONARY, false);
@ -234,6 +231,7 @@ public class SettingsValues {
);
mUrlDetectionEnabled = prefs.getBoolean(Settings.PREF_URL_DETECTION, false);
mPinnedKeys = Settings.readPinnedKeys(prefs);
mSpacingAndPunctuations = new SpacingAndPunctuations(res, mUrlDetectionEnabled);
}
public boolean isApplicationSpecifiedCompletionsOn() {

View file

@ -44,7 +44,7 @@ public final class SpacingAndPunctuations {
public final boolean mUsesAmericanTypography;
public final boolean mUsesGermanRules;
public SpacingAndPunctuations(final Resources res) {
public SpacingAndPunctuations(final Resources res, final Boolean urlDetection) {
// To be able to binary search the code point. See {@link #isUsuallyPrecededBySpace(int)}.
mSortedSymbolsPrecededBySpace = StringUtils.toSortedCodePointArray(res.getString(R.string.symbols_preceded_by_space));
// To be able to binary search the code point. See {@link #isUsuallyFollowedBySpace(int)}.
@ -60,7 +60,7 @@ public final class SpacingAndPunctuations {
mSentenceSeparator, Constants.CODE_SPACE }, 0, 2);
mCurrentLanguageHasSpaces = res.getBoolean(R.bool.current_language_has_spaces);
// make it empty if language doesn't have spaces, to avoid weird glitches
mSortedSometimesWordConnectors = mCurrentLanguageHasSpaces ? StringUtils.toSortedCodePointArray(res.getString(R.string.symbols_sometimes_word_connectors)) : new int[0];
mSortedSometimesWordConnectors = (urlDetection && mCurrentLanguageHasSpaces) ? StringUtils.toSortedCodePointArray(res.getString(R.string.symbols_sometimes_word_connectors)) : new int[0];
final Locale locale = res.getConfiguration().locale;
// Heuristic: we use American Typography rules because it's the most common rules for all
// English variants. German rules (not "German typography") also have small gotchas.

View file

@ -17,6 +17,7 @@
package org.dslul.openboard.inputmethod.latin.spellcheck;
import android.annotation.TargetApi;
import android.content.SharedPreferences;
import android.content.res.Resources;
import android.os.Build;
import android.view.textservice.SentenceSuggestionsInfo;
@ -26,6 +27,7 @@ import android.view.textservice.TextInfo;
import org.dslul.openboard.inputmethod.compat.TextInfoCompatUtils;
import org.dslul.openboard.inputmethod.latin.common.Constants;
import org.dslul.openboard.inputmethod.latin.settings.SpacingAndPunctuations;
import org.dslul.openboard.inputmethod.latin.utils.DeviceProtectedUtils;
import org.dslul.openboard.inputmethod.latin.utils.RunInLocale;
import java.util.ArrayList;
@ -82,7 +84,7 @@ public class SentenceLevelAdapter {
new RunInLocale<SpacingAndPunctuations>() {
@Override
protected SpacingAndPunctuations job(final Resources r) {
return new SpacingAndPunctuations(r);
return new SpacingAndPunctuations(r, false);
}
};
mSpacingAndPunctuations = job.runInLocale(res, locale);

View file

@ -11,6 +11,7 @@ import androidx.core.content.edit
import org.dslul.openboard.inputmethod.event.Event
import org.dslul.openboard.inputmethod.keyboard.KeyboardSwitcher
import org.dslul.openboard.inputmethod.keyboard.MainKeyboardView
import org.dslul.openboard.inputmethod.latin.ShadowFacilitator2.Companion.lastAddedWord
import org.dslul.openboard.inputmethod.latin.common.Constants
import org.dslul.openboard.inputmethod.latin.common.StringUtils
import org.dslul.openboard.inputmethod.latin.inputlogic.InputLogic
@ -177,24 +178,17 @@ class InputLogicTest {
@Test fun urlDetectionThings() {
reset()
DeviceProtectedUtils.getSharedPreferences(latinIME).edit { putBoolean(Settings.PREF_URL_DETECTION, true) }
input('.')
input('.')
input('.')
input('h')
chainInput("...h")
assertEquals("...h", text)
assertEquals("h", composingText)
reset()
DeviceProtectedUtils.getSharedPreferences(latinIME).edit { putBoolean(Settings.PREF_URL_DETECTION, true) }
input("bla")
input('.')
input('.')
chainInput("bla..")
assertEquals("bla..", text)
assertEquals("", composingText)
reset()
DeviceProtectedUtils.getSharedPreferences(latinIME).edit { putBoolean(Settings.PREF_URL_DETECTION, true) }
input("bla")
input('.')
input('c')
chainInput("bla.c")
assertEquals("bla.c", text)
assertEquals("bla.c", composingText)
reset()
@ -211,9 +205,7 @@ class InputLogicTest {
@Test fun stripSeparatorsBeforeAddingToHistoryWithURLDetection() {
reset()
DeviceProtectedUtils.getSharedPreferences(latinIME).edit { putBoolean(Settings.PREF_URL_DETECTION, true) }
setText("example.co")
input('m')
input('.')
chainInput("example.com.")
assertEquals("example.com.", composingText)
input(' ')
assertEquals("example.com", ShadowFacilitator2.lastAddedWord)
@ -222,10 +214,7 @@ class InputLogicTest {
@Test fun dontSelectConsecutiveSeparatorsWithURLDetection() {
reset()
DeviceProtectedUtils.getSharedPreferences(latinIME).edit { putBoolean(Settings.PREF_URL_DETECTION, true) }
setText("bl")
input('a')
input('.')
input('.')
chainInput("bla..")
assertEquals("", composingText)
assertEquals("bla..", text)
}
@ -249,29 +238,109 @@ class InputLogicTest {
assertEquals("", composingText)
}
@Test fun `don't select whole thing as composing word if URL detection disabled`() {
reset()
setText("http://example.com")
setCursorPosition(13) // between l and e
assertEquals("example", composingText)
}
@Test fun `select whole thing except http(s) as composing word if URL detection enabled and selecting`() {
reset()
DeviceProtectedUtils.getSharedPreferences(latinIME).edit { putBoolean(Settings.PREF_URL_DETECTION, true) }
setText("http://example.com")
setCursorPosition(13) // between l and e
assertEquals("example.com", composingText)
setText("http://bla.com http://example.com ")
setCursorPosition(29) // between l and e
assertEquals("example.com", composingText)
}
@Test fun `select whole thing except http(s) as composing word if URL detection enabled and typing`() {
reset()
DeviceProtectedUtils.getSharedPreferences(latinIME).edit { putBoolean(Settings.PREF_URL_DETECTION, true) }
chainInput("http://example.com")
assertEquals("example.com", composingText)
}
@Test fun protocolStrippedWhenAddingUrlToHistory() {
reset()
DeviceProtectedUtils.getSharedPreferences(latinIME).edit { putBoolean(Settings.PREF_URL_DETECTION, true) }
chainInput("http://bla.")
// todo, and is this really wanted? i guess yes...
// actually protocol shouldn't even be selected, maybe?
assertEquals("bla", lastAddedWord) // current state
assertEquals("", lastAddedWord) // that's the first goal here, right?
// todo: further, when entering actual full urls we really want the protocol stripped (at least for http(s), but possibly all)
}
@Test fun urlProperlySelected() {
reset()
DeviceProtectedUtils.getSharedPreferences(latinIME).edit { putBoolean(Settings.PREF_URL_DETECTION, true) }
setText("http://example.com/here")
setCursorPosition(18) // after .com
functionalKeyPress(Constants.CODE_DELETE)
functionalKeyPress(Constants.CODE_DELETE)
functionalKeyPress(Constants.CODE_DELETE) // delete com
// todo: do we want this?
// probably not
// when doing the same for a non-url we still have everything selected
// so at least don't do it wrong in both cases...
assertEquals("", composingText)
input('n')
input('e')
input('t')
assertEquals("http://example.net", composingText) // todo: maybe without http://?
}
@Test fun dontCommitPartialUrlBeforeFirstPeriod() {
reset()
DeviceProtectedUtils.getSharedPreferences(latinIME).edit { putBoolean(Settings.PREF_URL_DETECTION, true) }
// type http://bla. -> bla not selected, but clearly url, also means http://bla is committed which we probably don't want
chainInput("http://bla.")
assertEquals("", composingText) // current state
assertEquals("bla.", composingText) // ideally we want "bla.", is this doable?
}
@Test fun `no intermediate commit in URL field`() {
reset()
setInputType(InputType.TYPE_CLASS_TEXT and InputType.TYPE_TEXT_VARIATION_URI)
chainInput("http://bla.com/img.jpg")
assertEquals("", lastAddedWord) // failing
assertEquals("http://bla.com/img.jpg", composingText) // maybe without the http://
}
@Test fun `no intermediate commit in URL field without protocol`() {
reset()
setInputType(InputType.TYPE_CLASS_TEXT and InputType.TYPE_TEXT_VARIATION_URI)
chainInput("http://bla.com/img.jpg")
assertEquals("", lastAddedWord) // failing
assertEquals("http://bla.com/img.jpg", composingText) // maybe without the http://
}
// ------- helper functions ---------
// should be called before every test, so the same state is guaranteed
private fun reset() {
// reset input connection
// reset input connection & facilitator
currentScript = ScriptUtils.SCRIPT_LATIN
text = ""
selectionStart = 0
selectionEnd = 0
composingStart = 0
composingEnd = 0
batchEdit = 0
currentInputType = InputType.TYPE_CLASS_TEXT
lastAddedWord = ""
// reset settings
DeviceProtectedUtils.getSharedPreferences(latinIME).edit { clear() }
currentInputType = InputType.TYPE_CLASS_TEXT
setText("")
setText("") // (re)sets selection and composing word
}
private fun chainInput(text: String) = text.forEach { input(it.code) }
private fun input(char: Char) = input(char.code)
private fun input(codePoint: Int) {
require(codePoint > 0) { "not a codePoint: $codePoint" }
val oldBefore = textBeforeCursor
val oldAfter = textAfterCursor
val insert = StringUtils.newSingleCodePointString(codePoint)
@ -287,6 +356,7 @@ class InputLogicTest {
}
private fun functionalKeyPress(keyCode: Int) {
require(keyCode < 0) { "not a functional key code: $keyCode" }
latinIME.onEvent(Event.createSoftwareKeypressEvent(Event.NOT_A_CODE_POINT, keyCode, Constants.NOT_A_COORDINATE, Constants.NOT_A_COORDINATE, false))
handleMessages()
checkConnectionConsistency()
@ -372,13 +442,20 @@ class InputLogicTest {
}
private fun checkConnectionConsistency() {
// RichInputConnection only has composing text up to cursor, but InputConnection has full composing text
val expectedConnectionComposingText = if (composingStart == -1 || composingEnd == -1) ""
else text.substring(composingStart, min(composingEnd, selectionEnd))
assert(composingText.startsWith(expectedConnectionComposingText))
// RichInputConnection only returns text up to cursor
val textBeforeComposingText = if (composingStart == -1) textBeforeCursor else text.substring(0, composingStart)
println("consistency: $selectionStart, ${connection.expectedSelectionStart}, $selectionEnd, ${connection.expectedSelectionEnd}, $textBeforeComposingText, " +
"$connectionTextBeforeComposingText, $composingText, $connectionComposingText, $textBeforeCursor, ${connection.getTextBeforeCursor(textBeforeCursor.length, 0)}" +
", $textAfterCursor, ${connection.getTextAfterCursor(textAfterCursor.length, 0)}")
assertEquals(selectionStart, connection.expectedSelectionStart)
assertEquals(selectionEnd, connection.expectedSelectionEnd)
assertEquals(textBeforeComposingText, connectionTextBeforeComposingText)
assertEquals(composingText, connectionComposingText)
assertEquals(expectedConnectionComposingText, connectionComposingText)
assertEquals(textBeforeCursor, connection.getTextBeforeCursor(textBeforeCursor.length, 0).toString())
assertEquals(textAfterCursor, connection.getTextAfterCursor(textAfterCursor.length, 0).toString())
}
@ -432,15 +509,9 @@ private val textAfterCursor get() = text.substring(selectionEnd)
private val selectedText get() = text.substring(selectionStart, selectionEnd)
private val cursor get() = if (selectionStart == selectionEnd) selectionStart else -1
// todo: maybe this is not correct? seems to return only up to the cursor
//private val textBeforeComposingText get() = if (composingStart == -1) text else text.substring(0, composingStart)
private val textBeforeComposingText get() = if (composingStart == -1) textBeforeCursor else text.substring(0, composingStart)
// todo: maybe this is not correct? seems to return only up to the cursor
//private val composingText get() = if (composingStart == -1 || composingEnd == -1) ""
// else text.substring(composingStart, composingEnd)
// composingText should return everything, but RichInputConnection.mComposingText only returns up to cursor
private val composingText get() = if (composingStart == -1 || composingEnd == -1) ""
else text.substring(composingStart, min(composingEnd, selectionEnd)) // will crash if composing start is after cursor, but maybe it should?
else text.substring(composingStart, composingEnd)
// essentially this is the text field we're editing in
private val ic = object : InputConnection {
@ -458,7 +529,7 @@ private val ic = object : InputConnection {
override fun setComposingText(newText: CharSequence, cursor: Int): Boolean {
// first remove the composing text if any
if (composingStart != -1 && composingEnd != -1)
text = textBeforeComposingText + text.substring(composingEnd)
text = text.substring(0, composingStart) + text.substring(composingEnd)
else // no composing span active, we should remove selected text
if (selectionStart != selectionEnd) {
text = textBeforeCursor + textAfterCursor
@ -483,6 +554,7 @@ private val ic = object : InputConnection {
return true
}
override fun setComposingRegion(p0: Int, p1: Int): Boolean {
println("setComposingRegion, $p0, $p1")
composingStart = p0
composingEnd = p1
return true // never checked