add emojis for history for next-word suggestions

This commit is contained in:
Helium314 2023-10-20 13:11:05 +02:00
parent df59ea71c0
commit 2677e3eeb8
4 changed files with 85 additions and 18 deletions

View file

@ -700,23 +700,24 @@ public final class StringUtils {
return false; return false;
} }
public static boolean probablyContainsEmoji(final String s) { public static boolean mightBeEmoji(final String s) {
int offset = 0; int offset = 0;
final int length = s.length(); final int length = s.length();
while (offset < length) { while (offset < length) {
int c = Character.codePointAt(s, offset); int c = Character.codePointAt(s, offset);
if (probablyIsEmojiCodePoint(c)) if (mightBeEmoji(c))
return true; return true;
offset += Character.charCount(c); offset += Character.charCount(c);
} }
return false; return false;
} }
// seemingly arbitrary ranges taken from "somewhere on the internet" // unicode blocks that contain emojis
public static boolean probablyIsEmojiCodePoint(final int c) { // very fast check, but there are very few blocks that exclusively contain emojis,
return (0x200D <= c && c <= 0x3299) // ?? public static boolean mightBeEmoji(final int c) {
|| (0x1F004 <= c && c <= 0x1F251) // ?? return (0x200D <= c && c <= 0x2BFF) // unicode blocks from General Punctuation to Miscellaneous Symbols and Arrows
|| (0x1F300 <= c && c <= 0x1FFFF) // ?? || (0x1F104 <= c && c <= 0x1FAFF) // unicode blocks from Mahjong Tiles to Symbols and Pictographs Extended-A
|| (0xE0000 <= c && c <= 0xE007F) // unicode block Tags
|| c == 0xFE0F; // variation selector emoji with color || c == 0xFE0F; // variation selector emoji with color
} }

File diff suppressed because one or more lines are too long

View file

@ -8,7 +8,6 @@ package org.dslul.openboard.inputmethod.latin.inputlogic;
import android.graphics.Color; import android.graphics.Color;
import android.os.SystemClock; import android.os.SystemClock;
import android.text.InputType;
import android.text.SpannableString; import android.text.SpannableString;
import android.text.Spanned; import android.text.Spanned;
import android.text.TextUtils; import android.text.TextUtils;
@ -42,6 +41,7 @@ import org.dslul.openboard.inputmethod.latin.WordComposer;
import org.dslul.openboard.inputmethod.latin.common.Constants; import org.dslul.openboard.inputmethod.latin.common.Constants;
import org.dslul.openboard.inputmethod.latin.common.InputPointers; import org.dslul.openboard.inputmethod.latin.common.InputPointers;
import org.dslul.openboard.inputmethod.latin.common.StringUtils; import org.dslul.openboard.inputmethod.latin.common.StringUtils;
import org.dslul.openboard.inputmethod.latin.common.StringUtilsKt;
import org.dslul.openboard.inputmethod.latin.define.DebugFlags; import org.dslul.openboard.inputmethod.latin.define.DebugFlags;
import org.dslul.openboard.inputmethod.latin.settings.SettingsValues; import org.dslul.openboard.inputmethod.latin.settings.SettingsValues;
import org.dslul.openboard.inputmethod.latin.settings.SpacingAndPunctuations; import org.dslul.openboard.inputmethod.latin.settings.SpacingAndPunctuations;
@ -231,7 +231,9 @@ public final class InputLogic {
mConnection.beginBatchEdit(); mConnection.beginBatchEdit();
if (mWordComposer.isComposingWord()) { if (mWordComposer.isComposingWord()) {
commitCurrentAutoCorrection(settingsValues, rawText, handler); commitCurrentAutoCorrection(settingsValues, rawText, handler);
addToHistoryIfEmoji(rawText, settingsValues); // add emoji after committing text
} else { } else {
addToHistoryIfEmoji(rawText, settingsValues); // add emoji before resetting, otherwise lastComposedWord is empty
resetComposingState(true /* alsoResetLastComposedWord */); resetComposingState(true /* alsoResetLastComposedWord */);
} }
handler.postUpdateSuggestionStrip(SuggestedWords.INPUT_STYLE_TYPING); handler.postUpdateSuggestionStrip(SuggestedWords.INPUT_STYLE_TYPING);
@ -841,6 +843,7 @@ public final class InputLogic {
// until the next character is entered, and the word is added to history // until the next character is entered, and the word is added to history
// -> the changing selection would be confusing, and adding partial URLs to history is probably bad // -> the changing selection would be confusing, and adding partial URLs to history is probably bad
if (Character.getType(codePoint) == Character.OTHER_SYMBOL if (Character.getType(codePoint) == Character.OTHER_SYMBOL
|| (Character.getType(codePoint) == Character.UNASSIGNED && StringUtils.mightBeEmoji(codePoint)) // outdated java doesn't detect some emojis
|| (sv.isWordSeparator(codePoint) || (sv.isWordSeparator(codePoint)
&& (Character.isWhitespace(codePoint) // whitespace is always a separator && (Character.isWhitespace(codePoint) // whitespace is always a separator
|| !textBeforeCursorMayBeUrlOrSimilar(sv, false) // if text before is not URL or similar, it's a separator || !textBeforeCursorMayBeUrlOrSimilar(sv, false) // if text before is not URL or similar, it's a separator
@ -849,6 +852,7 @@ public final class InputLogic {
) )
) { ) {
handleSeparatorEvent(event, inputTransaction, handler); handleSeparatorEvent(event, inputTransaction, handler);
addToHistoryIfEmoji(StringUtils.newSingleCodePointString(codePoint), sv);
} else { } else {
if (SpaceState.PHANTOM == inputTransaction.getMSpaceState()) { if (SpaceState.PHANTOM == inputTransaction.getMSpaceState()) {
if (mWordComposer.isCursorFrontOrMiddleOfComposingWord()) { if (mWordComposer.isCursorFrontOrMiddleOfComposingWord()) {
@ -866,6 +870,25 @@ public final class InputLogic {
} }
} }
/**
 * Adds {@code text} to the user history if it is an emoji, so that it ends up in history and can
 * be suggested as next word.
 * <p>
 * Does nothing when there is no last composed word, when a word is currently being composed,
 * when auto-correction is disabled in the user settings, when incognito mode is enabled, or
 * when {@code text} is not an emoji.
 *
 * @param text the text that was just entered
 * @param settingsValues the current settings values
 */
private void addToHistoryIfEmoji(final String text, final SettingsValues settingsValues) {
    // a last composed word is required, which also keeps consecutive emojis from being stored
    if (mLastComposedWord == LastComposedWord.NOT_A_COMPOSED_WORD) return;
    // while composing, the emoji becomes part of the word, so better do nothing
    if (mWordComposer.isComposingWord()) return;
    if (!settingsValues.mAutoCorrectionEnabledPerUserSettings) return;
    // nothing is added in incognito mode
    if (settingsValues.mIncognitoModeEnabled) return;
    // only emojis are handled here
    if (!StringUtilsKt.isEmoji(text)) return;

    // clear the last composed word so that a directly following emoji is not stored as well
    mLastComposedWord = LastComposedWord.NOT_A_COMPOSED_WORD;

    // commit the emoji to the dictionary, using the words before it as ngram context
    mDictionaryFacilitator.addToUserHistory(text, false /* wasAutoCapitalized */,
            mConnection.getNgramContextFromNthPreviousWord(settingsValues.mSpacingAndPunctuations, 2),
            (int) TimeUnit.MILLISECONDS.toSeconds(System.currentTimeMillis()),
            settingsValues.mBlockPotentiallyOffensive);
}
/** /**
* Handle a non-separator. * Handle a non-separator.
* @param event The event to handle. * @param event The event to handle.
@ -1017,7 +1040,7 @@ public final class InputLogic {
mConnection.getCodePointBeforeCursor())) { mConnection.getCodePointBeforeCursor())) {
needsPrecedingSpace = false; needsPrecedingSpace = false;
} else { } else {
needsPrecedingSpace = settingsValues.isUsuallyPrecededBySpace(codePoint); needsPrecedingSpace = settingsValues.isUsuallyPrecededBySpace(codePoint) || StringUtilsKt.isEmoji(codePoint);
} }
if (needsPrecedingSpace) { if (needsPrecedingSpace) {
@ -1246,9 +1269,9 @@ public final class InputLogic {
} }
final int lengthToDelete = final int lengthToDelete =
Character.isSupplementaryCodePoint(codePointBeforeCursor) ? 2 : 1; Character.isSupplementaryCodePoint(codePointBeforeCursor) ? 2 : 1;
if (StringUtils.probablyIsEmojiCodePoint(codePointBeforeCursor)) { if (StringUtils.mightBeEmoji(codePointBeforeCursor)) {
// emoji length varies, so we'd need to find out length to delete correctly // emoji length varies, so we'd need to find out length to delete correctly
// this is not optimal, but a reasonable workaround for issues when trying to delete emojis // the solution is not optimal, but a reasonable workaround for issues when trying to delete emojis
sendDownUpKeyEvent(KeyEvent.KEYCODE_DEL); sendDownUpKeyEvent(KeyEvent.KEYCODE_DEL);
} else { } else {
mConnection.deleteTextBeforeCursor(lengthToDelete); mConnection.deleteTextBeforeCursor(lengthToDelete);
@ -1532,10 +1555,8 @@ public final class InputLogic {
} }
if (TextUtils.isEmpty(suggestion)) return; if (TextUtils.isEmpty(suggestion)) return;
final boolean wasAutoCapitalized = final boolean wasAutoCapitalized = mWordComposer.wasAutoCapitalized() && !mWordComposer.isMostlyCaps();
mWordComposer.wasAutoCapitalized() && !mWordComposer.isMostlyCaps(); final int timeStampInSeconds = (int)TimeUnit.MILLISECONDS.toSeconds(System.currentTimeMillis());
final int timeStampInSeconds = (int)TimeUnit.MILLISECONDS.toSeconds(
System.currentTimeMillis());
mDictionaryFacilitator.addToUserHistory(stripWordSeparatorsFromEnd(suggestion, settingsValues), wasAutoCapitalized, mDictionaryFacilitator.addToUserHistory(stripWordSeparatorsFromEnd(suggestion, settingsValues), wasAutoCapitalized,
ngramContext, timeStampInSeconds, settingsValues.mBlockPotentiallyOffensive); ngramContext, timeStampInSeconds, settingsValues.mBlockPotentiallyOffensive);
} }

View file

@ -463,6 +463,37 @@ class InputLogicTest {
assertEquals("b", composingText) assertEquals("b", composingText)
} }
// Checks that an emoji entered after a word is recorded as the last added word, for both
// code point input and text input, and that text merely containing an emoji is not.
@Test fun `emoji is added to dictionary`() {
    // emoji arriving as a single code point
    val lollipop = 0x1F36D
    reset()
    chainInput("hello ")
    input(lollipop)
    assertEquals(StringUtils.newSingleCodePointString(lollipop), lastAddedWord)

    // emoji arriving as text
    reset()
    chainInput("hello ")
    input("🤗")
    assertEquals("\uD83E\uDD17", lastAddedWord)

    // not added because it's not only emoji (input can come from pasting),
    // so "hello" remains the last added word
    reset()
    chainInput("hello ")
    input("why 🤗 ")
    assertEquals("hello", lastAddedWord)
}
// Checks that an emoji entered right after picking a suggestion is separated from the picked
// word by a space and recorded as the last added word, for both text and code point input.
@Test fun `emoji uses phantom space`() {
    // emoji arriving as text
    reset()
    pickSuggestion("hi")
    input("🤗")
    assertEquals("\uD83E\uDD17", lastAddedWord)
    assertEquals("hi \uD83E\uDD17", text)

    // emoji arriving as a single code point
    val lollipop = StringUtils.newSingleCodePointString(0x1F36D)
    reset()
    pickSuggestion("hi")
    input(0x1F36D)
    assertEquals(lollipop, lastAddedWord)
    assertEquals("hi $lollipop", text)
}
// ------- helper functions --------- // ------- helper functions ---------
// should be called before every test, so the same state is guaranteed // should be called before every test, so the same state is guaranteed
@ -495,7 +526,7 @@ class InputLogicTest {
handleMessages() handleMessages()
if (phantomSpaceToInsert.isEmpty()) if (phantomSpaceToInsert.isEmpty())
assertEquals(oldBefore + phantomSpaceToInsert + insert, textBeforeCursor) assertEquals(oldBefore + insert, textBeforeCursor)
else // in some cases autospace might be suppressed else // in some cases autospace might be suppressed
assert(oldBefore + phantomSpaceToInsert + insert == textBeforeCursor || oldBefore + insert == textBeforeCursor) assert(oldBefore + phantomSpaceToInsert + insert == textBeforeCursor || oldBefore + insert == textBeforeCursor)
assertEquals(oldAfter, textAfterCursor) assertEquals(oldAfter, textAfterCursor)
@ -514,11 +545,16 @@ class InputLogicTest {
private fun input(insert: String) { private fun input(insert: String) {
val oldBefore = textBeforeCursor val oldBefore = textBeforeCursor
val oldAfter = textAfterCursor val oldAfter = textAfterCursor
val phantomSpaceToInsert = if (spaceState == SpaceState.PHANTOM) " " else ""
latinIME.onTextInput(insert) latinIME.onTextInput(insert)
handleMessages() handleMessages()
if (phantomSpaceToInsert.isEmpty())
assertEquals(oldBefore + insert, textBeforeCursor) assertEquals(oldBefore + insert, textBeforeCursor)
else // in some cases autospace might be suppressed
assert(oldBefore + phantomSpaceToInsert + insert == textBeforeCursor || oldBefore + insert == textBeforeCursor)
assert(oldBefore + insert == textBeforeCursor || "$oldBefore $insert" == textBeforeCursor)
assertEquals(oldAfter, textAfterCursor) assertEquals(oldAfter, textAfterCursor)
assertEquals(textBeforeCursor + textAfterCursor, getText()) assertEquals(textBeforeCursor + textAfterCursor, getText())
checkConnectionConsistency() checkConnectionConsistency()