From 2a43a02af3570bedce68b15b7dd63632d26613cf Mon Sep 17 00:00:00 2001 From: Evgeny Date: Mon, 12 May 2025 11:22:35 +0100 Subject: [PATCH] core, ui: support trailing punctuation for mentions, URIs (also support domains), and email addresses (#5888) * core: improve markdown parser for mentions, URIs, and email addresses * ui --- .../Views/Chat/ChatItem/MsgContentView.swift | 15 ++++-- .../Views/Chat/Group/GroupMentions.swift | 4 +- .../chat/simplex/common/model/ChatModel.kt | 2 +- .../common/views/chat/group/GroupMentions.kt | 10 +++- .../src/Directory/Events.hs | 4 -- .../src/Directory/Service.hs | 2 +- src/Simplex/Chat/Markdown.hs | 52 +++++++++++++------ .../SQLite/Migrations/agent_query_plans.txt | 2 +- tests/Bots/DirectoryTests.hs | 4 +- tests/MarkdownTests.hs | 28 ++++++++-- 10 files changed, 89 insertions(+), 34 deletions(-) diff --git a/apps/ios/Shared/Views/Chat/ChatItem/MsgContentView.swift b/apps/ios/Shared/Views/Chat/ChatItem/MsgContentView.swift index aab4177cbf..e04584dfff 100644 --- a/apps/ios/Shared/Views/Chat/ChatItem/MsgContentView.swift +++ b/apps/ios/Shared/Views/Chat/ChatItem/MsgContentView.swift @@ -288,7 +288,11 @@ func messageText( case .uri: attrs = linkAttrs() if !preview { - attrs[linkAttrKey] = NSURL(string: ft.text) + let s = t.lowercased() + let link = s.hasPrefix("http://") || s.hasPrefix("https://") + ? t + : "https://" + t + attrs[linkAttrKey] = NSURL(string: link) attrs[webLinkAttrKey] = true handleTaps = true } @@ -314,9 +318,9 @@ func messageText( if m.memberId == userMemberId { attrs[.foregroundColor] = UIColor.tintColor } - t = "@'\(name)'" + t = mentionText(name) } else { - t = "@'\(memberName)'" + t = mentionText(memberName) } } case .email: @@ -351,6 +355,11 @@ func messageText( } } +@inline(__always) +private func mentionText(_ name: String) -> String { + name.contains(" @") ? "@'\(name)'" : "@\(name)" +} + func simplexLinkText(_ linkType: SimplexLinkType, _ smpHosts: [String]) -> String { linkType.description + " " + "(via \(smpHosts.first ?? "?"))" } diff --git a/apps/ios/Shared/Views/Chat/Group/GroupMentions.swift b/apps/ios/Shared/Views/Chat/Group/GroupMentions.swift index 168f0490c3..9bb4a0cc35 100644 --- a/apps/ios/Shared/Views/Chat/Group/GroupMentions.swift +++ b/apps/ios/Shared/Views/Chat/Group/GroupMentions.swift @@ -196,7 +196,9 @@ struct GroupMentionsView: View { newName = composeState.mentionMemberName(member.wrapped.memberProfile.displayName) } mentions[newName] = CIMention(groupMember: member.wrapped) - var msgMention = "@" + (newName.contains(" ") ? "'\(newName)'" : newName) + var msgMention = newName.contains(" ") || newName.last?.isPunctuation == true + ? "@'\(newName)'" + : "@\(newName)" var newPos = r.location + msgMention.count let newMsgLength = composeState.message.count + msgMention.count - r.length print(newPos) diff --git a/apps/multiplatform/common/src/commonMain/kotlin/chat/simplex/common/model/ChatModel.kt b/apps/multiplatform/common/src/commonMain/kotlin/chat/simplex/common/model/ChatModel.kt index 6660cbbb93..6ee609020a 100644 --- a/apps/multiplatform/common/src/commonMain/kotlin/chat/simplex/common/model/ChatModel.kt +++ b/apps/multiplatform/common/src/commonMain/kotlin/chat/simplex/common/model/ChatModel.kt @@ -3902,7 +3902,7 @@ enum class MsgContentTag { class FormattedText(val text: String, val format: Format? = null) { // TODO make it dependent on simplexLinkMode preference fun link(mode: SimplexLinkMode): String? = when (format) { - is Format.Uri -> text + is Format.Uri -> if (text.startsWith("http://", ignoreCase = true) || text.startsWith("https://", ignoreCase = true)) text else "https://$text" is Format.SimplexLink -> if (mode == SimplexLinkMode.BROWSER) text else format.simplexUri is Format.Email -> "mailto:$text" is Format.Phone -> "tel:$text" diff --git a/apps/multiplatform/common/src/commonMain/kotlin/chat/simplex/common/views/chat/group/GroupMentions.kt b/apps/multiplatform/common/src/commonMain/kotlin/chat/simplex/common/views/chat/group/GroupMentions.kt index 1a63375432..91f4f5173c 100644 --- a/apps/multiplatform/common/src/commonMain/kotlin/chat/simplex/common/views/chat/group/GroupMentions.kt +++ b/apps/multiplatform/common/src/commonMain/kotlin/chat/simplex/common/views/chat/group/GroupMentions.kt @@ -23,6 +23,12 @@ import chat.simplex.common.views.chatlist.setGroupMembers import chat.simplex.common.views.helpers.* import chat.simplex.res.MR import kotlinx.coroutines.launch +import kotlin.text.CharCategory.* + +val punctuation = setOf( + DASH_PUNCTUATION, START_PUNCTUATION, END_PUNCTUATION, + CONNECTOR_PUNCTUATION, OTHER_PUNCTUATION +) private val PICKER_ROW_SIZE = MEMBER_ROW_AVATAR_SIZE + (MEMBER_ROW_VERTICAL_PADDING * 2f) private val MAX_PICKER_HEIGHT = (PICKER_ROW_SIZE * 4) + (MEMBER_ROW_AVATAR_SIZE + MEMBER_ROW_VERTICAL_PADDING - 4.dp) @@ -126,7 +132,9 @@ fun GroupMentions( } val newName = existingMention?.key ?: composeState.value.mentionMemberName(member.memberProfile.displayName) mentions[newName] = CIMention(member) - var msgMention = "@" + if (newName.contains(" ")) "'$newName'" else newName + var msgMention = if (newName.contains(" ") || (newName.lastOrNull()?.category in punctuation)) + "@'$newName'" + else "@$newName" var newPos = range.start + msgMention.length val newMsgLength = composeState.value.message.text.length + msgMention.length - range.length if (newPos == newMsgLength) { diff --git a/apps/simplex-directory-service/src/Directory/Events.hs b/apps/simplex-directory-service/src/Directory/Events.hs index 37d2b63d2f..faaccbd2bf 100644 --- a/apps/simplex-directory-service/src/Directory/Events.hs +++ b/apps/simplex-directory-service/src/Directory/Events.hs @@ -16,7 +16,6 @@ module Directory.Events SDirectoryRole (..), crDirectoryEvent, directoryCmdTag, - viewName, ) where @@ -291,9 +290,6 @@ directoryCmdP = -- wordP = spacesP *> A.takeTill isSpace spacesP = A.takeWhile1 isSpace -viewName :: Text -> Text -viewName n = if T.any isSpace n then "'" <> n <> "'" else n - directoryCmdTag :: DirectoryCmd r -> Text directoryCmdTag = \case DCHelp _ -> "help" diff --git a/apps/simplex-directory-service/src/Directory/Service.hs b/apps/simplex-directory-service/src/Directory/Service.hs index 4b02e0b294..4517ee9c5b 100644 --- a/apps/simplex-directory-service/src/Directory/Service.hs +++ b/apps/simplex-directory-service/src/Directory/Service.hs @@ -48,7 +48,7 @@ import Simplex.Chat.Bot import Simplex.Chat.Bot.KnownContacts import Simplex.Chat.Controller import Simplex.Chat.Core -import Simplex.Chat.Markdown (FormattedText (..), Format (..), parseMaybeMarkdownList) +import Simplex.Chat.Markdown (FormattedText (..), Format (..), parseMaybeMarkdownList, viewName) import Simplex.Chat.Messages import Simplex.Chat.Options import Simplex.Chat.Protocol (MsgContent (..)) diff --git a/src/Simplex/Chat/Markdown.hs b/src/Simplex/Chat/Markdown.hs index e5de9c408c..9811556fe0 100644 --- a/src/Simplex/Chat/Markdown.hs +++ b/src/Simplex/Chat/Markdown.hs @@ -4,6 +4,7 @@ {-# LANGUAGE NamedFieldPuns #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE TemplateHaskell #-} +{-# LANGUAGE TupleSections #-} {-# OPTIONS_GHC -Wno-unrecognised-pragmas #-} {-# HLINT ignore "Use newtype instead of data" #-} @@ -16,7 +17,7 @@ import qualified Data.Aeson as J import qualified Data.Aeson.TH as JQ import Data.Attoparsec.Text (Parser) import qualified Data.Attoparsec.Text as A -import Data.Char (isDigit, isPunctuation, isSpace) +import Data.Char (isAlpha, isAscii, isDigit, isPunctuation, isSpace) import Data.Either (fromRight) import Data.Functor (($>)) import Data.List (foldl', intercalate) @@ -204,17 +205,18 @@ markdownP = mconcat <$> A.many' fragmentP else pure $ markdown (colored clr) s mentionP = do c <- A.char '@' *> A.peekChar' - name <- displayNameTextP + (name, punct) <- displayNameTextP_ let sName = if c == '\'' then '\'' `T.cons` name `T.snoc` '\'' else name - pure $ markdown (Mention name) ('@' `T.cons` sName) + mention = markdown (Mention name) ('@' `T.cons` sName) + pure $ if T.null punct then mention else mention :|: unmarked punct colorP = A.anyChar >>= \case - 'r' -> "ed" $> Red <|> pure Red - 'g' -> "reen" $> Green <|> pure Green - 'b' -> "lue" $> Blue <|> pure Blue - 'y' -> "ellow" $> Yellow <|> pure Yellow - 'c' -> "yan" $> Cyan <|> pure Cyan - 'm' -> "agenta" $> Magenta <|> pure Magenta + 'r' -> optional "ed" $> Red + 'g' -> optional "reen" $> Green + 'b' -> optional "lue" $> Blue + 'y' -> optional "ellow" $> Yellow + 'c' -> optional "yan" $> Cyan + 'm' -> optional "agenta" $> Magenta '1' -> pure Red '2' -> pure Green '3' -> pure Blue @@ -236,12 +238,14 @@ markdownP = mconcat <$> A.many' fragmentP wordMD :: Text -> Markdown wordMD s | T.null s = unmarked s - | isUri s = - let t = T.takeWhileEnd isPunctuation' s - uri = uriMarkdown $ T.dropWhileEnd isPunctuation' s - in if T.null t then uri else uri :|: unmarked t - | isEmail s = markdown Email s + | isUri s' = res $ uriMarkdown s' + | isDomain s' = res $ markdown Uri s' + | isEmail s' = res $ markdown Email s' | otherwise = unmarked s + where + punct = T.takeWhileEnd isPunctuation' s + s' = T.dropWhileEnd isPunctuation' s + res md' = if T.null punct then md' else md' :|: unmarked punct isPunctuation' = \case '/' -> False ')' -> False @@ -250,6 +254,16 @@ markdownP = mconcat <$> A.many' fragmentP Right cLink -> markdown (simplexUriFormat cLink) s _ -> markdown Uri s isUri s = T.length s >= 10 && any (`T.isPrefixOf` s) ["http://", "https://", "simplex:/"] + -- matches what is likely to be a domain, not all valid domain names + isDomain s = case T.splitOn "." s of + [name, tld] -> isDomain_ name tld + [sub, name, tld] -> T.length sub >= 3 && T.length sub <= 8 && isDomain_ name tld + _ -> False + where + isDomain_ name tld = + (let n = T.length name in n >= 1 && n <= 24) + && (let n = T.length tld in n >= 2 && n <= 8) + && (let p c = isAscii c && isAlpha c in T.all p name && T.all p tld) isEmail s = T.any (== '@') s && Email.isValid (encodeUtf8 s) noFormat = pure . unmarked simplexUriFormat :: AConnectionLink -> Format @@ -307,16 +321,22 @@ markdownText (FormattedText f_ t) = case f_ of White -> Nothing displayNameTextP :: Parser Text -displayNameTextP = quoted '\'' <|> takeNameTill (== ' ') +displayNameTextP = displayNameTextP_ >>= \(t, sfx) -> if T.null sfx then pure t else fail "Name ends with punctuation" +{-# INLINE displayNameTextP #-} + +displayNameTextP_ :: Parser (Text, Text) +displayNameTextP_ = (,"") <$> quoted '\'' <|> splitPunctuation <$> takeNameTill isSpace where takeNameTill p = A.peekChar' >>= \c -> if refChar c then A.takeTill p else fail "invalid first character in display name" + splitPunctuation s = (T.dropWhileEnd isPunctuation s, T.takeWhileEnd isPunctuation s) quoted c = A.char c *> takeNameTill (== c) <* A.char c refChar c = c > ' ' && c /= '#' && c /= '@' && c /= '\'' +-- quotes names that contain spaces or end on punctuation viewName :: Text -> Text -viewName s = if T.any isSpace s then "'" <> s <> "'" else s +viewName s = if T.any isSpace s || maybe False (isPunctuation . snd) (T.unsnoc s) then "'" <> s <> "'" else s $(JQ.deriveJSON (enumJSON $ dropPrefix "XL") ''SimplexLinkType) diff --git a/src/Simplex/Chat/Store/SQLite/Migrations/agent_query_plans.txt b/src/Simplex/Chat/Store/SQLite/Migrations/agent_query_plans.txt index a10d3f3db7..13215dcb75 100644 --- a/src/Simplex/Chat/Store/SQLite/Migrations/agent_query_plans.txt +++ b/src/Simplex/Chat/Store/SQLite/Migrations/agent_query_plans.txt @@ -1119,7 +1119,7 @@ Query: UPDATE rcv_messages SET user_ack = ? WHERE conn_id = ? AND internal_id = Plan: SEARCH rcv_messages USING COVERING INDEX idx_rcv_messages_conn_id_internal_id (conn_id=? AND internal_id=?) -Query: UPDATE rcv_queues SET last_broker_ts = ? WHERE conn_id = ? AND rcv_queue_id = ? AND last_broker_ts < ? +Query: UPDATE rcv_queues SET last_broker_ts = ? WHERE conn_id = ? AND rcv_queue_id = ? AND (last_broker_ts IS NULL OR last_broker_ts < ?) Plan: SEARCH rcv_queues USING INDEX idx_rcv_queue_id (conn_id=? AND rcv_queue_id=?) diff --git a/tests/Bots/DirectoryTests.hs b/tests/Bots/DirectoryTests.hs index 0877a48daa..f8a5aa8b80 100644 --- a/tests/Bots/DirectoryTests.hs +++ b/tests/Bots/DirectoryTests.hs @@ -14,7 +14,6 @@ import Control.Exception (finally) import Control.Monad (forM_, when) import qualified Data.Text as T import Directory.Captcha -import qualified Directory.Events as DE import Directory.Options import Directory.Service import Directory.Store @@ -22,6 +21,7 @@ import GHC.IO.Handle (hClose) import Simplex.Chat.Bot.KnownContacts import Simplex.Chat.Controller (ChatConfig (..), ChatHooks (..), defaultChatHooks) import Simplex.Chat.Core +import qualified Simplex.Chat.Markdown as MD import Simplex.Chat.Options (CoreChatOpts (..)) import Simplex.Chat.Options.DB import Simplex.Chat.Types (Profile (..)) @@ -111,7 +111,7 @@ serviceDbPrefix :: FilePath serviceDbPrefix = "directory_service" viewName :: String -> String -viewName = T.unpack . DE.viewName . T.pack +viewName = T.unpack . MD.viewName . T.pack testDirectoryService :: HasCallStack => TestParams -> IO () testDirectoryService ps = diff --git a/tests/MarkdownTests.hs b/tests/MarkdownTests.hs index fc872f05b1..05d5362cf1 100644 --- a/tests/MarkdownTests.hs +++ b/tests/MarkdownTests.hs @@ -192,10 +192,23 @@ textWithUri = describe "text with Uri" do "https://github.com/simplex-chat/ - SimpleX on GitHub" <==> uri "https://github.com/simplex-chat/" <> " - SimpleX on GitHub" -- "SimpleX on GitHub (https://github.com/simplex-chat/)" <==> "SimpleX on GitHub (" <> uri "https://github.com/simplex-chat/" <> ")" "https://en.m.wikipedia.org/wiki/Servo_(software)" <==> uri "https://en.m.wikipedia.org/wiki/Servo_(software)" + "example.com" <==> uri "example.com" + "example.com." <==> uri "example.com" <> "." + "example.com..." <==> uri "example.com" <> "..." + "www.example.com" <==> uri "www.example.com" + "example.academy" <==> uri "example.academy" + "this is example.com" <==> "this is " <> uri "example.com" + "x.com" <==> uri "x.com" it "ignored as markdown" do "_https://simplex.chat" <==> "_https://simplex.chat" "this is _https://simplex.chat" <==> "this is _https://simplex.chat" "this is https://" <==> "this is https://" + "example.c" <==> "example.c" + "www.www.example.com" <==> "www.www.example.com" + "www.example1.com" <==> "www.example1.com" + "www." <==> "www." + ".com" <==> ".com" + "example.academytoolong" <==> "example.academytoolong" it "SimpleX links" do let inv = "/invitation#/?v=1&smp=smp%3A%2F%2F1234-w%3D%3D%40smp.simplex.im%3A5223%2F3456-w%3D%3D%23%2F%3Fv%3D1-2%26dh%3DMCowBQYDK2VuAyEAjiswwI3O_NlS8Fk3HJUW870EY2bAwmttMBsvRB9eV3o%253D&e2e=v%3D2%26x3dh%3DMEIwBQYDK2VvAzkAmKuSYeQ_m0SixPDS8Wq8VBaTS1cW-Lp0n0h4Diu-kUpR-qXx4SDJ32YGEFoGFGSbGPry5Ychr6U%3D%2CMEIwBQYDK2VvAzkAmKuSYeQ_m0SixPDS8Wq8VBaTS1cW-Lp0n0h4Diu-kUpR-qXx4SDJ32YGEFoGFGSbGPry5Ychr6U%3D" ("https://simplex.chat" <> inv) <==> simplexLink XLInvitation ("simplex:" <> inv) ["smp.simplex.im"] ("https://simplex.chat" <> inv) @@ -220,12 +233,14 @@ textWithEmail = describe "text with Email" do "test chat.chat+123@simplex.chat" <==> "test " <> email "chat.chat+123@simplex.chat" "chat@simplex.chat test" <==> email "chat@simplex.chat" <> " test" "test1 chat@simplex.chat test2" <==> "test1 " <> email "chat@simplex.chat" <> " test2" - it "ignored as markdown" do + "test chat@simplex.chat." <==> "test " <> email "chat@simplex.chat" <> "." + "test chat@simplex.chat..." <==> "test " <> email "chat@simplex.chat" <> "..." + it "ignored as email markdown" do "chat @simplex.chat" <==> "chat " <> mention "simplex.chat" "@simplex.chat" "this is chat @simplex.chat" <==> "this is chat " <> mention "simplex.chat" "@simplex.chat" - "this is chat@ simplex.chat" <==> "this is chat@ simplex.chat" - "this is chat @ simplex.chat" <==> "this is chat @ simplex.chat" - "*this* is chat @ simplex.chat" <==> bold "this" <> " is chat @ simplex.chat" + "this is chat@ simplex.chat" <==> "this is chat@ " <> uri "simplex.chat" + "this is chat @ simplex.chat" <==> "this is chat @ " <> uri "simplex.chat" + "*this* is chat @ simplex.chat" <==> bold "this" <> " is chat @ " <> uri "simplex.chat" phone :: Text -> Markdown phone = Markdown $ Just Phone @@ -258,8 +273,13 @@ textWithMentions = describe "text with mentions" do "@alice" <==> mention "alice" "@alice" "hello @alice" <==> "hello " <> mention "alice" "@alice" "hello @alice !" <==> "hello " <> mention "alice" "@alice" <> " !" + "hello @alice!" <==> "hello " <> mention "alice" "@alice" <> "!" + "hello @alice..." <==> "hello " <> mention "alice" "@alice" <> "..." + "hello @alice@example.com" <==> "hello " <> mention "alice@example.com" "@alice@example.com" + "hello @'alice @ example.com'" <==> "hello " <> mention "alice @ example.com" "@'alice @ example.com'" "@'alice jones'" <==> mention "alice jones" "@'alice jones'" "hello @'alice jones'!" <==> "hello " <> mention "alice jones" "@'alice jones'" <> "!" + "hello @'a.j.'!" <==> "hello " <> mention "a.j." "@'a.j.'" <> "!" it "ignored as markdown" $ do "hello @'alice jones!" <==> "hello @'alice jones!" "hello @bob @'alice jones!" <==> "hello " <> mention "bob" "@bob" <> " @'alice jones!"