core, ui: support trailing punctuation for mentions, URIs (also support domains), and email addresses (#5888)

* core: improve markdown parser for mentions, URIs, and email addresses

* ui
This commit is contained in:
Evgeny 2025-05-12 11:22:35 +01:00 committed by GitHub
parent e1aa32952e
commit 2a43a02af3
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
10 changed files with 89 additions and 34 deletions

View file

@ -288,7 +288,11 @@ func messageText(
case .uri:
attrs = linkAttrs()
if !preview {
attrs[linkAttrKey] = NSURL(string: ft.text)
let s = t.lowercased()
let link = s.hasPrefix("http://") || s.hasPrefix("https://")
? t
: "https://" + t
attrs[linkAttrKey] = NSURL(string: link)
attrs[webLinkAttrKey] = true
handleTaps = true
}
@ -314,9 +318,9 @@ func messageText(
if m.memberId == userMemberId {
attrs[.foregroundColor] = UIColor.tintColor
}
t = "@'\(name)'"
t = mentionText(name)
} else {
t = "@'\(memberName)'"
t = mentionText(memberName)
}
}
case .email:
@ -351,6 +355,11 @@ func messageText(
}
}
/// Builds the text shown for a member mention in a rendered message.
///
/// The name is wrapped in single quotes only when it contains a space
/// immediately followed by `@`; otherwise it is prefixed with `@` as is.
///
/// NOTE(review): the check is for the two-character sequence " @", not for
/// any space — a name such as "alice jones" is rendered unquoted. Confirm
/// this is the intended display rule.
///
/// - Parameter name: display name of the mentioned member.
/// - Returns: the mention text, e.g. `@alice` or `@'alice @bob'`.
@inline(__always)
private func mentionText(_ name: String) -> String {
    let needsQuotes = name.contains(" @")
    if needsQuotes {
        return "@'\(name)'"
    }
    return "@\(name)"
}
/// Builds the label for a SimpleX link: the link type description followed by
/// the first SMP host in parentheses.
///
/// - Parameters:
///   - linkType: type of the link; its `description` starts the label.
///   - smpHosts: hosts of the link; only the first one is shown, and "?" is
///     used when the list is empty.
/// - Returns: a string of the form `<description> (via <host>)`.
func simplexLinkText(_ linkType: SimplexLinkType, _ smpHosts: [String]) -> String {
    let host = smpHosts.first ?? "?"
    return "\(linkType.description) (via \(host))"
}

View file

@ -196,7 +196,9 @@ struct GroupMentionsView: View {
newName = composeState.mentionMemberName(member.wrapped.memberProfile.displayName)
}
mentions[newName] = CIMention(groupMember: member.wrapped)
var msgMention = "@" + (newName.contains(" ") ? "'\(newName)'" : newName)
var msgMention = newName.contains(" ") || newName.last?.isPunctuation == true
? "@'\(newName)'"
: "@\(newName)"
var newPos = r.location + msgMention.count
let newMsgLength = composeState.message.count + msgMention.count - r.length
print(newPos)

View file

@ -3902,7 +3902,7 @@ enum class MsgContentTag {
class FormattedText(val text: String, val format: Format? = null) {
// TODO make it dependent on simplexLinkMode preference
fun link(mode: SimplexLinkMode): String? = when (format) {
is Format.Uri -> text
is Format.Uri -> if (text.startsWith("http://", ignoreCase = true) || text.startsWith("https://", ignoreCase = true)) text else "https://$text"
is Format.SimplexLink -> if (mode == SimplexLinkMode.BROWSER) text else format.simplexUri
is Format.Email -> "mailto:$text"
is Format.Phone -> "tel:$text"

View file

@ -23,6 +23,12 @@ import chat.simplex.common.views.chatlist.setGroupMembers
import chat.simplex.common.views.helpers.*
import chat.simplex.res.MR
import kotlinx.coroutines.launch
import kotlin.text.CharCategory.*
/**
 * Unicode general categories that count as punctuation when checking whether
 * a mention name ends with a punctuation character (such names are quoted as
 * `@'name'` when inserted into the message).
 *
 * The set covers all seven Unicode "P*" categories, including the initial and
 * final quote categories (e.g. `«`, `»`, `“`, `”`). Leaving those two out would
 * let a name ending in a closing quote be inserted unquoted, while a parser
 * that treats every "P*" category as punctuation would split that trailing
 * character off the name.
 */
val punctuation = setOf(
  DASH_PUNCTUATION, START_PUNCTUATION, END_PUNCTUATION,
  CONNECTOR_PUNCTUATION, OTHER_PUNCTUATION,
  INITIAL_QUOTE_PUNCTUATION, FINAL_QUOTE_PUNCTUATION
)
private val PICKER_ROW_SIZE = MEMBER_ROW_AVATAR_SIZE + (MEMBER_ROW_VERTICAL_PADDING * 2f)
private val MAX_PICKER_HEIGHT = (PICKER_ROW_SIZE * 4) + (MEMBER_ROW_AVATAR_SIZE + MEMBER_ROW_VERTICAL_PADDING - 4.dp)
@ -126,7 +132,9 @@ fun GroupMentions(
}
val newName = existingMention?.key ?: composeState.value.mentionMemberName(member.memberProfile.displayName)
mentions[newName] = CIMention(member)
var msgMention = "@" + if (newName.contains(" ")) "'$newName'" else newName
var msgMention = if (newName.contains(" ") || (newName.lastOrNull()?.category in punctuation))
"@'$newName'"
else "@$newName"
var newPos = range.start + msgMention.length
val newMsgLength = composeState.value.message.text.length + msgMention.length - range.length
if (newPos == newMsgLength) {

View file

@ -16,7 +16,6 @@ module Directory.Events
SDirectoryRole (..),
crDirectoryEvent,
directoryCmdTag,
viewName,
)
where
@ -291,9 +290,6 @@ directoryCmdP =
-- wordP = spacesP *> A.takeTill isSpace
spacesP = A.takeWhile1 isSpace
viewName :: Text -> Text
viewName n = if T.any isSpace n then "'" <> n <> "'" else n
directoryCmdTag :: DirectoryCmd r -> Text
directoryCmdTag = \case
DCHelp _ -> "help"

View file

@ -48,7 +48,7 @@ import Simplex.Chat.Bot
import Simplex.Chat.Bot.KnownContacts
import Simplex.Chat.Controller
import Simplex.Chat.Core
import Simplex.Chat.Markdown (FormattedText (..), Format (..), parseMaybeMarkdownList)
import Simplex.Chat.Markdown (FormattedText (..), Format (..), parseMaybeMarkdownList, viewName)
import Simplex.Chat.Messages
import Simplex.Chat.Options
import Simplex.Chat.Protocol (MsgContent (..))

View file

@ -4,6 +4,7 @@
{-# LANGUAGE NamedFieldPuns #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TupleSections #-}
{-# OPTIONS_GHC -Wno-unrecognised-pragmas #-}
{-# HLINT ignore "Use newtype instead of data" #-}
@ -16,7 +17,7 @@ import qualified Data.Aeson as J
import qualified Data.Aeson.TH as JQ
import Data.Attoparsec.Text (Parser)
import qualified Data.Attoparsec.Text as A
import Data.Char (isDigit, isPunctuation, isSpace)
import Data.Char (isAlpha, isAscii, isDigit, isPunctuation, isSpace)
import Data.Either (fromRight)
import Data.Functor (($>))
import Data.List (foldl', intercalate)
@ -204,17 +205,18 @@ markdownP = mconcat <$> A.many' fragmentP
else pure $ markdown (colored clr) s
mentionP = do
c <- A.char '@' *> A.peekChar'
name <- displayNameTextP
(name, punct) <- displayNameTextP_
let sName = if c == '\'' then '\'' `T.cons` name `T.snoc` '\'' else name
pure $ markdown (Mention name) ('@' `T.cons` sName)
mention = markdown (Mention name) ('@' `T.cons` sName)
pure $ if T.null punct then mention else mention :|: unmarked punct
colorP =
A.anyChar >>= \case
'r' -> "ed" $> Red <|> pure Red
'g' -> "reen" $> Green <|> pure Green
'b' -> "lue" $> Blue <|> pure Blue
'y' -> "ellow" $> Yellow <|> pure Yellow
'c' -> "yan" $> Cyan <|> pure Cyan
'm' -> "agenta" $> Magenta <|> pure Magenta
'r' -> optional "ed" $> Red
'g' -> optional "reen" $> Green
'b' -> optional "lue" $> Blue
'y' -> optional "ellow" $> Yellow
'c' -> optional "yan" $> Cyan
'm' -> optional "agenta" $> Magenta
'1' -> pure Red
'2' -> pure Green
'3' -> pure Blue
@ -236,12 +238,14 @@ markdownP = mconcat <$> A.many' fragmentP
wordMD :: Text -> Markdown
wordMD s
| T.null s = unmarked s
| isUri s =
let t = T.takeWhileEnd isPunctuation' s
uri = uriMarkdown $ T.dropWhileEnd isPunctuation' s
in if T.null t then uri else uri :|: unmarked t
| isEmail s = markdown Email s
| isUri s' = res $ uriMarkdown s'
| isDomain s' = res $ markdown Uri s'
| isEmail s' = res $ markdown Email s'
| otherwise = unmarked s
where
punct = T.takeWhileEnd isPunctuation' s
s' = T.dropWhileEnd isPunctuation' s
res md' = if T.null punct then md' else md' :|: unmarked punct
isPunctuation' = \case
'/' -> False
')' -> False
@ -250,6 +254,16 @@ markdownP = mconcat <$> A.many' fragmentP
Right cLink -> markdown (simplexUriFormat cLink) s
_ -> markdown Uri s
isUri s = T.length s >= 10 && any (`T.isPrefixOf` s) ["http://", "https://", "simplex:/"]
-- matches what is likely to be a domain, not all valid domain names:
-- either "name.tld" or "sub.name.tld", where
--   name is 1-24 ASCII letters,
--   tld is 2-8 ASCII letters,
--   sub is 3-8 characters, each an ASCII letter, digit or '-'.
isDomain s = case T.splitOn "." s of
  [name, tld] -> isDomain_ name tld
  [sub, name, tld] -> isSub sub && isDomain_ name tld
  _ -> False
  where
    -- The subdomain label must be validated by content, not only by length:
    -- otherwise a word such as "bob@mail.example.com" splits into
    -- ["bob@mail", "example", "com"] and is accepted as a domain.
    isSub sub =
      (let n = T.length sub in n >= 3 && n <= 8)
        && T.all (\c -> isAscii c && (isAlpha c || isDigit c || c == '-')) sub
    isDomain_ name tld =
      (let n = T.length name in n >= 1 && n <= 24)
        && (let n = T.length tld in n >= 2 && n <= 8)
        && (let p c = isAscii c && isAlpha c in T.all p name && T.all p tld)
isEmail s = T.any (== '@') s && Email.isValid (encodeUtf8 s)
noFormat = pure . unmarked
simplexUriFormat :: AConnectionLink -> Format
@ -307,16 +321,22 @@ markdownText (FormattedText f_ t) = case f_ of
White -> Nothing
displayNameTextP :: Parser Text
displayNameTextP = quoted '\'' <|> takeNameTill (== ' ')
displayNameTextP = displayNameTextP_ >>= \(t, sfx) -> if T.null sfx then pure t else fail "Name ends with punctuation"
{-# INLINE displayNameTextP #-}
displayNameTextP_ :: Parser (Text, Text)
displayNameTextP_ = (,"") <$> quoted '\'' <|> splitPunctuation <$> takeNameTill isSpace
where
takeNameTill p =
A.peekChar' >>= \c ->
if refChar c then A.takeTill p else fail "invalid first character in display name"
splitPunctuation s = (T.dropWhileEnd isPunctuation s, T.takeWhileEnd isPunctuation s)
quoted c = A.char c *> takeNameTill (== c) <* A.char c
refChar c = c > ' ' && c /= '#' && c /= '@' && c /= '\''
-- quotes names that contain spaces or end on punctuation
viewName :: Text -> Text
viewName s = if T.any isSpace s then "'" <> s <> "'" else s
viewName s = if T.any isSpace s || maybe False (isPunctuation . snd) (T.unsnoc s) then "'" <> s <> "'" else s
$(JQ.deriveJSON (enumJSON $ dropPrefix "XL") ''SimplexLinkType)

View file

@ -1119,7 +1119,7 @@ Query: UPDATE rcv_messages SET user_ack = ? WHERE conn_id = ? AND internal_id =
Plan:
SEARCH rcv_messages USING COVERING INDEX idx_rcv_messages_conn_id_internal_id (conn_id=? AND internal_id=?)
Query: UPDATE rcv_queues SET last_broker_ts = ? WHERE conn_id = ? AND rcv_queue_id = ? AND last_broker_ts < ?
Query: UPDATE rcv_queues SET last_broker_ts = ? WHERE conn_id = ? AND rcv_queue_id = ? AND (last_broker_ts IS NULL OR last_broker_ts < ?)
Plan:
SEARCH rcv_queues USING INDEX idx_rcv_queue_id (conn_id=? AND rcv_queue_id=?)

View file

@ -14,7 +14,6 @@ import Control.Exception (finally)
import Control.Monad (forM_, when)
import qualified Data.Text as T
import Directory.Captcha
import qualified Directory.Events as DE
import Directory.Options
import Directory.Service
import Directory.Store
@ -22,6 +21,7 @@ import GHC.IO.Handle (hClose)
import Simplex.Chat.Bot.KnownContacts
import Simplex.Chat.Controller (ChatConfig (..), ChatHooks (..), defaultChatHooks)
import Simplex.Chat.Core
import qualified Simplex.Chat.Markdown as MD
import Simplex.Chat.Options (CoreChatOpts (..))
import Simplex.Chat.Options.DB
import Simplex.Chat.Types (Profile (..))
@ -111,7 +111,7 @@ serviceDbPrefix :: FilePath
serviceDbPrefix = "directory_service"
viewName :: String -> String
viewName = T.unpack . DE.viewName . T.pack
viewName = T.unpack . MD.viewName . T.pack
testDirectoryService :: HasCallStack => TestParams -> IO ()
testDirectoryService ps =

View file

@ -192,10 +192,23 @@ textWithUri = describe "text with Uri" do
"https://github.com/simplex-chat/ - SimpleX on GitHub" <==> uri "https://github.com/simplex-chat/" <> " - SimpleX on GitHub"
-- "SimpleX on GitHub (https://github.com/simplex-chat/)" <==> "SimpleX on GitHub (" <> uri "https://github.com/simplex-chat/" <> ")"
"https://en.m.wikipedia.org/wiki/Servo_(software)" <==> uri "https://en.m.wikipedia.org/wiki/Servo_(software)"
"example.com" <==> uri "example.com"
"example.com." <==> uri "example.com" <> "."
"example.com..." <==> uri "example.com" <> "..."
"www.example.com" <==> uri "www.example.com"
"example.academy" <==> uri "example.academy"
"this is example.com" <==> "this is " <> uri "example.com"
"x.com" <==> uri "x.com"
it "ignored as markdown" do
"_https://simplex.chat" <==> "_https://simplex.chat"
"this is _https://simplex.chat" <==> "this is _https://simplex.chat"
"this is https://" <==> "this is https://"
"example.c" <==> "example.c"
"www.www.example.com" <==> "www.www.example.com"
"www.example1.com" <==> "www.example1.com"
"www." <==> "www."
".com" <==> ".com"
"example.academytoolong" <==> "example.academytoolong"
it "SimpleX links" do
let inv = "/invitation#/?v=1&smp=smp%3A%2F%2F1234-w%3D%3D%40smp.simplex.im%3A5223%2F3456-w%3D%3D%23%2F%3Fv%3D1-2%26dh%3DMCowBQYDK2VuAyEAjiswwI3O_NlS8Fk3HJUW870EY2bAwmttMBsvRB9eV3o%253D&e2e=v%3D2%26x3dh%3DMEIwBQYDK2VvAzkAmKuSYeQ_m0SixPDS8Wq8VBaTS1cW-Lp0n0h4Diu-kUpR-qXx4SDJ32YGEFoGFGSbGPry5Ychr6U%3D%2CMEIwBQYDK2VvAzkAmKuSYeQ_m0SixPDS8Wq8VBaTS1cW-Lp0n0h4Diu-kUpR-qXx4SDJ32YGEFoGFGSbGPry5Ychr6U%3D"
("https://simplex.chat" <> inv) <==> simplexLink XLInvitation ("simplex:" <> inv) ["smp.simplex.im"] ("https://simplex.chat" <> inv)
@ -220,12 +233,14 @@ textWithEmail = describe "text with Email" do
"test chat.chat+123@simplex.chat" <==> "test " <> email "chat.chat+123@simplex.chat"
"chat@simplex.chat test" <==> email "chat@simplex.chat" <> " test"
"test1 chat@simplex.chat test2" <==> "test1 " <> email "chat@simplex.chat" <> " test2"
it "ignored as markdown" do
"test chat@simplex.chat." <==> "test " <> email "chat@simplex.chat" <> "."
"test chat@simplex.chat..." <==> "test " <> email "chat@simplex.chat" <> "..."
it "ignored as email markdown" do
"chat @simplex.chat" <==> "chat " <> mention "simplex.chat" "@simplex.chat"
"this is chat @simplex.chat" <==> "this is chat " <> mention "simplex.chat" "@simplex.chat"
"this is chat@ simplex.chat" <==> "this is chat@ simplex.chat"
"this is chat @ simplex.chat" <==> "this is chat @ simplex.chat"
"*this* is chat @ simplex.chat" <==> bold "this" <> " is chat @ simplex.chat"
"this is chat@ simplex.chat" <==> "this is chat@ " <> uri "simplex.chat"
"this is chat @ simplex.chat" <==> "this is chat @ " <> uri "simplex.chat"
"*this* is chat @ simplex.chat" <==> bold "this" <> " is chat @ " <> uri "simplex.chat"
phone :: Text -> Markdown
phone = Markdown $ Just Phone
@ -258,8 +273,13 @@ textWithMentions = describe "text with mentions" do
"@alice" <==> mention "alice" "@alice"
"hello @alice" <==> "hello " <> mention "alice" "@alice"
"hello @alice !" <==> "hello " <> mention "alice" "@alice" <> " !"
"hello @alice!" <==> "hello " <> mention "alice" "@alice" <> "!"
"hello @alice..." <==> "hello " <> mention "alice" "@alice" <> "..."
"hello @alice@example.com" <==> "hello " <> mention "alice@example.com" "@alice@example.com"
"hello @'alice @ example.com'" <==> "hello " <> mention "alice @ example.com" "@'alice @ example.com'"
"@'alice jones'" <==> mention "alice jones" "@'alice jones'"
"hello @'alice jones'!" <==> "hello " <> mention "alice jones" "@'alice jones'" <> "!"
"hello @'a.j.'!" <==> "hello " <> mention "a.j." "@'a.j.'" <> "!"
it "ignored as markdown" $ do
"hello @'alice jones!" <==> "hello @'alice jones!"
"hello @bob @'alice jones!" <==> "hello " <> mention "bob" "@bob" <> " @'alice jones!"