diff --git a/simplex-chat.cabal b/simplex-chat.cabal index 52e40d6c6f..43fe1ca046 100644 --- a/simplex-chat.cabal +++ b/simplex-chat.cabal @@ -277,6 +277,7 @@ library , tls >=1.9.0 && <1.10 , unliftio ==0.2.* , unliftio-core ==0.2.* + , uri-bytestring >=0.3.3.1 && <0.4 , uuid ==1.3.* , zip ==2.0.* default-language: Haskell2010 diff --git a/src/Simplex/Chat/Markdown.hs b/src/Simplex/Chat/Markdown.hs index 4eb0c72a1b..9508782d2c 100644 --- a/src/Simplex/Chat/Markdown.hs +++ b/src/Simplex/Chat/Markdown.hs @@ -1,4 +1,5 @@ {-# LANGUAGE DeriveAnyClass #-} +{-# LANGUAGE DuplicateRecordFields #-} {-# LANGUAGE GADTs #-} {-# LANGUAGE LambdaCase #-} {-# LANGUAGE NamedFieldPuns #-} @@ -16,7 +17,8 @@ import qualified Data.Aeson as J import qualified Data.Aeson.TH as JQ import Data.Attoparsec.Text (Parser) import qualified Data.Attoparsec.Text as A -import Data.Char (isDigit, isPunctuation, isSpace) +import qualified Data.ByteString.Char8 as B +import Data.Char (isAlpha, isAscii, isDigit, isPunctuation, isSpace) import Data.Either (fromRight) import Data.Functor (($>)) import Data.List (foldl', intercalate) @@ -32,10 +34,11 @@ import Simplex.Chat.Types import Simplex.Messaging.Agent.Protocol (AConnectionLink (..), ConnReqUriData (..), ConnShortLink (..), ConnectionLink (..), ConnectionRequestUri (..), ContactConnType (..), SMPQueue (..), simplexConnReqUri, simplexShortLink) import Simplex.Messaging.Encoding.String import Simplex.Messaging.Parsers (defaultJSON, dropPrefix, enumJSON, fstToLower, sumTypeJSON) -import Simplex.Messaging.Protocol (ProtocolServer (..)) +import Simplex.Messaging.Protocol (ProtocolServer (..), sameSrvAddr) import Simplex.Messaging.Util (decodeJSON, safeDecodeUtf8) import System.Console.ANSI.Types import qualified Text.Email.Validate as Email +import qualified URI.ByteString as U data Markdown = Markdown (Maybe Format) Text | Markdown :|: Markdown deriving (Eq, Show) @@ -47,23 +50,30 @@ data Format | Snippet | Secret | Colored {color :: FormatColor} - | WebLink {text :: WLText, scheme :: Text, originalUri :: Text, sanitizedUri :: Text} - | SimplexLink {linkType :: SimplexLinkType, simplexUri :: AConnectionLink, smpHosts :: NonEmpty Text} + -- linkUri is Nothing when there is no link text or it is the same as URI, in which case the text of the fragment is URI. + -- sanitizedUri is Nothing when original URI is already sanitized. + -- spoofed is True when link text is a valid URI, and it is different from link URI. + | WebLink {scheme :: Text, linkUri :: Maybe Text, sanitizedUri :: Maybe Text, spoofed :: Bool} + | SimplexLink {linkType :: SimplexLinkType, simplexUri :: AConnectionLink, smpHosts :: NonEmpty Text, spoofed :: Bool} | Mention {memberName :: Text} | Email | Phone deriving (Eq, Show) --- This type provides support for descriptive markdown links, such as [click here](https://example.com), --- with detection of any potentially malicious links, such as: --- - tracking parameters (tracked via difference between originalUri and sanitizedUri in WebLink) --- - WLBadPath: different path in link text from the path in the link, e.g. [example.com/hello](https://example.com/goodbuy), except query string parameters --- - WLBadHost: different domain in the link text from the domain in the link, e.g [example.com](https://error.com) --- Sending clients should reject such links, receiving clients should warn before opening links with bad path or host and show them in red. -data WLText - = WLText {linkText :: Maybe Text} -- the text is Nothing if it's the same as the link, it should be used to decide whether to show alert before opening. - | WLBadPath {badLink :: Text} - | WLBadHost {badLink :: Text} +data SimplexOrWebLink = SOWSimplex AConnectionLink | SOWWeb U.URI + +parseLinkUri :: Text -> Either String SimplexOrWebLink +parseLinkUri t = case strDecode s of + Right cLink -> Right $ SOWSimplex cLink + Left _ -> case U.parseURI U.laxURIParserOptions s of + Right uri@U.URI {uriAuthority} -> case uriAuthority of + Just U.Authority {authorityHost = U.Host h} + | B.elem '.' h -> Right $ SOWWeb uri + | otherwise -> Left "Invalid URI host" + Nothing -> Left "No URI host" + Left e -> Left $ "Invalid URI: " <> show e + where + s = encodeUtf8 t mentionedNames :: MarkdownList -> [Text] mentionedNames = mapMaybe (\(FormattedText f _) -> mentionedName =<< f) @@ -184,6 +194,7 @@ markdownP = mconcat <$> A.many' fragmentP '#' -> A.char '#' *> secretP '!' -> coloredP <|> wordP '@' -> mentionP <|> wordP + '[' -> webLinkP <|> wordP _ | isDigit c -> phoneP <|> wordP | otherwise -> wordP @@ -218,6 +229,20 @@ markdownP = mconcat <$> A.many' fragmentP name <- displayNameTextP let sName = if c == '\'' then '\'' `T.cons` name `T.snoc` '\'' else name pure $ markdown (Mention name) ('@' `T.cons` sName) + webLinkP = do + t <- A.char '[' *> A.takeWhile1 (/= ']') <* A.char ']' + uri <- A.char '(' *> A.takeWhile1 (/= ')') <* A.char ')' + sowLink <- either fail pure $ parseLinkUri uri + case sowLink of + SOWSimplex _ -> fail "SimpleX links with link text not supported" + SOWWeb _ -> pure () + let t' = T.dropAround isPunctuation $ T.filter (not . isSpace) t + (linkUri, spoofed) = case either (\_ -> parseLinkUri ("https://" <> t')) Right $ parseLinkUri t' of + Right _ + | t == uri -> (Nothing, False) + | otherwise -> (Just uri, True) + Left _ -> (Just uri, False) + pure $ markdown (sowLinkFormat linkUri spoofed sowLink) t colorP = A.anyChar >>= \case 'r' -> "ed" $> Red <|> pure Red @@ -251,23 +276,42 @@ markdownP = mconcat <$> A.many' fragmentP let t = T.takeWhileEnd isPunctuation' s uri = uriMarkdown $ T.dropWhileEnd isPunctuation' s in if T.null t then uri else uri :|: unmarked t + | isDomain s = markdown (WebLink "https" (Just $ "https://" <> s) Nothing False) s | isEmail s = markdown Email s | otherwise = unmarked s isPunctuation' = \case '/' -> False ')' -> False c -> isPunctuation c - uriMarkdown s = case strDecode $ encodeUtf8 s of - Right cLink -> markdown (simplexUriFormat cLink) s - _ -> markdown Uri s + uriMarkdown s = case parseLinkUri s of + Right sowLink -> markdown (sowLinkFormat Nothing False sowLink) s + Left _ -> unmarked s isUri s = T.length s >= 10 && any (`T.isPrefixOf` s) ["http://", "https://", "simplex:/"] + isDomain s = case T.splitOn "." s of + [name, tld] -> validDomain name tld + [sub, name, tld] -> T.length sub > 0 && T.length sub <= 8 && validDomain name tld + _ -> False + where + validDomain name tld = + (let n = T.length name in n >= 1 && n <= 24) + && (let n = T.length tld in n >= 2 && n <= 8) + && (let p c = isAscii c && isAlpha c in T.all p name && T.all p tld) isEmail s = T.any (== '@') s && Email.isValid (encodeUtf8 s) noFormat = pure . unmarked - simplexUriFormat :: AConnectionLink -> Format - simplexUriFormat = \case + sowLinkFormat :: Maybe Text -> Bool -> SimplexOrWebLink -> Format + sowLinkFormat linkUri spoofed = \case + SOWSimplex cLink -> simplexUriFormat spoofed cLink + SOWWeb uri@U.URI {uriScheme = U.Scheme sch, uriQuery = U.Query originalQS} -> + let sanitizedQS = filter (\(p, _) -> p == "q" || p == "search") originalQS + sanitizedUri + | length sanitizedQS == length originalQS = Nothing + | otherwise = Just $ safeDecodeUtf8 $ U.serializeURIRef' uri {U.uriQuery = U.Query sanitizedQS} + in WebLink {scheme = safeDecodeUtf8 sch, linkUri, sanitizedUri, spoofed} + simplexUriFormat :: Bool -> AConnectionLink -> Format + simplexUriFormat spoofed = \case ACL m (CLFull cReq) -> case cReq of - CRContactUri crData -> SimplexLink (linkType' crData) cLink $ uriHosts crData - CRInvitationUri crData _ -> SimplexLink XLInvitation cLink $ uriHosts crData + CRContactUri crData -> SimplexLink (linkType' crData) cLink (uriHosts crData) spoofed + CRInvitationUri crData _ -> SimplexLink XLInvitation cLink (uriHosts crData) spoofed where cLink = ACL m $ CLFull $ simplexConnReqUri cReq uriHosts ConnReqUriData {crSmpQueues} = L.map strEncodeText $ sconcat $ L.map (host . qServer) crSmpQueues @@ -275,8 +319,8 @@ markdownP = mconcat <$> A.many' fragmentP Just (CRDataGroup _) -> XLGroup Nothing -> XLContact ACL m (CLShort sLnk) -> case sLnk of - CSLContact _ ct srv _ -> SimplexLink (linkType' ct) cLink $ uriHosts srv - CSLInvitation _ srv _ _ -> SimplexLink XLInvitation cLink $ uriHosts srv + CSLContact _ ct srv _ -> SimplexLink (linkType' ct) cLink (uriHosts srv) spoofed + CSLInvitation _ srv _ _ -> SimplexLink XLInvitation cLink (uriHosts srv) spoofed where cLink = ACL m $ CLShort $ simplexShortLink sLnk uriHosts srv = L.map strEncodeText $ host srv @@ -297,8 +341,11 @@ markdownText (FormattedText f_ t) = case f_ of Snippet -> around '`' Secret -> around '#' Colored (FormatColor c) -> color c - Uri -> t - SimplexLink {} -> t + WebLink {linkUri} -> case linkUri of + Just uri | uri /= t && uri /= ("https://" <> t) -> + "[" <> t <> "](" <> uri <> ")" + _ -> t + SimplexLink {simplexUri} -> t Mention _ -> t Email -> t Phone -> t diff --git a/tests/Bots/DirectoryTests.hs b/tests/Bots/DirectoryTests.hs index 0877a48daa..62e46744c4 100644 --- a/tests/Bots/DirectoryTests.hs +++ b/tests/Bots/DirectoryTests.hs @@ -14,7 +14,6 @@ import Control.Exception (finally) import Control.Monad (forM_, when) import qualified Data.Text as T import Directory.Captcha -import qualified Directory.Events as DE import Directory.Options import Directory.Service import Directory.Store @@ -22,6 +21,7 @@ import GHC.IO.Handle (hClose) import Simplex.Chat.Bot.KnownContacts import Simplex.Chat.Controller (ChatConfig (..), ChatHooks (..), defaultChatHooks) import Simplex.Chat.Core +import qualified Simplex.Chat.Markdown as Markdown import Simplex.Chat.Options (CoreChatOpts (..)) import Simplex.Chat.Options.DB import Simplex.Chat.Types (Profile (..)) @@ -111,7 +111,7 @@ serviceDbPrefix :: FilePath serviceDbPrefix = "directory_service" viewName :: String -> String -viewName = T.unpack . DE.viewName . T.pack +viewName = T.unpack . Markdown.viewName . T.pack testDirectoryService :: HasCallStack => TestParams -> IO () testDirectoryService ps = diff --git a/tests/MarkdownTests.hs b/tests/MarkdownTests.hs index fc872f05b1..7574f04ff6 100644 --- a/tests/MarkdownTests.hs +++ b/tests/MarkdownTests.hs @@ -20,6 +20,7 @@ markdownTests = do secretText textColor textWithUri + textWithHyperlink textWithEmail textWithPhone textWithMentions @@ -168,23 +169,35 @@ textColor = describe "text color (red)" do <==> "snippet: " <> markdown Snippet "this is !1 red text!" uri :: Text -> Markdown -uri = Markdown $ Just Uri +uri = Markdown $ Just baseUriFormat + +httpUri :: Text -> Markdown +httpUri = Markdown $ Just baseUriFormat {scheme = "http"} + +weblink :: Text -> Text -> Markdown +weblink u = Markdown $ Just baseUriFormat {linkUri = Just u} + +sanitizedWebLink :: Text -> Text -> Markdown +sanitizedWebLink u = Markdown $ Just baseUriFormat {sanitizedUri = Just u} + +baseUriFormat :: Format +baseUriFormat = WebLink {scheme = "https", linkUri = Nothing, sanitizedUri = Nothing, spoofed = False} simplexLink :: SimplexLinkType -> Text -> NonEmpty Text -> Text -> Markdown simplexLink linkType uriText smpHosts t = Markdown (simplexLinkFormat linkType uriText smpHosts) t simplexLinkFormat :: SimplexLinkType -> Text -> NonEmpty Text -> Maybe Format simplexLinkFormat linkType uriText smpHosts = case strDecode $ encodeUtf8 uriText of - Right simplexUri -> Just SimplexLink {linkType, simplexUri, smpHosts} + Right simplexUri -> Just SimplexLink {linkType, simplexUri, smpHosts, spoofed = False} Left e -> error e textWithUri :: Spec -textWithUri = describe "text with Uri" do +textWithUri = describe "text with WebLink without link text" do it "correct markdown" do "https://simplex.chat" <==> uri "https://simplex.chat" "https://simplex.chat." <==> uri "https://simplex.chat" <> "." "https://simplex.chat, hello" <==> uri "https://simplex.chat" <> ", hello" - "http://simplex.chat" <==> uri "http://simplex.chat" + "http://simplex.chat" <==> httpUri "http://simplex.chat" "this is https://simplex.chat" <==> "this is " <> uri "https://simplex.chat" "https://simplex.chat site" <==> uri "https://simplex.chat" <> " site" "SimpleX on GitHub: https://github.com/simplex-chat/" <==> "SimpleX on GitHub: " <> uri "https://github.com/simplex-chat/" @@ -192,10 +205,26 @@ textWithUri = describe "text with Uri" do "https://github.com/simplex-chat/ - SimpleX on GitHub" <==> uri "https://github.com/simplex-chat/" <> " - SimpleX on GitHub" -- "SimpleX on GitHub (https://github.com/simplex-chat/)" <==> "SimpleX on GitHub (" <> uri "https://github.com/simplex-chat/" <> ")" "https://en.m.wikipedia.org/wiki/Servo_(software)" <==> uri "https://en.m.wikipedia.org/wiki/Servo_(software)" + "example.com" <==> weblink "https://example.com" "example.com" + "www.example.com" <==> weblink "https://www.example.com" "www.example.com" + "example.academy" <==> weblink "https://example.academy" "example.academy" + "this is example.com" <==> "this is " <> weblink "https://example.com" "example.com" + "x.com" <==> weblink "https://x.com" "x.com" + "https://example.com/?ref=123" <==> sanitizedWebLink "https://example.com/" "https://example.com/?ref=123" + "https://example.com/?ref=123#anchor" <==> sanitizedWebLink "https://example.com/#anchor" "https://example.com/?ref=123#anchor" + "https://example.com/#anchor?ref=123" <==> uri "https://example.com/#anchor?ref=123" + "https://duckduckgo.com/?q=search+string" <==> uri "https://duckduckgo.com/?q=search+string" + "https://duckduckgo.com/?t=h_&q=search+string&ia=web" <==> sanitizedWebLink "https://duckduckgo.com/?q=search%20string" "https://duckduckgo.com/?t=h_&q=search+string&ia=web" it "ignored as markdown" do "_https://simplex.chat" <==> "_https://simplex.chat" "this is _https://simplex.chat" <==> "this is _https://simplex.chat" "this is https://" <==> "this is https://" + "example.c" <==> "example.c" + "www.www.example.com" <==> "www.www.example.com" + "www.example1.com" <==> "www.example1.com" + "www." <==> "www." + ".com" <==> ".com" + "example.academytoolong" <==> "example.academytoolong" it "SimpleX links" do let inv = "/invitation#/?v=1&smp=smp%3A%2F%2F1234-w%3D%3D%40smp.simplex.im%3A5223%2F3456-w%3D%3D%23%2F%3Fv%3D1-2%26dh%3DMCowBQYDK2VuAyEAjiswwI3O_NlS8Fk3HJUW870EY2bAwmttMBsvRB9eV3o%253D&e2e=v%3D2%26x3dh%3DMEIwBQYDK2VvAzkAmKuSYeQ_m0SixPDS8Wq8VBaTS1cW-Lp0n0h4Diu-kUpR-qXx4SDJ32YGEFoGFGSbGPry5Ychr6U%3D%2CMEIwBQYDK2VvAzkAmKuSYeQ_m0SixPDS8Wq8VBaTS1cW-Lp0n0h4Diu-kUpR-qXx4SDJ32YGEFoGFGSbGPry5Ychr6U%3D" ("https://simplex.chat" <> inv) <==> simplexLink XLInvitation ("simplex:" <> inv) ["smp.simplex.im"] ("https://simplex.chat" <> inv) @@ -208,6 +237,25 @@ textWithUri = describe "text with Uri" do ("https://simplex.chat" <> gr) <==> simplexLink XLGroup ("simplex:" <> gr) ["smp4.simplex.im", "o5vmywmrnaxalvz6wi3zicyftgio6psuvyniis6gco6bp6ekl4cqj4id.onion"] ("https://simplex.chat" <> gr) ("simplex:" <> gr) <==> simplexLink XLGroup ("simplex:" <> gr) ["smp4.simplex.im", "o5vmywmrnaxalvz6wi3zicyftgio6psuvyniis6gco6bp6ekl4cqj4id.onion"] ("simplex:" <> gr) +spoofedLink :: Text -> Text -> Markdown +spoofedLink u = Markdown $ Just baseUriFormat {linkUri = Just u, spoofed = True} + +textWithHyperlink :: Spec +textWithHyperlink = describe "text with WebLink without link text" do + it "correct markdown" do + "[click here](https://example.com)" <==> weblink "https://example.com" "click here" + "For details [click here](https://example.com)" <==> "For details " <> weblink "https://example.com" "click here" + it "spoofed link" do + "[https://example.com](https://another.com)" <==> spoofedLink "https://another.com" "https://example.com" + "[example.com/page](https://another.com/page)" <==> spoofedLink "https://another.com/page" "example.com/page" + "[ example.com/page!. ](https://another.com/page)" <==> spoofedLink "https://another.com/page" " example.com/page!. " + "[ example . com/page !. ](https://another.com/page)" <==> spoofedLink "https://another.com/page" " example . com/page !. " + it "ignored as markdown" do + "[click here](example.com)" <==> "[click here](example.com)" + "[click here](https://example.com )" <==> "[click here](https://example.com )" + let inv = "/invitation#/?v=1&smp=smp%3A%2F%2F1234-w%3D%3D%40smp.simplex.im%3A5223%2F3456-w%3D%3D%23%2F%3Fv%3D1-2%26dh%3DMCowBQYDK2VuAyEAjiswwI3O_NlS8Fk3HJUW870EY2bAwmttMBsvRB9eV3o%253D&e2e=v%3D2%26x3dh%3DMEIwBQYDK2VvAzkAmKuSYeQ_m0SixPDS8Wq8VBaTS1cW-Lp0n0h4Diu-kUpR-qXx4SDJ32YGEFoGFGSbGPry5Ychr6U%3D%2CMEIwBQYDK2VvAzkAmKuSYeQ_m0SixPDS8Wq8VBaTS1cW-Lp0n0h4Diu-kUpR-qXx4SDJ32YGEFoGFGSbGPry5Ychr6U%3D" + ("[Connect to me](https://simplex.chat" <> inv <> ")") <==> ("[Connect to me](https://simplex.chat" <> unmarked inv <> ")") + email :: Text -> Markdown email = Markdown $ Just Email @@ -221,11 +269,11 @@ textWithEmail = describe "text with Email" do "chat@simplex.chat test" <==> email "chat@simplex.chat" <> " test" "test1 chat@simplex.chat test2" <==> "test1 " <> email "chat@simplex.chat" <> " test2" it "ignored as markdown" do - "chat @simplex.chat" <==> "chat " <> mention "simplex.chat" "@simplex.chat" - "this is chat @simplex.chat" <==> "this is chat " <> mention "simplex.chat" "@simplex.chat" - "this is chat@ simplex.chat" <==> "this is chat@ simplex.chat" - "this is chat @ simplex.chat" <==> "this is chat @ simplex.chat" - "*this* is chat @ simplex.chat" <==> bold "this" <> " is chat @ simplex.chat" + "chat @'simplex.chat'" <==> "chat " <> mention "simplex.chat" "@'simplex.chat'" + "this is chat @'simplex.chat'" <==> "this is chat " <> mention "simplex.chat" "@'simplex.chat'" + "this is chat@ simplex.chat" <==> "this is chat@ " <> weblink "https://simplex.chat" "simplex.chat" + "this is chat @ simplex.chat" <==> "this is chat @ " <> weblink "https://simplex.chat" "simplex.chat" + "*this* is chat @ simplex.chat" <==> bold "this" <> " is chat @ " <> weblink "https://simplex.chat" "simplex.chat" phone :: Text -> Markdown phone = Markdown $ Just Phone @@ -268,14 +316,17 @@ textWithMentions = describe "text with mentions" do "hello @bob @" <==> "hello " <> mention "bob" "@bob" <> " @" uri' :: Text -> FormattedText -uri' = FormattedText $ Just Uri +uri' = FormattedText $ Just baseUriFormat + +httpUri' :: Text -> FormattedText +httpUri' = FormattedText $ Just baseUriFormat {scheme = "http"} multilineMarkdownList :: Spec multilineMarkdownList = describe "multiline markdown" do it "correct markdown" do - "http://simplex.chat\nhttp://app.simplex.chat" <<==>> [uri' "http://simplex.chat", "\n", uri' "http://app.simplex.chat"] + "http://simplex.chat\nhttp://app.simplex.chat" <<==>> [httpUri' "http://simplex.chat", "\n", httpUri' "http://app.simplex.chat"] it "combines the same formats" do - "http://simplex.chat\ntext 1\ntext 2\nhttp://app.simplex.chat" <<==>> [uri' "http://simplex.chat", "\ntext 1\ntext 2\n", uri' "http://app.simplex.chat"] + "http://simplex.chat\ntext 1\ntext 2\nhttp://app.simplex.chat" <<==>> [httpUri' "http://simplex.chat", "\ntext 1\ntext 2\n", httpUri' "http://app.simplex.chat"] it "no markdown" do parseMaybeMarkdownList "not a\nmarkdown" `shouldBe` Nothing let inv = "/invitation#/?v=1&smp=smp%3A%2F%2F1234-w%3D%3D%40smp.simplex.im%3A5223%2F3456-w%3D%3D%23%2F%3Fv%3D1-2%26dh%3DMCowBQYDK2VuAyEAjiswwI3O_NlS8Fk3HJUW870EY2bAwmttMBsvRB9eV3o%253D&e2e=v%3D2%26x3dh%3DMEIwBQYDK2VvAzkAmKuSYeQ_m0SixPDS8Wq8VBaTS1cW-Lp0n0h4Diu-kUpR-qXx4SDJ32YGEFoGFGSbGPry5Ychr6U%3D%2CMEIwBQYDK2VvAzkAmKuSYeQ_m0SixPDS8Wq8VBaTS1cW-Lp0n0h4Diu-kUpR-qXx4SDJ32YGEFoGFGSbGPry5Ychr6U%3D" diff --git a/tests/Test.hs b/tests/Test.hs index 1d8d45ebb4..24a2176762 100644 --- a/tests/Test.hs +++ b/tests/Test.hs @@ -49,7 +49,7 @@ main = do describe "Schema dump" schemaDumpTest around tmpBracket $ describe "WebRTC encryption" webRTCTests #endif - describe "SimpleX chat markdown" markdownTests + fdescribe "SimpleX chat markdown" markdownTests describe "JSON Tests" jsonTests describe "SimpleX chat view" viewTests describe "SimpleX chat protocol" protocolTests