From 70482543971b79efa2ba0a6a36205758f4320894 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 1 Oct 2024 10:16:23 -0700 Subject: [PATCH] Allow extracting data: URIs even in PandocPure (`--sandbox`). These don't require IO, so we should allow it in sandboxed mode. Closes #10249. --- src/Text/Pandoc/Class/IO.hs | 12 +++--------- src/Text/Pandoc/Class/PandocMonad.hs | 9 +++++---- 2 files changed, 8 insertions(+), 13 deletions(-) diff --git a/src/Text/Pandoc/Class/IO.hs b/src/Text/Pandoc/Class/IO.hs index 12feeeb3cc9b..e39fb11f9fc4 100644 --- a/src/Text/Pandoc/Class/IO.hs +++ b/src/Text/Pandoc/Class/IO.hs @@ -62,7 +62,7 @@ import System.IO.Error import System.Random (StdGen) import Text.Pandoc.Class.CommonState (CommonState (..)) import Text.Pandoc.Class.PandocMonad - (PandocMonad, getsCommonState, getMediaBag, report) + (PandocMonad, getsCommonState, getMediaBag, report, extractURIData) import Text.Pandoc.Definition (Pandoc, Inline (Image)) import Text.Pandoc.Error (PandocError (..)) import Text.Pandoc.Logging (LogMessage (..), messageVerbosity, showLogMessage) @@ -128,14 +128,8 @@ newUniqueHash = hashUnique <$> liftIO Data.Unique.newUnique openURL :: (PandocMonad m, MonadIO m) => Text -> m (B.ByteString, Maybe MimeType) openURL u | Just (URI{ uriScheme = "data:", - uriPath = upath }) <- parseURI (T.unpack u) = do - let (mimespec, rest) = break (== ',') $ unEscapeString upath - let contents = UTF8.fromString $ drop 1 rest - case break (== ';') (filter (/= ' ') mimespec) of - (mime, ";base64") -> - return (decodeLenient contents, Just (T.pack mime)) - (mime, _) -> - return (contents, Just (T.pack mime)) + uriPath = upath }) <- parseURI (T.unpack u) + = pure $ extractURIData upath | otherwise = do let toReqHeader (n, v) = (CI.mk (UTF8.fromText n), UTF8.fromText v) customHeaders <- map toReqHeader <$> getsCommonState stRequestHeaders diff --git a/src/Text/Pandoc/Class/PandocMonad.hs b/src/Text/Pandoc/Class/PandocMonad.hs index 51d017c06e58..c13b3df217d9 100644 --- a/src/Text/Pandoc/Class/PandocMonad.hs +++ b/src/Text/Pandoc/Class/PandocMonad.hs @@ -335,8 +335,7 @@ downloadOrRead :: PandocMonad m -> m (B.ByteString, Maybe MimeType) downloadOrRead s = do sourceURL <- getsCommonState stSourceURL - case (sourceURL >>= parseURIReference' . - ensureEscaped, ensureEscaped s) of + case (sourceURL >>= parseURIReference' . ensureEscaped, ensureEscaped s) of (Just u, s') -> -- try fetching from relative path at source case parseURIReference' s' of Just u' -> openURL $ T.pack $ show $ u' `nonStrictRelativeTo` u @@ -348,8 +347,10 @@ downloadOrRead s = do Nothing -> openURL s' -- will throw error (Nothing, s') -> case parseURI (T.unpack s') of -- requires absolute URI - Just u' | uriScheme u' == "file:" -> - readLocalFile $ uriPathToPath (T.pack $ uriPath u') + Just URI{ uriScheme = "file:", uriPath = upath} + -> readLocalFile $ uriPathToPath (T.pack upath) + Just URI{ uriScheme = "data:", uriPath = upath} + -> pure $ extractURIData upath -- We don't want to treat C:/ as a scheme: Just u' | length (uriScheme u') > 2 -> openURL (T.pack $ show u') _ -> readLocalFile fp -- get from local file system