From dffb7e970f33959a0b97fb8df267a54d01a98a2a Mon Sep 17 00:00:00 2001 From: Kaushal Modi Date: Wed, 20 Dec 2017 11:23:56 -0500 Subject: [PATCH] Rewrite org-hugo--sanitize-title; make it more robust The org-hugo--sanitize-title function is re-written, now with added test cases for even apparently pathological cases. Now it is possible to have *, _ and ` characters in the post title. --- ox-hugo.el | 25 ++++++++------ test/site/content-org/all-posts.org | 33 +++++++++++++++++++ test/site/content/posts/ndash-and-mdash.md | 2 +- .../content/posts/title_with_asterisks.md | 8 +++++ ...le_with_asterisks_underscores_backticks.md | 8 +++++ .../title_with_backticks_equals_tildes.md | 9 +++++ .../posts/title_with_forward_slashes.md | 9 +++++ .../content/posts/title_with_under_scores.md | 9 +++++ 8 files changed, 92 insertions(+), 11 deletions(-) create mode 100644 test/site/content/posts/title_with_asterisks.md create mode 100644 test/site/content/posts/title_with_asterisks_underscores_backticks.md create mode 100644 test/site/content/posts/title_with_backticks_equals_tildes.md create mode 100644 test/site/content/posts/title_with_forward_slashes.md create mode 100644 test/site/content/posts/title_with_under_scores.md diff --git a/ox-hugo.el b/ox-hugo.el index a6fd5c27..f6ae1506 100644 --- a/ox-hugo.el +++ b/ox-hugo.el @@ -1769,8 +1769,7 @@ to ((name . \"foo\") (weight . 80))." (defun org-hugo--sanitize-title (info) "Return sanitized version of the title string parsed from INFO. -- Remove bold, italics, monospace Markdown markup characters. -- Do not escape underscore characters in the title. +The title string is returned in a markup-free form. If exporting title is disabled by setting `org-export-with-title' to nil or using the OPTIONS keyword e.g. \"title:nil\", return @@ -1778,22 +1777,28 @@ nil. INFO is a plist used as a communication channel." (when (plist-get info :with-title) - (let* ((title (org-export-data (plist-get info :title) info)) - ;; Sanitize title.. 
cannot do bold, italics, monospace in title - (title (replace-regexp-in-string "\\\\?`" "" title)) - (title (replace-regexp-in-string "\\`__?\\|\\`\\*\\*?\\|__?\\'\\|\\*\\*?\\'" "" title)) - (title (replace-regexp-in-string " __?\\|__? \\| \\*\\*?\\|\\*\\*? " " " title)) - ;; Do not escape underscores in title; \_ -> _ - (title (replace-regexp-in-string "\\\\_" "_" title)) + (let* ((title (org-export-data-with-backend (plist-get info :title) 'html info)) + ;; The `title' variable contains the htmlized version of + ;; the Org title string. By using the HTML version, it + ;; will be easy to later strip off just the HTML tags. + ;; Hugo does not render Markdown in the titles and so the ;; Blackfriday smartDashes conversion does not work there. ;; So do that here instead. ;; Convert "---" and "--" to EM DASH (—) and EN DASH (–) respectively. ;; Convert "..." to HORIZONTAL ELLIPSIS (…). - ;; The order of below two replacements is important! + ;; Below two replacements are order sensitive! (title (replace-regexp-in-string "---\\([^-]\\)" "—" title)) ;EM DASH (title (replace-regexp-in-string "--\\([^-]\\)" "–" title)) ;EN DASH (title (replace-regexp-in-string "\\.\\.\\." "…" title))) ;HORIZONTAL ELLIPSIS + + ;; Remove "<FOO>..</FOO>" HTML tags, but retain the string + ;; wrapped in those tags. + ;; (message "ox-hugo sanitize title [dbg] title 1: %s" title) + (while (string-match "<\\(?1:[a-z]+\\)[^>]*>\\(?2:[^<]+\\)</\\1>" title) + (setq title (replace-match "\\2" nil nil title)) + ;; (message "ox-hugo sanitize title [dbg] title 2: %s" title) + ) title))) (defun org-hugo--transform-org-tags (tag-list info &optional no-prefer-hyphen) diff --git a/test/site/content-org/all-posts.org b/test/site/content-org/all-posts.org index 6d7f9c2a..ad165d22 100644 --- a/test/site/content-org/all-posts.org +++ b/test/site/content-org/all-posts.org @@ -324,6 +324,39 @@ it is explicitly set to /empty/ using =:EXPORT_TITLE:=. 
This tests an =ox-hugo= feature that gets around an upstream limitation, where the Blackfriday /smartDashes/ rendering does not happen in post titles ({{{hugoissue(4175)}}}). +** *Title with **lots** and *lots* of * aste*risks** +:PROPERTIES: +:EXPORT_FILE_NAME: title_with_asterisks +:END: +- All asterisk characters meant for Org =*bold*= markup are removed. +- The rest of the asterisk characters are retained. +** Title with /lots/ and lots / / / of forward/slashes +:PROPERTIES: +:EXPORT_FILE_NAME: title_with_forward_slashes +:END: +- All forward slash characters meant for Org =/italic/= markup are + removed. +- The rest of the forward slash characters are retained. +** _Title_ with __lots and _lots_ _ _ of under_scores__ +:PROPERTIES: +:EXPORT_FILE_NAME: title_with_under_scores +:END: +- All underscore characters meant for Org =_underline_= markup are + removed. +- The rest of the underscore characters are retained. +** =Title with lots= of ` ` ` backticks` `foo =bar= ~ ~zoomba~ +:PROPERTIES: +:EXPORT_FILE_NAME: title_with_backticks_equals_tildes +:END: +- All equal/tilde characters meant for Org =~code~= or ~=verbatim=~ + markup are removed. +- The rest of the equal/tilde characters are retained. +** =Title with lots= of ` ` ` backticks` * asterisks *bold*, /italics/ =retain*,_,` in monospace= underscores _ _ _*foo*_ =bar= ** +:PROPERTIES: +:EXPORT_FILE_NAME: title_with_asterisks_underscores_backticks +:END: +- All the Org markup characters are removed. +- The "markup characters" not doing actual markup are retained. 
* Description meta-data with "quoted text" :PROPERTIES: :EXPORT_FILE_NAME: post-description-quotes diff --git a/test/site/content/posts/ndash-and-mdash.md b/test/site/content/posts/ndash-and-mdash.md index ae64f805..c99ff853 100644 --- a/test/site/content/posts/ndash-and-mdash.md +++ b/test/site/content/posts/ndash-and-mdash.md @@ -1,5 +1,5 @@ +++ -title = "ndash and mdash" +title = "ndash `and` mdash" date = 2017-07-22 tags = ["body"] draft = false diff --git a/test/site/content/posts/title_with_asterisks.md b/test/site/content/posts/title_with_asterisks.md new file mode 100644 index 00000000..5974d15d --- /dev/null +++ b/test/site/content/posts/title_with_asterisks.md @@ -0,0 +1,8 @@ ++++ +title = "Title with *lots and lots of * aste*risks**" +tags = ["title"] +draft = false ++++ + +- All asterisk characters meant for Org `*bold*` markup are removed. +- The rest of the asterisk characters are retained. diff --git a/test/site/content/posts/title_with_asterisks_underscores_backticks.md b/test/site/content/posts/title_with_asterisks_underscores_backticks.md new file mode 100644 index 00000000..f0c72bc8 --- /dev/null +++ b/test/site/content/posts/title_with_asterisks_underscores_backticks.md @@ -0,0 +1,8 @@ ++++ +title = "Title with lots of ` ` ` backticks` * asterisks bold, italics retain*,_,` in monospace underscores _ _ foo bar **" +tags = ["title"] +draft = false ++++ + +- All the Org markup characters are removed. +- The "markup characters" not doing actual markup are retained. diff --git a/test/site/content/posts/title_with_backticks_equals_tildes.md b/test/site/content/posts/title_with_backticks_equals_tildes.md new file mode 100644 index 00000000..7fec3ef2 --- /dev/null +++ b/test/site/content/posts/title_with_backticks_equals_tildes.md @@ -0,0 +1,9 @@ ++++ +title = "Title with lots of ` ` ` backticks` `foo bar ~ zoomba" +tags = ["title"] +draft = false ++++ + +- All equal/tilde characters meant for Org `~code~` or `=verbatim=` + markup are removed. 
+- The rest of the equal/tilde characters are retained. diff --git a/test/site/content/posts/title_with_forward_slashes.md b/test/site/content/posts/title_with_forward_slashes.md new file mode 100644 index 00000000..2a95debf --- /dev/null +++ b/test/site/content/posts/title_with_forward_slashes.md @@ -0,0 +1,9 @@ ++++ +title = "Title with lots and lots / / / of forward/slashes" +tags = ["title"] +draft = false ++++ + +- All forward slash characters meant for Org `/italic/` markup are + removed. +- The rest of the forward slash characters are retained. diff --git a/test/site/content/posts/title_with_under_scores.md b/test/site/content/posts/title_with_under_scores.md new file mode 100644 index 00000000..7ef581c8 --- /dev/null +++ b/test/site/content/posts/title_with_under_scores.md @@ -0,0 +1,9 @@ ++++ +title = "Title with _lots and _lots _ _ of under_scores__" +tags = ["title"] +draft = false ++++ + +- All underscore characters meant for Org `_underline_` markup are + removed. +- The rest of the underscore characters are retained.