From 85d32f3ed50f8bdde4b89cbbf5475ed485ff9ee9 Mon Sep 17 00:00:00 2001 From: Cristian Lara Date: Mon, 2 Dec 2024 17:15:29 -0500 Subject: [PATCH] [Tutorials] Improve conversion of math to MDX - Properly convert \begin...\end math blocks by wrapping in $$ - Update our regex for unescaping braces in math to support math wrapped by double dollar signs and containing newlines. - Make sure $$ symbols are not escaped and include line breaks before and after. I encountered a few examples in our notebooks that didn't match this formatting expected by mdx. These changes were made when adapting this script to work in the botorch repo: - https://github.com/CristianLara/botorch/pull/3/commits/a0b3eaac561d1a78dc5015e938a10c44e5cfbe39 - https://github.com/CristianLara/botorch/pull/3/commits/5118c533766f7d7c958e61c6fb37a3fb892e2819 --- scripts/convert_ipynb_to_mdx.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/scripts/convert_ipynb_to_mdx.py b/scripts/convert_ipynb_to_mdx.py index b98151f05c1..37aec12bbcd 100644 --- a/scripts/convert_ipynb_to_mdx.py +++ b/scripts/convert_ipynb_to_mdx.py @@ -312,13 +312,20 @@ def sanitize_mdx(mdx: str) -> str: # Remove some lingering HTML tags that break MDX. mdx = mdx.replace("", "") mdx = mdx.replace("", "") + mdx = mdx.replace("
", "
") # Remove any HTML comments from the Markdown. They are fine to keep in the # notebooks, but are not really useful in the MDX. mdx = re.sub("()", "", mdx, flags=re.DOTALL) # "\" Escape braces to make the text MDX compatible. mdx = re.sub("([^\\\\])([{}])", "\\g<1>\\\\\\g<2>", mdx) - # Escaping braces causes issues in math blocks, unescape them. - mdx = re.sub("\\$(.*?)\\$", lambda match: match[0].replace("\\{", "{").replace("\\}", "}"), mdx) + + # -- KaTeX -- + # Wrap '\begin{}...\end{}' in $$ for KaTeX to work. + mdx = re.sub("(\\\\begin\\\\{(\\w*?)\\\\}(.|\n)*?end\\\\{\\2\\\\})", "$$\\g<1>$$", mdx) + # # make sure $$ symbols are not escaped and include line breaks. + mdx = re.sub("\\\\?\\$\\\\?\\$((?:.|\n)*?)\\\\?\\$\\\\?\\$", "\n$$\n\\g<1>\n$$\n", mdx) + # # Escaping braces causes issues in math blocks, unescape them. + mdx = re.sub("\\$?\\$(.|\n)*?\\$\\$?", lambda match: match[0].replace("\\{", "{").replace("\\}", "}"), mdx) return mdx