From 607a091918004c4ca10fb621e53a09a00b8d299b Mon Sep 17 00:00:00 2001 From: Waylan Limberg Date: Wed, 14 Oct 2020 13:48:47 -0400 Subject: [PATCH] Account for Etree Elements in HTML Stash By calling str on all stash elements we ensure they don't raise an error. Worst case, something like `<Element 'div' at 0x...>` gets inserted into the output. However, with the override in the md_in_html extension, we actually serialize and reinsert the original HTML. Worst case, an HTML block which should be parsed as Markdown gets skipped by the extension (`
` gets inserted into the output). The tricky part is testing, as there should be no known cases where this ever occurs. Therefore, we forcefully pass an etree Element directly to the method in the test. That said, as #1040 is unresolved at this point, I have tested locally with a real existing case and it works well. Related to #1040. --- markdown/extensions/md_in_html.py | 12 ++++++++++++ markdown/postprocessors.py | 6 +++++- tests/test_syntax/extensions/test_md_in_html.py | 17 ++++++++++++++++- 3 files changed, 33 insertions(+), 2 deletions(-) diff --git a/markdown/extensions/md_in_html.py b/markdown/extensions/md_in_html.py index 3518d059f..174224ab9 100644 --- a/markdown/extensions/md_in_html.py +++ b/markdown/extensions/md_in_html.py @@ -17,6 +17,7 @@ from . import Extension from ..blockprocessors import BlockProcessor from ..preprocessors import Preprocessor +from ..postprocessors import RawHtmlPostprocessor from .. import util from ..htmlparser import HTMLExtractor import xml.etree.ElementTree as etree @@ -263,6 +264,15 @@ def run(self, parent, blocks): return False +class MarkdownInHTMLPostprocessor(RawHtmlPostprocessor): + def stash_to_string(self, text): + """ Override default to handle any etree elements still in the stash. 
""" + if isinstance(text, etree.Element): + return self.md.serializer(text) + else: + return str(text) + + class MarkdownInHtmlExtension(Extension): """Add Markdown parsing in HTML to Markdown class.""" @@ -275,6 +285,8 @@ def extendMarkdown(self, md): md.parser.blockprocessors.register( MarkdownInHtmlProcessor(md.parser), 'markdown_block', 105 ) + # Replace raw HTML postprocessor + md.postprocessors.register(MarkdownInHTMLPostprocessor(md), 'raw_html', 30) def makeExtension(**kwargs): # pragma: no cover diff --git a/markdown/postprocessors.py b/markdown/postprocessors.py index cd32687d2..2e68cd9f8 100644 --- a/markdown/postprocessors.py +++ b/markdown/postprocessors.py @@ -69,7 +69,7 @@ def run(self, text): """ Iterate over html stash and restore html. """ replacements = OrderedDict() for i in range(self.md.htmlStash.html_counter): - html = self.md.htmlStash.rawHtmlBlocks[i] + html = self.stash_to_string(self.md.htmlStash.rawHtmlBlocks[i]) if self.isblocklevel(html): replacements["

{}

".format( self.md.htmlStash.get_placeholder(i))] = html @@ -95,6 +95,10 @@ def isblocklevel(self, html): return self.md.is_block_level(m.group(1)) return False + def stash_to_string(self, text): + """ Convert a stashed object to a string. """ + return str(text) + class AndSubstitutePostprocessor(Postprocessor): """ Restore valid entities """ diff --git a/tests/test_syntax/extensions/test_md_in_html.py b/tests/test_syntax/extensions/test_md_in_html.py index b68412c8e..433cdd559 100644 --- a/tests/test_syntax/extensions/test_md_in_html.py +++ b/tests/test_syntax/extensions/test_md_in_html.py @@ -23,6 +23,21 @@ from unittest import TestSuite from markdown.test_tools import TestCase from ..blocks.test_html_blocks import TestHTMLBlocks +from markdown import Markdown +from xml.etree.ElementTree import Element + + +class TestMarkdownInHTMLPostProcessor(TestCase): + """ Ensure any remaining elements in HTML stash are properly serialized. """ + + def test_stash_to_string(self): + # There should be no known cases where this actually happens so we need to + # forcefully pass an etree Element to the method to ensure proper behavior. + element = Element('div') + element.text = 'Foo bar.' + md = Markdown(extensions=['md_in_html']) + result = md.postprocessors['raw_html'].stash_to_string(element) + self.assertEqual(result, '
Foo bar.
') class TestDefaultwMdInHTML(TestHTMLBlocks): @@ -758,7 +773,7 @@ def test_md1_nested_footnote_ref(self): def load_tests(loader, tests, pattern): ''' Ensure TestHTMLBlocks doesn't get run twice by excluding it here. ''' suite = TestSuite() - for test_class in [TestDefaultwMdInHTML, TestMdInHTML]: + for test_class in [TestDefaultwMdInHTML, TestMdInHTML, TestMarkdownInHTMLPostProcessor]: tests = loader.loadTestsFromTestCase(test_class) suite.addTests(tests) return suite