From 696d63ffd3b9e1bc116d3853fd3f883f15eb143c Mon Sep 17 00:00:00 2001 From: Dennis Snell Date: Mon, 19 Dec 2022 14:13:55 -0700 Subject: [PATCH] Tag Processor: Fix a problem backing up too far after updating HTML (#46598) A defect introduced in #46018 led to the tag processor backing up one index too far after flushing its queued changes on a document. For most operations this didn't cause any harm because when immediately moving forward after an update, the `next_tag()` returned to the same spot: it was backing up to one position before the current tag instead of at the start of the current tag. Unfortunately, when the current tag was the first in the document this would lead the processor to rewind to position `-1`, right before the start of the document, and lead to errors with `strpos()` when it received out-of-bounds indices. In this fix we're correcting the adjustment for the HTML tag's `<` and documenting the math in the file so that it's clearer why it's there and providing guidance should another fix be necessary. As supporting work to this patch we're making the text replacement sort stable, inside the tag processor, for when determining the order in which to apply text replacements. This isn't necessary for the runtime but is a nuissance for testing because different PHP versions produce different unstable sort orderings and this prevents that from causing the unit tests to fail in one version but pass in another. Props to @anton-vlasenko for finding this bug. Enforce sort stability when flushing out text replacements --- .../html/class-wp-html-tag-processor.php | 34 ++++++++-- phpunit/html/wp-html-tag-processor-test.php | 68 +++++++++---------- 2 files changed, 63 insertions(+), 39 deletions(-) diff --git a/lib/experimental/html/class-wp-html-tag-processor.php b/lib/experimental/html/class-wp-html-tag-processor.php index affbb6fb27b5c1..afdf6c66073cd5 100644 --- a/lib/experimental/html/class-wp-html-tag-processor.php +++ b/lib/experimental/html/class-wp-html-tag-processor.php @@ -1331,16 +1331,33 @@ public function seek( $bookmark_name ) { } /** - * Sort function to arrange objects with a start property in ascending order. + * Compare two WP_HTML_Text_Replacement objects. * * @since 6.2.0 * - * @param object $a First attribute update. - * @param object $b Second attribute update. + * @param WP_HTML_Text_Replacement $a First attribute update. + * @param WP_HTML_Text_Replacement $b Second attribute update. * @return integer */ private static function sort_start_ascending( $a, $b ) { - return $a->start - $b->start; + $by_start = $a->start - $b->start; + if ( 0 !== $by_start ) { + return $by_start; + } + + $by_text = isset( $a->text, $b->text ) ? strcmp( $a->text, $b->text ) : 0; + if ( 0 !== $by_text ) { + return $by_text; + } + + /* + * We shouldn't ever get here because it would imply + * that we have two identical updates, or that we're + * trying to replace the same input text twice. Still + * we'll handle this sort to preserve determinism, + * which might come in handy when debugging. + */ + return $a->end - $b->end; } /** @@ -1657,7 +1674,14 @@ public function get_updated_html() { $this->updated_bytes = strlen( $this->updated_html ); // 3. Point this tag processor at the original tag opener and consume it - $this->parsed_bytes = strlen( $updated_html_up_to_current_tag_name_end ) - $this->tag_name_length - 2; + + /* + * When we get here we're at the end of the tag name, and we want to rewind to before it + *

Previous HTMLMore HTML

+ * ^ | back up by the length of the tag name plus the opening < + * \<-/ back up by strlen("em") + 1 ==> 3 + */ + $this->parsed_bytes = strlen( $updated_html_up_to_current_tag_name_end ) - $this->tag_name_length - 1; $this->next_tag(); return $this->html; diff --git a/phpunit/html/wp-html-tag-processor-test.php b/phpunit/html/wp-html-tag-processor-test.php index 6a850eb7a4edb2..cab14962491aca 100644 --- a/phpunit/html/wp-html-tag-processor-test.php +++ b/phpunit/html/wp-html-tag-processor-test.php @@ -1132,7 +1132,7 @@ public function data_malformed_tag() { $examples = array(); $examples['Invalid entity inside attribute value'] = array( 'test', - 'test', + 'test', ); $examples['HTML tag opening inside attribute value'] = array( @@ -1147,152 +1147,152 @@ public function data_malformed_tag() { $examples['Single and double quotes in attribute value'] = array( '

test', - '

test', + '

test', ); $examples['Unquoted attribute values'] = array( '


test', - '
test', + '
test', ); $examples['Double-quotes escaped in double-quote attribute value'] = array( '
test', - '
test', + '
test', ); $examples['Unquoted attribute value'] = array( '
test', - '
test', + '
test', ); $examples['Unquoted attribute value with tag-like value'] = array( '
>test', - '
>test', + '
>test', ); $examples['Unquoted attribute value with tag-like value followed by tag-like data'] = array( '
>test', - '
>test', + '
>test', ); $examples['1'] = array( '
test', - '
test', + '
test', ); $examples['2'] = array( '
test', - '
test', + '
test', ); $examples['4'] = array( '
test', - '
test', + '
test', ); $examples['5'] = array( '
code>test', - '
code>test', + '
code>test', ); $examples['6'] = array( '
test', - '
test', + '
test', ); $examples['7'] = array( '
test', - '
test', + '
test', ); $examples['8'] = array( '
id="test">test', - '
id="test">test', + '
id="test">test', ); $examples['9'] = array( '
test', - '
test', + '
test', ); $examples['10'] = array( 'test', - 'test', + 'test', ); $examples['11'] = array( 'The applicative operator <* works well in Haskell; is what?test', - 'The applicative operator <* works well in Haskell; is what?test', + 'The applicative operator <* works well in Haskell; is what?test', ); $examples['12'] = array( '<3 is a heart but is a tag.test', - '<3 is a heart but is a tag.test', + '<3 is a heart but is a tag.test', ); $examples['13'] = array( 'test', - 'test', + 'test', ); $examples['14'] = array( 'test', - 'test', + 'test', ); $examples['15'] = array( ' a HTML Tag]]>test', - ' a HTML Tag]]>test', + ' a HTML Tag]]>test', ); $examples['16'] = array( '
test', - '
test', + '
test', ); $examples['17'] = array( '
test', - '
test', + '
test', ); $examples['18'] = array( '
test', - '
test', + '
test', ); $examples['19'] = array( '
test', - '
test', + '
test', ); $examples['20'] = array( '
test', - '
test', + '
test', ); $examples['21'] = array( '
test', - '
test', + '
test', ); $examples['22'] = array( '
test', - '
test', + '
test', ); $examples['23'] = array( '
test', - '
test', + '
test', ); $examples['24'] = array( '
test', - '
test', + '
test', ); $examples['25'] = array( '
test', - '
test', + '
test', ); $examples['Multiple unclosed tags treated as a single tag'] = array( @@ -1306,7 +1306,7 @@ public function data_malformed_tag() { HTML , <<test', - '
test', + '
test', ); $examples['28'] = array( '
test', - '
test', + '
test', ); return $examples;