Skip to content

Commit

Permalink
fixed correct encoding when xml pi is in content, fixes #46
Browse files Browse the repository at this point in the history
  • Loading branch information
Rct567 committed Jan 18, 2024
1 parent 20e2e25 commit b3357e7
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 5 deletions.
9 changes: 6 additions & 3 deletions src/Rct567/DomQuery/DomQueryNodes.php
Original file line number Diff line number Diff line change
Expand Up @@ -353,16 +353,19 @@ public function addDomNode(\DOMNode $dom_node, $prepend=false)
*/
public function loadContent(string $content, $encoding='UTF-8')
{

$this->preserve_no_newlines = (strpos($content, '<') !== false && strpos($content, "\n") === false);

$content_has_leading_pi = stripos($content, '<?xml') === 0;

if (!\is_bool($this->xml_mode)) {
$this->xml_mode = (stripos($content, '<?xml') === 0);
$this->xml_mode = $content_has_leading_pi;
}

$this->xml_print_pi = (stripos($content, '<?xml') === 0);
$this->xml_print_pi = $content_has_leading_pi;

$xml_pi_node_added = false;
if (!$this->xml_mode && $encoding && stripos($content, '<?xml') === false) {
if (!$this->xml_mode && $encoding && !$content_has_leading_pi) {
$content = '<?xml encoding="'.$encoding.'">'.$content; // add pi node to make libxml use the correct encoding
$xml_pi_node_added = true;
}
Expand Down
29 changes: 27 additions & 2 deletions tests/Rct567/DomQuery/Tests/DomQueryTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,33 @@ public function testLoadingUf8AndGettingSameContent()
$this->assertEquals('Iñtërnâtiônàlizætiøn', $dom->text());
}

/*
* Test loading UTF-8 HTML with an irrelevant (non-leading) XML processing instruction in the content
*/
public function testLoadingUf8AndGettingSameContentWithPiInContent()
{
    // The processing instruction sits at the very end of the markup and names a
    // different charset (iso-8859-1) than the UTF-8 text that precedes it.
    $markup = '<div><h1>Iñtërnâtiônàlizætiøn</h1></div><a>k</a><?xml version="1.0" encoding="iso-8859-1"?>';
    $expected_header = '<h1>Iñtërnâtiônàlizætiøn</h1>';

    $query = new DomQuery($markup);

    // Casting the whole document back to a string must give the input unchanged
    $this->assertEquals($markup, (string) $query);

    // The multibyte characters inside the header must come back unchanged as well
    $this->assertEquals($expected_header, (string) $query->find('h1'));
}

/*
* Test loading UTF-8 content that starts with an XML processing instruction
*/
public function testLoadingUf8AndGettingSameContentWithLeadingPi()
{
    $with_pi = '<?xml version="1.0" encoding="UTF-8"?><div><h1>Iñtërnâtiônàlizætiøn</h1></div>';
    $without_pi = '<div><h1>Iñtërnâtiônàlizætiøn</h1></div>';
    $expected_header = '<h1>Iñtërnâtiônàlizætiøn</h1>';

    $query = new DomQuery($with_pi);

    // Content that begins with the xml declaration is expected to switch xml mode on
    $this->assertTrue($query->xml_mode);

    // In xml mode the declaration is part of the output, so the round trip is exact
    $this->assertEquals($with_pi, (string) $query);
    $this->assertEquals($expected_header, (string) $query->find('h1'));

    // With xml mode turned off afterwards, the output no longer carries the declaration
    $query->xml_mode = false;
    $this->assertEquals($without_pi, (string) $query);
}

/*
* Test loading html with new lines
*/
Expand All @@ -121,8 +148,6 @@ public function testPreserverAttributeWithoutValue()
$this->assertEquals('<div selected>a</div>', (string) $dom);
}



/*
* Test change attribute without value in xml write mode
*/
Expand Down

0 comments on commit b3357e7

Please sign in to comment.