From d4d06eda40e2a39003032c882730a1d869d4af10 Mon Sep 17 00:00:00 2001
From: Ron S
Date: Sat, 22 May 2021 18:24:18 -0400
Subject: [PATCH 1/4] fix issue taoqf#115: preserve legitimate leading/trailing whitespace in removeWhitespace()
---
src/nodes/html.ts | 38 +++++++++++++++++++++++++++++++++++++-
test/html.js | 8 +++++++-
2 files changed, 44 insertions(+), 2 deletions(-)
diff --git a/src/nodes/html.ts b/src/nodes/html.ts
index 4d1f21b..2e51f75 100644
--- a/src/nodes/html.ts
+++ b/src/nodes/html.ts
@@ -133,6 +133,42 @@ export default class HTMLElement extends Node {
return JSON.stringify(attr.replace(/"/g, '"'));
}
+
+ /**
+ * Trim all whitespace except single leading/trailing non-breaking space
+ * @param text string to trim
+ * @returns {string} trimmed value
+ * @private
+ */
+ private trimTextNodeWhitespace(text: string): string {
+ let i = 0;
+ let startPos;
+ let endPos;
+
+ while (i >= 0 && i < text.length) {
+ if (/\S/.test(text[i])) {
+ if (startPos === undefined) {
+ startPos = i;
+ i = text.length;
+ } else {
+ endPos = i;
+ i = void 0;
+ }
+ }
+
+ if (startPos === undefined) i++;
+ else i--;
+ }
+
+ if (startPos === undefined) startPos = 0;
+ if (endPos === undefined) endPos = text.length - 1;
+
+ const hasLeadingSpace = startPos > 0 && /[^\S\r\n]/.test(text[startPos-1]);
+ const hasTrailingSpace = endPos < (text.length - 1) && /[^\S\r\n]/.test(text[endPos+1]);
+
+ return (hasLeadingSpace ? ' ' : '') + text.slice(startPos, endPos + 1) + (hasTrailingSpace ? ' ' : '');
+ }
+
/**
* Creates an instance of HTMLElement.
* @param keyAttrs id and class attribute
@@ -401,7 +437,7 @@ export default class HTMLElement extends Node {
if ((node as TextNode).isWhitespace) {
return;
}
- node.rawText = node.rawText.trim();
+ node.rawText = this.trimTextNodeWhitespace(node.rawText);
} else if (node.nodeType === NodeType.ELEMENT_NODE) {
(node as HTMLElement).removeWhitespace();
}
diff --git a/test/html.js b/test/html.js
index 6c9b8de..f8c6c01 100644
--- a/test/html.js
+++ b/test/html.js
@@ -198,7 +198,7 @@ describe('HTML Parser', function () {
describe('#removeWhitespace()', function () {
it('should remove whitespaces while preserving nodes with content', function () {
- const root = parseHTML(' \r \n \t
123
');
+ const root = parseHTML(' \r \n \t
123
');
const p = new HTMLElement('p', {}, '', root);
p.appendChild(new HTMLElement('h5', {}, ''))
@@ -206,6 +206,12 @@ describe('HTML Parser', function () {
root.firstChild.removeWhitespace().should.eql(p);
});
+
+ it('should preserve legitimate leading/trailing whitespace in TextNode', function () {
+ parseHTML('Hello World!
').removeWhitespace().firstChild.text.should.eql('Hello World!');
+ parseHTML('\t\nHello\n\tWorld!
').removeWhitespace().firstChild.text.should.eql('HelloWorld!');
+ parseHTML('\t\n Hello \n\tWorld!
').removeWhitespace().firstChild.text.should.eql(' Hello World!');
+ });
});
describe('#rawAttributes', function () {
From b75a51d0279f1a1f0ae382e78e712192b40ef024 Mon Sep 17 00:00:00 2001
From: Ron S
Date: Sat, 22 May 2021 18:58:49 -0400
Subject: [PATCH 2/4] Refactor: move trimming into TextNode#trimmedText and use it in structuredText
---
src/nodes/html.ts | 39 ++-------------------------------------
src/nodes/text.ts | 39 +++++++++++++++++++++++++++++++++++++++
test/html.js | 2 +-
3 files changed, 42 insertions(+), 38 deletions(-)
diff --git a/src/nodes/html.ts b/src/nodes/html.ts
index 2e51f75..10a9a8c 100644
--- a/src/nodes/html.ts
+++ b/src/nodes/html.ts
@@ -134,41 +134,6 @@ export default class HTMLElement extends Node {
return JSON.stringify(attr.replace(/"/g, '"'));
}
- /**
- * Trim all whitespace except single leading/trailing non-breaking space
- * @param text string to trim
- * @returns {string} trimmed value
- * @private
- */
- private trimTextNodeWhitespace(text: string): string {
- let i = 0;
- let startPos;
- let endPos;
-
- while (i >= 0 && i < text.length) {
- if (/\S/.test(text[i])) {
- if (startPos === undefined) {
- startPos = i;
- i = text.length;
- } else {
- endPos = i;
- i = void 0;
- }
- }
-
- if (startPos === undefined) i++;
- else i--;
- }
-
- if (startPos === undefined) startPos = 0;
- if (endPos === undefined) endPos = text.length - 1;
-
- const hasLeadingSpace = startPos > 0 && /[^\S\r\n]/.test(text[startPos-1]);
- const hasTrailingSpace = endPos < (text.length - 1) && /[^\S\r\n]/.test(text[endPos+1]);
-
- return (hasLeadingSpace ? ' ' : '') + text.slice(startPos, endPos + 1) + (hasTrailingSpace ? ' ' : '');
- }
-
/**
* Creates an instance of HTMLElement.
* @param keyAttrs id and class attribute
@@ -296,7 +261,7 @@ export default class HTMLElement extends Node {
// Whitespace node, postponed output
currentBlock.prependWhitespace = true;
} else {
- let text = node.text;
+ let text = (node).trimmedText;
if (currentBlock.prependWhitespace) {
text = ` ${text}`;
currentBlock.prependWhitespace = false;
@@ -437,7 +402,7 @@ export default class HTMLElement extends Node {
if ((node as TextNode).isWhitespace) {
return;
}
- node.rawText = this.trimTextNodeWhitespace(node.rawText);
+ node.rawText = (node).trimmedText;
} else if (node.nodeType === NodeType.ELEMENT_NODE) {
(node as HTMLElement).removeWhitespace();
}
diff --git a/src/nodes/text.ts b/src/nodes/text.ts
index 068666a..eabfa50 100644
--- a/src/nodes/text.ts
+++ b/src/nodes/text.ts
@@ -17,6 +17,45 @@ export default class TextNode extends Node {
*/
public nodeType = NodeType.TEXT_NODE;
+ private _trimmedText?: string;
+
+ /**
+ * Returns text with all whitespace trimmed except single leading/trailing non-breaking space
+ */
+ public get trimmedText() {
+ if (this._trimmedText !== undefined) return this._trimmedText;
+
+ const text = this.rawText;
+ let i = 0;
+ let startPos;
+ let endPos;
+
+ while (i >= 0 && i < text.length) {
+ if (/\S/.test(text[i])) {
+ if (startPos === undefined) {
+ startPos = i;
+ i = text.length;
+ } else {
+ endPos = i;
+ i = void 0;
+ }
+ }
+
+ if (startPos === undefined) i++;
+ else i--;
+ }
+
+ if (startPos === undefined) startPos = 0;
+ if (endPos === undefined) endPos = text.length - 1;
+
+ const hasLeadingSpace = startPos > 0 && /[^\S\r\n]/.test(text[startPos-1]);
+ const hasTrailingSpace = endPos < (text.length - 1) && /[^\S\r\n]/.test(text[endPos+1]);
+
+ this._trimmedText = (hasLeadingSpace ? ' ' : '') + text.slice(startPos, endPos + 1) + (hasTrailingSpace ? ' ' : '');
+
+ return this._trimmedText;
+ }
+
/**
* Get unescaped text value of current node and its children.
* @return {string} text content
diff --git a/test/html.js b/test/html.js
index f8c6c01..19eaff3 100644
--- a/test/html.js
+++ b/test/html.js
@@ -202,7 +202,7 @@ describe('HTML Parser', function () {
const p = new HTMLElement('p', {}, '', root);
p.appendChild(new HTMLElement('h5', {}, ''))
- .appendChild(new TextNode('123'));
+ .appendChild(Object.assign(new TextNode('123'), { _trimmedText: '123' }));
root.firstChild.removeWhitespace().should.eql(p);
});
From 0195dd3abb08469cf320835947d080cccb9e239e Mon Sep 17 00:00:00 2001
From: Ron S
Date: Sat, 22 May 2021 19:09:19 -0400
Subject: [PATCH 3/4] style: Normalize indents to tab
---
src/nodes/text.ts | 56 +++++++++++++++++++++++------------------------
test/html.js | 8 +++----
2 files changed, 32 insertions(+), 32 deletions(-)
diff --git a/src/nodes/text.ts b/src/nodes/text.ts
index eabfa50..9b473d7 100644
--- a/src/nodes/text.ts
+++ b/src/nodes/text.ts
@@ -19,42 +19,42 @@ export default class TextNode extends Node {
private _trimmedText?: string;
- /**
- * Returns text with all whitespace trimmed except single leading/trailing non-breaking space
- */
+ /**
+ * Returns the raw text with surrounding whitespace trimmed, keeping a single space on each side where the adjacent whitespace character is not a line break (\r or \n)
+ */
public get trimmedText() {
- if (this._trimmedText !== undefined) return this._trimmedText;
+ if (this._trimmedText !== undefined) return this._trimmedText;
- const text = this.rawText;
- let i = 0;
- let startPos;
- let endPos;
+ const text = this.rawText;
+ let i = 0;
+ let startPos;
+ let endPos;
- while (i >= 0 && i < text.length) {
- if (/\S/.test(text[i])) {
- if (startPos === undefined) {
- startPos = i;
- i = text.length;
- } else {
- endPos = i;
- i = void 0;
- }
- }
+ while (i >= 0 && i < text.length) {
+ if (/\S/.test(text[i])) {
+ if (startPos === undefined) {
+ startPos = i;
+ i = text.length;
+ } else {
+ endPos = i;
+ i = void 0;
+ }
+ }
- if (startPos === undefined) i++;
- else i--;
- }
+ if (startPos === undefined) i++;
+ else i--;
+ }
- if (startPos === undefined) startPos = 0;
- if (endPos === undefined) endPos = text.length - 1;
+ if (startPos === undefined) startPos = 0;
+ if (endPos === undefined) endPos = text.length - 1;
- const hasLeadingSpace = startPos > 0 && /[^\S\r\n]/.test(text[startPos-1]);
- const hasTrailingSpace = endPos < (text.length - 1) && /[^\S\r\n]/.test(text[endPos+1]);
+ const hasLeadingSpace = startPos > 0 && /[^\S\r\n]/.test(text[startPos-1]);
+ const hasTrailingSpace = endPos < (text.length - 1) && /[^\S\r\n]/.test(text[endPos+1]);
- this._trimmedText = (hasLeadingSpace ? ' ' : '') + text.slice(startPos, endPos + 1) + (hasTrailingSpace ? ' ' : '');
+ this._trimmedText = (hasLeadingSpace ? ' ' : '') + text.slice(startPos, endPos + 1) + (hasTrailingSpace ? ' ' : '');
- return this._trimmedText;
- }
+ return this._trimmedText;
+ }
/**
* Get unescaped text value of current node and its children.
diff --git a/test/html.js b/test/html.js
index 19eaff3..0b898e8 100644
--- a/test/html.js
+++ b/test/html.js
@@ -208,10 +208,10 @@ describe('HTML Parser', function () {
});
it('should preserve legitimate leading/trailing whitespace in TextNode', function () {
- parseHTML('Hello World!
').removeWhitespace().firstChild.text.should.eql('Hello World!');
- parseHTML('\t\nHello\n\tWorld!
').removeWhitespace().firstChild.text.should.eql('HelloWorld!');
- parseHTML('\t\n Hello \n\tWorld!
').removeWhitespace().firstChild.text.should.eql(' Hello World!');
- });
+ parseHTML('Hello World!
').removeWhitespace().firstChild.text.should.eql('Hello World!');
+ parseHTML('\t\nHello\n\tWorld!
').removeWhitespace().firstChild.text.should.eql('HelloWorld!');
+ parseHTML('\t\n Hello \n\tWorld!
').removeWhitespace().firstChild.text.should.eql(' Hello World!');
+ });
});
describe('#rawAttributes', function () {
From d914efad83817511becf7331f60fd79d1a807446 Mon Sep 17 00:00:00 2001
From: Ron S
Date: Sat, 22 May 2021 19:34:51 -0400
Subject: [PATCH 4/4] test: Improve testing
---
test/html.js | 15 ++++++++++-----
1 file changed, 10 insertions(+), 5 deletions(-)
diff --git a/test/html.js b/test/html.js
index 0b898e8..e84c20b 100644
--- a/test/html.js
+++ b/test/html.js
@@ -126,10 +126,10 @@ describe('HTML Parser', function () {
const script = root.firstChild;
const style = root.lastChild;
script.childNodes.should.not.be.empty;
- script.childNodes.should.eql([new TextNode('1', script)]);
+ script.childNodes.should.eql([ new TextNode('1', script) ]);
script.text.should.eql('1');
style.childNodes.should.not.be.empty;
- style.childNodes.should.eql([new TextNode('2&', style)]);
+ style.childNodes.should.eql([ new TextNode('2&', style) ]);
style.text.should.eql('2&');
style.rawText.should.eql('2&');
});
@@ -198,11 +198,16 @@ describe('HTML Parser', function () {
describe('#removeWhitespace()', function () {
it('should remove whitespaces while preserving nodes with content', function () {
- const root = parseHTML(' \r \n \t
123
');
+ const root = parseHTML(' \r \n \t
123
');
+
+ const textNode = new TextNode(' 123 ');
+ textNode.rawText = textNode.trimmedText;
+ textNode.rawText.should.eql(' 123 ');
const p = new HTMLElement('p', {}, '', root);
- p.appendChild(new HTMLElement('h5', {}, ''))
- .appendChild(Object.assign(new TextNode('123'), { _trimmedText: '123' }));
+ p
+ .appendChild(new HTMLElement('h5', {}, ''))
+ .appendChild(textNode);
root.firstChild.removeWhitespace().should.eql(p);
});