No public description

PiperOrigin-RevId: 658781590
google · Aug 5, 2024 · 7899cd1 · 7899cd1
1 parent 24a2851
commit 7899cd1
Show file tree

Hide file tree

Showing 6 changed files with 226 additions and 79 deletions.
diff --git a/src/builders/html_sanitizer/css/sanitizer.ts b/src/builders/html_sanitizer/css/sanitizer.ts
@@ -16,7 +16,7 @@
  *    that bug and possibly other ones.
  */
 
-import {safeStyleEl} from '../../../dom/index.js';
+import {setTextContent} from '../../../dom/elements/style.js';
 import {createStyleSheetInternal} from '../../../internals/style_sheet_impl.js';
 import {
   ResourceUrlPolicy,
@@ -51,7 +51,7 @@ class CssSanitizer {
   private getStyleSheet(cssText: string): CSSStyleSheet {
     const style = this.inertDocument.createElement('style');
     const safeStyle = createStyleSheetInternal(cssText);
-    safeStyleEl.setTextContent(style, safeStyle);
+    setTextContent(style, safeStyle);
     this.inertDocument.head.appendChild(style);
     const sheet = style.sheet!; // guaranteed to be non-null
     style.remove();

diff --git a/src/builders/html_sanitizer/css/tokenizer.ts b/src/builders/html_sanitizer/css/tokenizer.ts
@@ -76,23 +76,23 @@ class Tokenizer {
    *
    * https://www.w3.org/TR/2021/CRD-css-syntax-3-20211224/#next-input-code-point
    */
-  private get nextInputCodePoint(): string | EOF {
+  private nextInputCodePoint(): string | EOF {
     return this.css[this.pos];
   }
 
-  private get nextTwoInputCodePoints(): [string | EOF, string | EOF] {
+  private nextTwoInputCodePoints(): [string | EOF, string | EOF] {
     return [this.css[this.pos], this.css[this.pos + 1]];
   }
 
-  private get nextThreeInputCodePoints(): [
+  private nextThreeInputCodePoints(): [
     string | EOF,
     string | EOF,
     string | EOF,
   ] {
     return [this.css[this.pos], this.css[this.pos + 1], this.css[this.pos + 2]];
   }
 
-  private get currentInputCodePoint(): string | EOF {
+  private currentInputCodePoint(): string | EOF {
     return this.css[this.pos - 1];
   }
 
@@ -136,7 +136,7 @@ class Tokenizer {
       // ":ho st", which is safe.
       return {tokenKind: CssTokenKind.WHITESPACE};
     }
-    const codePoint = this.nextInputCodePoint;
+    const codePoint = this.nextInputCodePoint();
     this.consumeTheNextInputCodePoint();
     if (codePoint === EOF) {
       return {tokenKind: CssTokenKind.EOF};
@@ -147,8 +147,8 @@ class Tokenizer {
       return this.consumeString(codePoint);
     } else if (codePoint === '#') {
       if (
-        this.isIdentCodePoint(this.nextInputCodePoint) ||
-        this.twoCodePointsAreValidEscape(...this.nextTwoInputCodePoints)
+        this.isIdentCodePoint(this.nextInputCodePoint()) ||
+        this.twoCodePointsAreValidEscape(...this.nextTwoInputCodePoints())
       ) {
         // In spec there's also a step to check if the next three code points
         // would start an ident sequence. However, the only reason to do so
@@ -208,7 +208,7 @@ class Tokenizer {
     } else if (codePoint === '@') {
       if (
         this.threeCodePointsWouldStartAnIdentSequence(
-          ...this.nextThreeInputCodePoints,
+          ...this.nextThreeInputCodePoints(),
         )
       ) {
         const ident = this.consumeIdentSequence();
@@ -271,7 +271,7 @@ class Tokenizer {
       value: '',
     };
     while (true) {
-      const codePoint = this.nextInputCodePoint;
+      const codePoint = this.nextInputCodePoint();
       this.consumeTheNextInputCodePoint();
       if (codePoint === EOF || codePoint === quote) {
         return stringToken;
@@ -283,10 +283,10 @@ class Tokenizer {
         stringToken.value = '';
         return stringToken;
       } else if (codePoint === '\\') {
-        if (this.nextInputCodePoint === EOF) {
+        if (this.nextInputCodePoint() === EOF) {
           // > If the next input code point is EOF, do nothing.
           continue;
-        } else if (this.isNewline(this.nextInputCodePoint)) {
+        } else if (this.isNewline(this.nextInputCodePoint())) {
           this.consumeTheNextInputCodePoint();
         } else {
           const escapedCodePoint = this.consumeEscapedCodePoint();
@@ -300,7 +300,7 @@ class Tokenizer {
 
   /** https://www.w3.org/TR/2021/CRD-css-syntax-3-20211224/#consume-an-escaped-code-point */
   private consumeEscapedCodePoint(): string {
-    const codePoint = this.nextInputCodePoint;
+    const codePoint = this.nextInputCodePoint();
     this.consumeTheNextInputCodePoint();
     if (codePoint === EOF) {
       return '\ufffd';
@@ -311,12 +311,15 @@ class Tokenizer {
       // The spec assumes here that the first hex digit has already been
       // consumed. So in fact, the maximum number of hex digits that can be
       // consumed is 6.
-      while (this.isHexDigit(this.nextInputCodePoint) && hexDigits.length < 6) {
-        hexDigits += this.nextInputCodePoint;
+      while (
+        this.isHexDigit(this.nextInputCodePoint()) &&
+        hexDigits.length < 6
+      ) {
+        hexDigits += this.nextInputCodePoint();
         this.consumeTheNextInputCodePoint();
       }
       // Whitespace directly following an escape sequence is ignored.
-      if (this.isWhitespace(this.nextInputCodePoint)) {
+      if (this.isWhitespace(this.nextInputCodePoint())) {
         this.consumeTheNextInputCodePoint();
       }
       // Needed to parse hexadecimal.
@@ -329,7 +332,7 @@ class Tokenizer {
   }
 
   private consumeAsMuchWhitespaceAsPossible() {
-    while (this.isWhitespace(this.nextInputCodePoint)) {
+    while (this.isWhitespace(this.nextInputCodePoint())) {
       this.consumeTheNextInputCodePoint();
     }
   }
@@ -338,9 +341,9 @@ class Tokenizer {
   private consumeIdentSequence(): string {
     let result = '';
     while (true) {
-      const codePoint = this.nextInputCodePoint;
+      const codePoint = this.nextInputCodePoint();
       this.consumeTheNextInputCodePoint();
-      const codePoint2 = this.nextInputCodePoint;
+      const codePoint2 = this.nextInputCodePoint();
       if (this.isIdentCodePoint(codePoint)) {
         result += codePoint;
       } else if (this.twoCodePointsAreValidEscape(codePoint, codePoint2)) {
@@ -355,15 +358,15 @@ class Tokenizer {
   /** https://www.w3.org/TR/2021/CRD-css-syntax-3-20211224/#consume-an-ident-like-token */
   private consumeIdentLikeToken(): CssToken | CssToken[] {
     const ident = this.consumeIdentSequence();
-    if (/^url$/i.test(ident) && this.nextInputCodePoint === '(') {
+    if (/^url$/i.test(ident) && this.nextInputCodePoint() === '(') {
       // TODO(securitymb): This algorithm may look a little weird but we're
       // following the spec here exactly. We will see later on if this can be
       // optimized.
       this.consumeTheNextInputCodePoint();
       while (this.nextTwoInputsPointsAreWhitespace()) {
         this.consumeTheNextInputCodePoint();
       }
-      const nextTwo = this.nextTwoInputCodePoints;
+      const nextTwo = this.nextTwoInputCodePoints();
       if (
         (this.isWhitespace(nextTwo[0]) &&
           (nextTwo[1] === '"' || nextTwo[1] === "'")) ||
@@ -376,7 +379,7 @@ class Tokenizer {
       } else {
         return this.consumeUrlToken();
       }
-    } else if (this.nextInputCodePoint === '(') {
+    } else if (this.nextInputCodePoint() === '(') {
       this.consumeTheNextInputCodePoint();
       // We lowercase the function name because function names are
       // case-insensitive in CSS.
@@ -413,15 +416,15 @@ class Tokenizer {
     let url = '';
     this.consumeAsMuchWhitespaceAsPossible();
     while (true) {
-      const codePoint = this.nextInputCodePoint;
+      const codePoint = this.nextInputCodePoint();
       this.consumeTheNextInputCodePoint();
       if (codePoint === ')' || codePoint === EOF) {
         return this.createFunctionUrlToken(url);
       } else if (this.isWhitespace(codePoint)) {
         this.consumeAsMuchWhitespaceAsPossible();
         if (
-          this.nextInputCodePoint === ')' ||
-          this.nextInputCodePoint === EOF
+          this.nextInputCodePoint() === ')' ||
+          this.nextInputCodePoint() === EOF
         ) {
           this.consumeTheNextInputCodePoint();
           return this.createFunctionUrlToken(url);
@@ -462,7 +465,7 @@ class Tokenizer {
   /** https://www.w3.org/TR/2021/CRD-css-syntax-3-20211224/#consume-the-remnants-of-a-bad-url */
   private consumeRemnantsOfBadUrl() {
     while (true) {
-      const codePoint = this.nextInputCodePoint;
+      const codePoint = this.nextInputCodePoint();
       this.consumeTheNextInputCodePoint();
       if (codePoint === EOF || codePoint === ')') {
         return;
@@ -484,23 +487,23 @@ class Tokenizer {
   private consumeNumber(): string {
     let repr = '';
     {
-      const next = this.nextInputCodePoint;
+      const next = this.nextInputCodePoint();
       if (next === '+' || next === '-') {
         this.consumeTheNextInputCodePoint();
         repr += next;
       }
     }
     repr += this.consumeDigits();
     {
-      const next = this.nextInputCodePoint;
+      const next = this.nextInputCodePoint();
       const next2 = this.css[this.pos + 1];
       if (next === '.' && this.isDigit(next2)) {
         this.consumeTheNextInputCodePoint();
         repr += '.' + this.consumeDigits();
       }
     }
     {
-      const next = this.nextInputCodePoint;
+      const next = this.nextInputCodePoint();
       const next2 = this.css[this.pos + 1];
       const next3 = this.css[this.pos + 2];
       if (next === 'e' || next === 'E') {
@@ -518,8 +521,8 @@ class Tokenizer {
 
   private consumeDigits(): string {
     let repr = '';
-    while (this.isDigit(this.nextInputCodePoint)) {
-      repr += this.nextInputCodePoint;
+    while (this.isDigit(this.nextInputCodePoint())) {
+      repr += this.nextInputCodePoint();
       this.consumeTheNextInputCodePoint();
     }
     return repr;
@@ -533,7 +536,7 @@ class Tokenizer {
     const repr = this.consumeNumber();
     if (
       this.threeCodePointsWouldStartAnIdentSequence(
-        ...this.nextThreeInputCodePoints,
+        ...this.nextThreeInputCodePoints(),
       )
     ) {
       return {
@@ -542,15 +545,15 @@ class Tokenizer {
         dimension: this.consumeIdentSequence(),
       };
     }
-    if (this.nextInputCodePoint === '%') {
+    if (this.nextInputCodePoint() === '%') {
       this.consumeTheNextInputCodePoint();
       return {tokenKind: CssTokenKind.PERCENTAGE, repr};
     }
     return {tokenKind: CssTokenKind.NUMBER, repr};
   }
 
   private nextTwoInputsPointsAreWhitespace() {
-    return this.nextTwoInputCodePoints.every((c) => this.isWhitespace(c));
+    return this.nextTwoInputCodePoints().every((c) => this.isWhitespace(c));
   }
 
   /** https://www.w3.org/TR/2021/CRD-css-syntax-3-20211224/#check-if-two-code-points-are-a-valid-escape */
@@ -563,8 +566,8 @@ class Tokenizer {
 
   private streamStartsWithValidEscape() {
     return this.twoCodePointsAreValidEscape(
-      this.currentInputCodePoint,
-      this.nextInputCodePoint,
+      this.currentInputCodePoint(),
+      this.nextInputCodePoint(),
     );
   }
 
@@ -588,8 +591,8 @@ class Tokenizer {
 
   private streamStartsWithANumber() {
     return this.threeCodePointsWouldStartANumber(
-      this.currentInputCodePoint,
-      ...this.nextTwoInputCodePoints,
+      this.currentInputCodePoint(),
+      ...this.nextTwoInputCodePoints(),
     );
   }
 
@@ -618,8 +621,8 @@ class Tokenizer {
 
   private streamStartsWithAnIdentSequence() {
     return this.threeCodePointsWouldStartAnIdentSequence(
-      this.currentInputCodePoint,
-      ...this.nextTwoInputCodePoints,
+      this.currentInputCodePoint(),
+      ...this.nextTwoInputCodePoints(),
     );
   }
 

diff --git a/src/builders/html_sanitizer/default_css_sanitizer.ts b/src/builders/html_sanitizer/default_css_sanitizer.ts
@@ -0,0 +1,22 @@
+/**
+ * @license
+ * SPDX-License-Identifier: Apache-2.0
+ */
+/**
+ * @fileoverview This file exports a default instance of the CSS sanitizer,
+ * similarly to how the default instance of the HTML sanitizer is exported.
+ *
+ * The reason why it's in a separate file is to ensure that html_sanitizer.ts
+ * doesn't depend on html_sanitizer_builder.ts, which would cause
+ * a circular dependency.
+ */
+
+import {pure} from '../../internals/pure.js';
+import {CssSanitizerBuilder} from './html_sanitizer_builder.js';
+const defaultCssSanitizer = /* #__PURE__ */ pure(() =>
+  new CssSanitizerBuilder().build(),
+);
+/** Sanitizes untrusted input to a DocumentFragment with the default CSS sanitizer. */
+export function sanitizeHtmlWithCss(css: string): DocumentFragment {
+  return defaultCssSanitizer.sanitizeToFragment(css);
+}