Skip to content

Commit

Permalink
No public description
Browse files Browse the repository at this point in the history
PiperOrigin-RevId: 658781590
  • Loading branch information
securityMB authored and copybara-github committed Aug 5, 2024
1 parent 24a2851 commit 7899cd1
Show file tree
Hide file tree
Showing 6 changed files with 226 additions and 79 deletions.
4 changes: 2 additions & 2 deletions src/builders/html_sanitizer/css/sanitizer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
* that bug and possibly other ones.
*/

import {safeStyleEl} from '../../../dom/index.js';
import {setTextContent} from '../../../dom/elements/style.js';
import {createStyleSheetInternal} from '../../../internals/style_sheet_impl.js';
import {
ResourceUrlPolicy,
Expand Down Expand Up @@ -51,7 +51,7 @@ class CssSanitizer {
private getStyleSheet(cssText: string): CSSStyleSheet {
const style = this.inertDocument.createElement('style');
const safeStyle = createStyleSheetInternal(cssText);
safeStyleEl.setTextContent(style, safeStyle);
setTextContent(style, safeStyle);
this.inertDocument.head.appendChild(style);
const sheet = style.sheet!; // guaranteed to be non-null
style.remove();
Expand Down
81 changes: 42 additions & 39 deletions src/builders/html_sanitizer/css/tokenizer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -76,23 +76,23 @@ class Tokenizer {
*
* https://www.w3.org/TR/2021/CRD-css-syntax-3-20211224/#next-input-code-point
*/
private get nextInputCodePoint(): string | EOF {
private nextInputCodePoint(): string | EOF {
return this.css[this.pos];
}

private get nextTwoInputCodePoints(): [string | EOF, string | EOF] {
private nextTwoInputCodePoints(): [string | EOF, string | EOF] {
return [this.css[this.pos], this.css[this.pos + 1]];
}

private get nextThreeInputCodePoints(): [
private nextThreeInputCodePoints(): [
string | EOF,
string | EOF,
string | EOF,
] {
return [this.css[this.pos], this.css[this.pos + 1], this.css[this.pos + 2]];
}

private get currentInputCodePoint(): string | EOF {
private currentInputCodePoint(): string | EOF {
return this.css[this.pos - 1];
}

Expand Down Expand Up @@ -136,7 +136,7 @@ class Tokenizer {
// ":ho st", which is safe.
return {tokenKind: CssTokenKind.WHITESPACE};
}
const codePoint = this.nextInputCodePoint;
const codePoint = this.nextInputCodePoint();
this.consumeTheNextInputCodePoint();
if (codePoint === EOF) {
return {tokenKind: CssTokenKind.EOF};
Expand All @@ -147,8 +147,8 @@ class Tokenizer {
return this.consumeString(codePoint);
} else if (codePoint === '#') {
if (
this.isIdentCodePoint(this.nextInputCodePoint) ||
this.twoCodePointsAreValidEscape(...this.nextTwoInputCodePoints)
this.isIdentCodePoint(this.nextInputCodePoint()) ||
this.twoCodePointsAreValidEscape(...this.nextTwoInputCodePoints())
) {
// In spec there's also a step to check if the next three code points
// would start an ident sequence. However, the only reason to do so
Expand Down Expand Up @@ -208,7 +208,7 @@ class Tokenizer {
} else if (codePoint === '@') {
if (
this.threeCodePointsWouldStartAnIdentSequence(
...this.nextThreeInputCodePoints,
...this.nextThreeInputCodePoints(),
)
) {
const ident = this.consumeIdentSequence();
Expand Down Expand Up @@ -271,7 +271,7 @@ class Tokenizer {
value: '',
};
while (true) {
const codePoint = this.nextInputCodePoint;
const codePoint = this.nextInputCodePoint();
this.consumeTheNextInputCodePoint();
if (codePoint === EOF || codePoint === quote) {
return stringToken;
Expand All @@ -283,10 +283,10 @@ class Tokenizer {
stringToken.value = '';
return stringToken;
} else if (codePoint === '\\') {
if (this.nextInputCodePoint === EOF) {
if (this.nextInputCodePoint() === EOF) {
// > If the next input code point is EOF, do nothing.
continue;
} else if (this.isNewline(this.nextInputCodePoint)) {
} else if (this.isNewline(this.nextInputCodePoint())) {
this.consumeTheNextInputCodePoint();
} else {
const escapedCodePoint = this.consumeEscapedCodePoint();
Expand All @@ -300,7 +300,7 @@ class Tokenizer {

/** https://www.w3.org/TR/2021/CRD-css-syntax-3-20211224/#consume-an-escaped-code-point */
private consumeEscapedCodePoint(): string {
const codePoint = this.nextInputCodePoint;
const codePoint = this.nextInputCodePoint();
this.consumeTheNextInputCodePoint();
if (codePoint === EOF) {
return '\ufffd';
Expand All @@ -311,12 +311,15 @@ class Tokenizer {
// The spec assumes here that the first hex digit has already been
// consumed. So in fact, the maximum number of hex digits that can be
// consumed is 6.
while (this.isHexDigit(this.nextInputCodePoint) && hexDigits.length < 6) {
hexDigits += this.nextInputCodePoint;
while (
this.isHexDigit(this.nextInputCodePoint()) &&
hexDigits.length < 6
) {
hexDigits += this.nextInputCodePoint();
this.consumeTheNextInputCodePoint();
}
// Whitespace directly following an escape sequence is ignored.
if (this.isWhitespace(this.nextInputCodePoint)) {
if (this.isWhitespace(this.nextInputCodePoint())) {
this.consumeTheNextInputCodePoint();
}
// Needed to parse hexadecimal.
Expand All @@ -329,7 +332,7 @@ class Tokenizer {
}

private consumeAsMuchWhitespaceAsPossible() {
while (this.isWhitespace(this.nextInputCodePoint)) {
while (this.isWhitespace(this.nextInputCodePoint())) {
this.consumeTheNextInputCodePoint();
}
}
Expand All @@ -338,9 +341,9 @@ class Tokenizer {
private consumeIdentSequence(): string {
let result = '';
while (true) {
const codePoint = this.nextInputCodePoint;
const codePoint = this.nextInputCodePoint();
this.consumeTheNextInputCodePoint();
const codePoint2 = this.nextInputCodePoint;
const codePoint2 = this.nextInputCodePoint();
if (this.isIdentCodePoint(codePoint)) {
result += codePoint;
} else if (this.twoCodePointsAreValidEscape(codePoint, codePoint2)) {
Expand All @@ -355,15 +358,15 @@ class Tokenizer {
/** https://www.w3.org/TR/2021/CRD-css-syntax-3-20211224/#consume-an-ident-like-token */
private consumeIdentLikeToken(): CssToken | CssToken[] {
const ident = this.consumeIdentSequence();
if (/^url$/i.test(ident) && this.nextInputCodePoint === '(') {
if (/^url$/i.test(ident) && this.nextInputCodePoint() === '(') {
// TODO(securitymb): This algorithm may look a little weird but we're
// following the spec here exactly. We will see later on if this can be
// optimized.
this.consumeTheNextInputCodePoint();
while (this.nextTwoInputsPointsAreWhitespace()) {
this.consumeTheNextInputCodePoint();
}
const nextTwo = this.nextTwoInputCodePoints;
const nextTwo = this.nextTwoInputCodePoints();
if (
(this.isWhitespace(nextTwo[0]) &&
(nextTwo[1] === '"' || nextTwo[1] === "'")) ||
Expand All @@ -376,7 +379,7 @@ class Tokenizer {
} else {
return this.consumeUrlToken();
}
} else if (this.nextInputCodePoint === '(') {
} else if (this.nextInputCodePoint() === '(') {
this.consumeTheNextInputCodePoint();
// We lowercase the function name because function names are
// case-insensitive in CSS.
Expand Down Expand Up @@ -413,15 +416,15 @@ class Tokenizer {
let url = '';
this.consumeAsMuchWhitespaceAsPossible();
while (true) {
const codePoint = this.nextInputCodePoint;
const codePoint = this.nextInputCodePoint();
this.consumeTheNextInputCodePoint();
if (codePoint === ')' || codePoint === EOF) {
return this.createFunctionUrlToken(url);
} else if (this.isWhitespace(codePoint)) {
this.consumeAsMuchWhitespaceAsPossible();
if (
this.nextInputCodePoint === ')' ||
this.nextInputCodePoint === EOF
this.nextInputCodePoint() === ')' ||
this.nextInputCodePoint() === EOF
) {
this.consumeTheNextInputCodePoint();
return this.createFunctionUrlToken(url);
Expand Down Expand Up @@ -462,7 +465,7 @@ class Tokenizer {
/** https://www.w3.org/TR/2021/CRD-css-syntax-3-20211224/#consume-the-remnants-of-a-bad-url */
private consumeRemnantsOfBadUrl() {
while (true) {
const codePoint = this.nextInputCodePoint;
const codePoint = this.nextInputCodePoint();
this.consumeTheNextInputCodePoint();
if (codePoint === EOF || codePoint === ')') {
return;
Expand All @@ -484,23 +487,23 @@ class Tokenizer {
private consumeNumber(): string {
let repr = '';
{
const next = this.nextInputCodePoint;
const next = this.nextInputCodePoint();
if (next === '+' || next === '-') {
this.consumeTheNextInputCodePoint();
repr += next;
}
}
repr += this.consumeDigits();
{
const next = this.nextInputCodePoint;
const next = this.nextInputCodePoint();
const next2 = this.css[this.pos + 1];
if (next === '.' && this.isDigit(next2)) {
this.consumeTheNextInputCodePoint();
repr += '.' + this.consumeDigits();
}
}
{
const next = this.nextInputCodePoint;
const next = this.nextInputCodePoint();
const next2 = this.css[this.pos + 1];
const next3 = this.css[this.pos + 2];
if (next === 'e' || next === 'E') {
Expand All @@ -518,8 +521,8 @@ class Tokenizer {

private consumeDigits(): string {
let repr = '';
while (this.isDigit(this.nextInputCodePoint)) {
repr += this.nextInputCodePoint;
while (this.isDigit(this.nextInputCodePoint())) {
repr += this.nextInputCodePoint();
this.consumeTheNextInputCodePoint();
}
return repr;
Expand All @@ -533,7 +536,7 @@ class Tokenizer {
const repr = this.consumeNumber();
if (
this.threeCodePointsWouldStartAnIdentSequence(
...this.nextThreeInputCodePoints,
...this.nextThreeInputCodePoints(),
)
) {
return {
Expand All @@ -542,15 +545,15 @@ class Tokenizer {
dimension: this.consumeIdentSequence(),
};
}
if (this.nextInputCodePoint === '%') {
if (this.nextInputCodePoint() === '%') {
this.consumeTheNextInputCodePoint();
return {tokenKind: CssTokenKind.PERCENTAGE, repr};
}
return {tokenKind: CssTokenKind.NUMBER, repr};
}

private nextTwoInputsPointsAreWhitespace() {
return this.nextTwoInputCodePoints.every((c) => this.isWhitespace(c));
return this.nextTwoInputCodePoints().every((c) => this.isWhitespace(c));
}

/** https://www.w3.org/TR/2021/CRD-css-syntax-3-20211224/#check-if-two-code-points-are-a-valid-escape */
Expand All @@ -563,8 +566,8 @@ class Tokenizer {

private streamStartsWithValidEscape() {
return this.twoCodePointsAreValidEscape(
this.currentInputCodePoint,
this.nextInputCodePoint,
this.currentInputCodePoint(),
this.nextInputCodePoint(),
);
}

Expand All @@ -588,8 +591,8 @@ class Tokenizer {

private streamStartsWithANumber() {
return this.threeCodePointsWouldStartANumber(
this.currentInputCodePoint,
...this.nextTwoInputCodePoints,
this.currentInputCodePoint(),
...this.nextTwoInputCodePoints(),
);
}

Expand Down Expand Up @@ -618,8 +621,8 @@ class Tokenizer {

private streamStartsWithAnIdentSequence() {
return this.threeCodePointsWouldStartAnIdentSequence(
this.currentInputCodePoint,
...this.nextTwoInputCodePoints,
this.currentInputCodePoint(),
...this.nextTwoInputCodePoints(),
);
}

Expand Down
22 changes: 22 additions & 0 deletions src/builders/html_sanitizer/default_css_sanitizer.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
/**
* @license
* SPDX-License-Identifier: Apache-2.0
*/
/**
* @fileoverview This file exports a default instance of the CSS sanitizer,
* similarly to how the default instance of the HTML sanitizer is exported.
*
* The reason why it's in a separate file is to ensure that html_sanitizer.ts
* doesn't depend on html_sanitizer_builder.ts, which would cause
* a circular dependency.
*/

import {pure} from '../../internals/pure.js';
import {CssSanitizerBuilder} from './html_sanitizer_builder.js';
// Module-level sanitizer instance obtained from a CssSanitizerBuilder on which
// no additional configuration is applied in this file.
// NOTE(review): the `/* #__PURE__ */` annotation combined with the `pure()`
// wrapper is presumably here so bundlers can drop this initializer when the
// export below is unused — confirm against internals/pure.
const defaultCssSanitizer = /* #__PURE__ */ pure(() =>
new CssSanitizerBuilder().build(),
);
/**
 * Passes the untrusted input through the module's default sanitizer instance
 * and hands back the sanitized result as a DocumentFragment.
 *
 * NOTE(review): judging by the function name and the fragment return type,
 * `css` presumably carries HTML markup that may contain CSS rather than a bare
 * stylesheet — confirm against CssSanitizerBuilder.
 *
 * @param css The untrusted string to sanitize.
 * @returns The fragment produced by the default sanitizer.
 */
export function sanitizeHtmlWithCss(css: string): DocumentFragment {
  const sanitizedFragment = defaultCssSanitizer.sanitizeToFragment(css);
  return sanitizedFragment;
}
Loading

0 comments on commit 7899cd1

Please sign in to comment.