Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

No public description #425

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions src/builders/html_sanitizer/css/sanitizer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
* that bug and possibly other ones.
*/

import {safeStyleEl} from '../../../dom/index.js';
import {setTextContent} from '../../../dom/elements/style.js';
import {createStyleSheetInternal} from '../../../internals/style_sheet_impl.js';
import {
ResourceUrlPolicy,
Expand Down Expand Up @@ -51,7 +51,7 @@ class CssSanitizer {
private getStyleSheet(cssText: string): CSSStyleSheet {
const style = this.inertDocument.createElement('style');
const safeStyle = createStyleSheetInternal(cssText);
safeStyleEl.setTextContent(style, safeStyle);
setTextContent(style, safeStyle);
this.inertDocument.head.appendChild(style);
const sheet = style.sheet!; // guaranteed to be non-null
style.remove();
Expand Down
81 changes: 42 additions & 39 deletions src/builders/html_sanitizer/css/tokenizer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -76,23 +76,23 @@ class Tokenizer {
*
* https://www.w3.org/TR/2021/CRD-css-syntax-3-20211224/#next-input-code-point
*/
private get nextInputCodePoint(): string | EOF {
private nextInputCodePoint(): string | EOF {
// Plain index read at `pos`; `pos` itself is not modified here.
return this.css[this.pos];
}

/**
 * Returns, as a pair, the entries of `css` at indices `pos` and `pos + 1`.
 * `pos` is not modified. Each entry is typed as `string | EOF`.
 */
private get nextTwoInputCodePoints(): [string | EOF, string | EOF] {
private nextTwoInputCodePoints(): [string | EOF, string | EOF] {
return [this.css[this.pos], this.css[this.pos + 1]];
}

/**
 * Returns, as a triple, the entries of `css` at indices `pos`, `pos + 1`
 * and `pos + 2`. `pos` is not modified. Each entry is typed as
 * `string | EOF`.
 */
private get nextThreeInputCodePoints(): [
private nextThreeInputCodePoints(): [
string | EOF,
string | EOF,
string | EOF,
] {
return [this.css[this.pos], this.css[this.pos + 1], this.css[this.pos + 2]];
}

/**
 * Returns the entry of `css` located immediately before the current
 * position, i.e. at index `pos - 1`. `pos` is not modified.
 */
private get currentInputCodePoint(): string | EOF {
private currentInputCodePoint(): string | EOF {
return this.css[this.pos - 1];
}

Expand Down Expand Up @@ -136,7 +136,7 @@ class Tokenizer {
// ":ho st", which is safe.
return {tokenKind: CssTokenKind.WHITESPACE};
}
const codePoint = this.nextInputCodePoint;
const codePoint = this.nextInputCodePoint();
this.consumeTheNextInputCodePoint();
if (codePoint === EOF) {
return {tokenKind: CssTokenKind.EOF};
Expand All @@ -147,8 +147,8 @@ class Tokenizer {
return this.consumeString(codePoint);
} else if (codePoint === '#') {
if (
this.isIdentCodePoint(this.nextInputCodePoint) ||
this.twoCodePointsAreValidEscape(...this.nextTwoInputCodePoints)
this.isIdentCodePoint(this.nextInputCodePoint()) ||
this.twoCodePointsAreValidEscape(...this.nextTwoInputCodePoints())
) {
// In spec there's also a step to check if the next three code points
// would start an ident sequence. However, the only reason to do so
Expand Down Expand Up @@ -208,7 +208,7 @@ class Tokenizer {
} else if (codePoint === '@') {
if (
this.threeCodePointsWouldStartAnIdentSequence(
...this.nextThreeInputCodePoints,
...this.nextThreeInputCodePoints(),
)
) {
const ident = this.consumeIdentSequence();
Expand Down Expand Up @@ -271,7 +271,7 @@ class Tokenizer {
value: '',
};
while (true) {
const codePoint = this.nextInputCodePoint;
const codePoint = this.nextInputCodePoint();
this.consumeTheNextInputCodePoint();
if (codePoint === EOF || codePoint === quote) {
return stringToken;
Expand All @@ -283,10 +283,10 @@ class Tokenizer {
stringToken.value = '';
return stringToken;
} else if (codePoint === '\\') {
if (this.nextInputCodePoint === EOF) {
if (this.nextInputCodePoint() === EOF) {
// > If the next input code point is EOF, do nothing.
continue;
} else if (this.isNewline(this.nextInputCodePoint)) {
} else if (this.isNewline(this.nextInputCodePoint())) {
this.consumeTheNextInputCodePoint();
} else {
const escapedCodePoint = this.consumeEscapedCodePoint();
Expand All @@ -300,7 +300,7 @@ class Tokenizer {

/** https://www.w3.org/TR/2021/CRD-css-syntax-3-20211224/#consume-an-escaped-code-point */
private consumeEscapedCodePoint(): string {
const codePoint = this.nextInputCodePoint;
const codePoint = this.nextInputCodePoint();
this.consumeTheNextInputCodePoint();
if (codePoint === EOF) {
return '\ufffd';
Expand All @@ -311,12 +311,15 @@ class Tokenizer {
// The spec assumes here that the first hex digit has already been
// consumed. So in fact, the maximum number of hex digits that can be
// consumed is 6.
while (this.isHexDigit(this.nextInputCodePoint) && hexDigits.length < 6) {
hexDigits += this.nextInputCodePoint;
while (
this.isHexDigit(this.nextInputCodePoint()) &&
hexDigits.length < 6
) {
hexDigits += this.nextInputCodePoint();
this.consumeTheNextInputCodePoint();
}
// Whitespace directly following an escape sequence is ignored.
if (this.isWhitespace(this.nextInputCodePoint)) {
if (this.isWhitespace(this.nextInputCodePoint())) {
this.consumeTheNextInputCodePoint();
}
// Needed to parse hexadecimal.
Expand All @@ -329,7 +332,7 @@ class Tokenizer {
}

/**
 * Keeps calling `consumeTheNextInputCodePoint()` for as long as the next
 * input code point satisfies `isWhitespace`. Does nothing when the next
 * input code point is not whitespace.
 */
private consumeAsMuchWhitespaceAsPossible() {
while (this.isWhitespace(this.nextInputCodePoint)) {
while (this.isWhitespace(this.nextInputCodePoint())) {
this.consumeTheNextInputCodePoint();
}
}
Expand All @@ -338,9 +341,9 @@ class Tokenizer {
private consumeIdentSequence(): string {
let result = '';
while (true) {
const codePoint = this.nextInputCodePoint;
const codePoint = this.nextInputCodePoint();
this.consumeTheNextInputCodePoint();
const codePoint2 = this.nextInputCodePoint;
const codePoint2 = this.nextInputCodePoint();
if (this.isIdentCodePoint(codePoint)) {
result += codePoint;
} else if (this.twoCodePointsAreValidEscape(codePoint, codePoint2)) {
Expand All @@ -355,15 +358,15 @@ class Tokenizer {
/** https://www.w3.org/TR/2021/CRD-css-syntax-3-20211224/#consume-an-ident-like-token */
private consumeIdentLikeToken(): CssToken | CssToken[] {
const ident = this.consumeIdentSequence();
if (/^url$/i.test(ident) && this.nextInputCodePoint === '(') {
if (/^url$/i.test(ident) && this.nextInputCodePoint() === '(') {
// TODO(securitymb): This algorithm may look a little weird but we're
// following the spec here exactly. We will see later on if this can be
// optimized.
this.consumeTheNextInputCodePoint();
while (this.nextTwoInputsPointsAreWhitespace()) {
this.consumeTheNextInputCodePoint();
}
const nextTwo = this.nextTwoInputCodePoints;
const nextTwo = this.nextTwoInputCodePoints();
if (
(this.isWhitespace(nextTwo[0]) &&
(nextTwo[1] === '"' || nextTwo[1] === "'")) ||
Expand All @@ -376,7 +379,7 @@ class Tokenizer {
} else {
return this.consumeUrlToken();
}
} else if (this.nextInputCodePoint === '(') {
} else if (this.nextInputCodePoint() === '(') {
this.consumeTheNextInputCodePoint();
// We lowercase the function name because function names are
// case-insensitive in CSS.
Expand Down Expand Up @@ -413,15 +416,15 @@ class Tokenizer {
let url = '';
this.consumeAsMuchWhitespaceAsPossible();
while (true) {
const codePoint = this.nextInputCodePoint;
const codePoint = this.nextInputCodePoint();
this.consumeTheNextInputCodePoint();
if (codePoint === ')' || codePoint === EOF) {
return this.createFunctionUrlToken(url);
} else if (this.isWhitespace(codePoint)) {
this.consumeAsMuchWhitespaceAsPossible();
if (
this.nextInputCodePoint === ')' ||
this.nextInputCodePoint === EOF
this.nextInputCodePoint() === ')' ||
this.nextInputCodePoint() === EOF
) {
this.consumeTheNextInputCodePoint();
return this.createFunctionUrlToken(url);
Expand Down Expand Up @@ -462,7 +465,7 @@ class Tokenizer {
/** https://www.w3.org/TR/2021/CRD-css-syntax-3-20211224/#consume-the-remnants-of-a-bad-url */
private consumeRemnantsOfBadUrl() {
while (true) {
const codePoint = this.nextInputCodePoint;
const codePoint = this.nextInputCodePoint();
this.consumeTheNextInputCodePoint();
if (codePoint === EOF || codePoint === ')') {
return;
Expand All @@ -484,23 +487,23 @@ class Tokenizer {
private consumeNumber(): string {
let repr = '';
{
const next = this.nextInputCodePoint;
const next = this.nextInputCodePoint();
if (next === '+' || next === '-') {
this.consumeTheNextInputCodePoint();
repr += next;
}
}
repr += this.consumeDigits();
{
const next = this.nextInputCodePoint;
const next = this.nextInputCodePoint();
const next2 = this.css[this.pos + 1];
if (next === '.' && this.isDigit(next2)) {
this.consumeTheNextInputCodePoint();
repr += '.' + this.consumeDigits();
}
}
{
const next = this.nextInputCodePoint;
const next = this.nextInputCodePoint();
const next2 = this.css[this.pos + 1];
const next3 = this.css[this.pos + 2];
if (next === 'e' || next === 'E') {
Expand All @@ -518,8 +521,8 @@ class Tokenizer {

private consumeDigits(): string {
let repr = '';
while (this.isDigit(this.nextInputCodePoint)) {
repr += this.nextInputCodePoint;
while (this.isDigit(this.nextInputCodePoint())) {
repr += this.nextInputCodePoint();
this.consumeTheNextInputCodePoint();
}
return repr;
Expand All @@ -533,7 +536,7 @@ class Tokenizer {
const repr = this.consumeNumber();
if (
this.threeCodePointsWouldStartAnIdentSequence(
...this.nextThreeInputCodePoints,
...this.nextThreeInputCodePoints(),
)
) {
return {
Expand All @@ -542,15 +545,15 @@ class Tokenizer {
dimension: this.consumeIdentSequence(),
};
}
if (this.nextInputCodePoint === '%') {
if (this.nextInputCodePoint() === '%') {
this.consumeTheNextInputCodePoint();
return {tokenKind: CssTokenKind.PERCENTAGE, repr};
}
return {tokenKind: CssTokenKind.NUMBER, repr};
}

/**
 * Returns true only when both of the next two input code points satisfy
 * `isWhitespace`.
 */
private nextTwoInputsPointsAreWhitespace() {
return this.nextTwoInputCodePoints.every((c) => this.isWhitespace(c));
return this.nextTwoInputCodePoints().every((c) => this.isWhitespace(c));
}

/** https://www.w3.org/TR/2021/CRD-css-syntax-3-20211224/#check-if-two-code-points-are-a-valid-escape */
Expand All @@ -563,8 +566,8 @@ class Tokenizer {

/**
 * Applies `twoCodePointsAreValidEscape` to the current input code point
 * followed by the next input code point, and returns its result.
 */
private streamStartsWithValidEscape() {
return this.twoCodePointsAreValidEscape(
this.currentInputCodePoint,
this.nextInputCodePoint,
this.currentInputCodePoint(),
this.nextInputCodePoint(),
);
}

Expand All @@ -588,8 +591,8 @@ class Tokenizer {

/**
 * Applies `threeCodePointsWouldStartANumber` to the current input code
 * point followed by the next two input code points, and returns its result.
 */
private streamStartsWithANumber() {
return this.threeCodePointsWouldStartANumber(
this.currentInputCodePoint,
...this.nextTwoInputCodePoints,
this.currentInputCodePoint(),
...this.nextTwoInputCodePoints(),
);
}

Expand Down Expand Up @@ -618,8 +621,8 @@ class Tokenizer {

/**
 * Applies `threeCodePointsWouldStartAnIdentSequence` to the current input
 * code point followed by the next two input code points, and returns its
 * result.
 */
private streamStartsWithAnIdentSequence() {
return this.threeCodePointsWouldStartAnIdentSequence(
this.currentInputCodePoint,
...this.nextTwoInputCodePoints,
this.currentInputCodePoint(),
...this.nextTwoInputCodePoints(),
);
}

Expand Down
22 changes: 22 additions & 0 deletions src/builders/html_sanitizer/default_css_sanitizer.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
/**
* @license
* SPDX-License-Identifier: Apache-2.0
*/
/**
* @fileoverview This file exports a default instance of the CSS sanitizer,
* similarly to how the default instance of the HTML sanitizer is exported.
*
* The reason why it's in a separate file is to ensure that html_sanitizer.ts
* doesn't depend on html_sanitizer_builder.ts, which would cause
* a circular dependency.
*/

import {pure} from '../../internals/pure.js';
import {CssSanitizerBuilder} from './html_sanitizer_builder.js';
/**
 * Default CSS sanitizer instance, produced by calling `build()` on a
 * `CssSanitizerBuilder` that has received no additional configuration.
 *
 * The construction is wrapped in `pure()` and annotated with `#__PURE__`;
 * presumably this lets bundlers drop the instance when it is unused — confirm
 * against `internals/pure`.
 */
const defaultCssSanitizer = /* #__PURE__ */ pure(() => {
  const builder = new CssSanitizerBuilder();
  return builder.build();
});
/**
 * Runs untrusted input through the default sanitizer instance
 * (`defaultCssSanitizer`) and returns the resulting fragment.
 *
 * NOTE(review): the function name refers to HTML with CSS while the parameter
 * is named `css` — confirm which kind of markup callers are expected to pass.
 *
 * @param css Untrusted input, forwarded unchanged to `sanitizeToFragment`.
 * @returns The `DocumentFragment` returned by the default sanitizer.
 */
export function sanitizeHtmlWithCss(css: string): DocumentFragment {
  const sanitized = defaultCssSanitizer.sanitizeToFragment(css);
  return sanitized;
}
Loading
Loading