Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

✨ [RUM-162] Truncate resources URL containing data URLs #2690

Merged
merged 12 commits into from
Apr 11, 2024
6 changes: 4 additions & 2 deletions packages/rum-core/src/domain/resource/resourceCollection.ts
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shouldn't we also truncate the url coming from performance resource entries?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Notes from our Slack discussion: we found that the Resource Timing specification says:

If an HTML IMG element has a data: URI as its source [RFC2397], then this resource will not be included as a PerformanceResourceTiming object in the Performance Timeline. By definition data: URI contains embedded data and does not require a fetch.

So we do not need to sanitize data URLs in this case.

Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@ import { RumPerformanceEntryType } from '../../browser/performanceCollection'
import type { RumXhrResourceEventDomainContext, RumFetchResourceEventDomainContext } from '../../domainContext.types'
import type { RawRumResourceEvent } from '../../rawRumEvent.types'
import { RumEventType } from '../../rawRumEvent.types'
import type { LifeCycle, RawRumEventCollectedData } from '../lifeCycle'
import { LifeCycleEventType } from '../lifeCycle'
import type { RawRumEventCollectedData, LifeCycle } from '../lifeCycle'
import type { RequestCompleteEvent } from '../requestCollection'
import type { RumSessionManager } from '../rumSessionManager'
import type { PageStateHistory } from '../contexts/pageStateHistory'
Expand All @@ -30,6 +30,8 @@ import {
computeResourceKind,
computeSize,
isRequestKind,
isLongDataUrl,
sanitizeDataUrl,
} from './resourceUtils'

export function startResourceCollection(
Expand Down Expand Up @@ -91,7 +93,7 @@ function processRequest(
duration,
method: request.method,
status_code: request.status,
url: request.url,
url: isLongDataUrl(request.url) ? sanitizeDataUrl(request.url) : request.url,
},
type: RumEventType.RESOURCE as const,
_dd: {
Expand Down
45 changes: 45 additions & 0 deletions packages/rum-core/src/domain/resource/resourceUtils.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,13 @@ import { RumPerformanceEntryType, type RumPerformanceResourceTiming } from '../.
import type { RumConfiguration } from '../configuration'
import { validateAndBuildRumConfiguration } from '../configuration'
import {
MAX_ATTRIBUTE_VALUE_CHAR_LENGTH,
computePerformanceResourceDetails,
computePerformanceResourceDuration,
computeResourceKind,
isAllowedRequestUrl,
isLongDataUrl,
sanitizeDataUrl,
} from './resourceUtils'

function generateResourceWith(overrides: Partial<RumPerformanceResourceTiming>) {
Expand Down Expand Up @@ -313,3 +316,45 @@ describe('shouldTrackResource', () => {
expect(isAllowedRequestUrl(configuration, 'https://my-domain.com/hello?a=b')).toBe(true)
})
})

describe('isLongDataUrl and sanitizeDataUrl', () => {
  // `new Array(n).join('a')` produces n - 1 characters, so any data URL header prepended to
  // this payload pushes the total length past MAX_ATTRIBUTE_VALUE_CHAR_LENGTH.
  const longString = new Array(MAX_ATTRIBUTE_VALUE_CHAR_LENGTH).join('a')

  // Each case is a data URL header; the expected sanitized value is that header followed by
  // the `[...]` truncation marker.
  const truncationCases: Array<{ title: string; header: string }> = [
    {
      title: 'returns truncated url when detects data url of json',
      header: 'data:text/json; charset=utf-8,',
    },
    {
      title: 'returns truncated url when detects data url of html',
      header: 'data:text/html,',
    },
    {
      title: 'returns truncated url when detects data url of image',
      header: 'data:image/svg+xml;base64,',
    },
    {
      title: 'returns truncated url when detects plain data url',
      header: 'data:,',
    },
    {
      title: 'returns truncated url when detects data url with exotic mime type',
      header: 'data:application/vnd.openxmlformats;fileName=officedocument.presentationxml;base64,',
    },
  ]

  truncationCases.forEach(({ title, header }) => {
    it(title, () => {
      const dataUrl = `${header}${longString}`
      expect(isLongDataUrl(dataUrl)).toEqual(true)
      expect(sanitizeDataUrl(dataUrl)).toEqual(`${header}[...]`)
    })
  })

  it('returns the original url when the data url is within limit', () => {
    // Dropping 5 characters from the payload makes the URL exactly as long as the limit.
    const withinLimitDataUrl = `data:,${longString.slice(5)}`
    expect(isLongDataUrl(withinLimitDataUrl)).toEqual(false)
  })

  it('returns false when no data url found', () => {
    const regularUrl = 'https://static.datad0g.com/static/c/70086/chunk.min.js'
    expect(isLongDataUrl(regularUrl)).toEqual(false)
  })
})
18 changes: 18 additions & 0 deletions packages/rum-core/src/domain/resource/resourceUtils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -215,3 +215,21 @@ export function computeSize(entry: RumPerformanceResourceTiming) {
/**
 * Checks that `url` is non-empty and is not reported as an intake URL by the configuration.
 *
 * @param configuration - configuration whose `isIntakeUrl` predicate is consulted
 * @param url - URL of the request to check
 * @returns a truthy value when `url` is non-empty and `configuration.isIntakeUrl(url)` is falsy.
 * Because of the `&&` short-circuit, an empty `url` is returned as-is (the empty string) rather
 * than `false`, so callers should only rely on the truthiness of the result.
 */
export function isAllowedRequestUrl(configuration: RumConfiguration, url: string) {
return url && !configuration.isIntakeUrl(url)
}

// Matches the header of a data URL: the `data:` scheme, an optional media type with its
// parameters (e.g. `;charset=utf-8`, `;base64`) and the first comma, which separates the header
// from the payload. `[^,]*` guarantees the match stops at the FIRST comma, even when the payload
// itself contains commas, and keeps the matching linear.
const DATA_URL_REGEX = /^data:[^,]*,/

/** Length, in characters, above which a `data:` URL is considered too long to be kept as-is. */
export const MAX_ATTRIBUTE_VALUE_CHAR_LENGTH = 24_000

/**
 * Tells whether `url` is a `data:` URL longer than MAX_ATTRIBUTE_VALUE_CHAR_LENGTH.
 *
 * @param url - the URL (or attribute value) to inspect
 * @returns `true` only when `url` starts with `data:` and its length is strictly greater than
 * MAX_ATTRIBUTE_VALUE_CHAR_LENGTH
 */
export function isLongDataUrl(url: string): boolean {
  return url.length > MAX_ATTRIBUTE_VALUE_CHAR_LENGTH && url.substring(0, 5) === 'data:'
}

/**
 * Replaces the payload of a data URL with a `[...]` marker, keeping only its header
 * (`data:<media type and parameters>,`).
 *
 * Intended to be called on values for which `isLongDataUrl` returned `true`.
 *
 * @param url - the data URL to sanitize
 * @returns the header of `url` followed by `[...]`. When no header can be found within the first
 * MAX_ATTRIBUTE_VALUE_CHAR_LENGTH characters (e.g. a malformed data URL without any comma),
 * `data:[...]` is returned instead of throwing.
 */
export function sanitizeDataUrl(url: string): string {
  // Only search the beginning of the string: this bounds the work done by the regex regardless
  // of the payload size, and guarantees the returned value can never carry the whole payload.
  const header = DATA_URL_REGEX.exec(url.substring(0, MAX_ATTRIBUTE_VALUE_CHAR_LENGTH))
  return `${header ? header[0] : 'data:'}[...]`
}
1 change: 1 addition & 0 deletions packages/rum-core/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -35,3 +35,4 @@ export { STABLE_ATTRIBUTES } from './domain/getSelectorFromElement'
export * from './browser/htmlDomUtils'
export * from './browser/polyfills'
export { getSessionReplayUrl } from './domain/getSessionReplayUrl'
export { isLongDataUrl, sanitizeDataUrl, MAX_ATTRIBUTE_VALUE_CHAR_LENGTH } from './domain/resource/resourceUtils'
2 changes: 0 additions & 2 deletions packages/rum/src/domain/record/privacy.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,6 @@ import {
CENSORED_STRING_MARK,
} from '../../constants'

export const MAX_ATTRIBUTE_VALUE_CHAR_LENGTH = 100_000

const TEXT_MASKING_CHAR = 'x'

export type NodePrivacyLevelCache = Map<Node, NodePrivacyLevel>
Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
import { isIE } from '@datadog/browser-core'

import type { RumConfiguration } from '@datadog/browser-rum-core'
import { STABLE_ATTRIBUTES, DEFAULT_PROGRAMMATIC_ACTION_NAME_ATTRIBUTE } from '@datadog/browser-rum-core'
import {
STABLE_ATTRIBUTES,
DEFAULT_PROGRAMMATIC_ACTION_NAME_ATTRIBUTE,
MAX_ATTRIBUTE_VALUE_CHAR_LENGTH,
} from '@datadog/browser-rum-core'
import { NodePrivacyLevel, PRIVACY_ATTR_NAME } from '../../../constants'
import { MAX_ATTRIBUTE_VALUE_CHAR_LENGTH } from '../privacy'
import { serializeAttribute } from './serializeAttribute'

const DEFAULT_CONFIGURATION = {} as RumConfiguration
Expand All @@ -18,13 +21,16 @@ describe('serializeAttribute', () => {
it('truncates "data:" URIs after long string length', () => {
const node = document.createElement('p')

const longString = new Array(MAX_ATTRIBUTE_VALUE_CHAR_LENGTH + 1 - 5).join('a')
const maxAttributeValue = `data:${longString}`
const exceededAttributeValue = `data:${longString}1`
const ignoredAttributeValue = `foos:${longString}`
const longString = new Array(MAX_ATTRIBUTE_VALUE_CHAR_LENGTH - 5).join('a')
const maxAttributeValue = `data:,${longString}`
const exceededAttributeValue = `data:,${longString}aa`
const dataUrlAttributeValue = `data:,${longString}a`
const truncatedValue = 'data:,[...]'
const ignoredAttributeValue = `foos:,${longString}`

node.setAttribute('test-okay', maxAttributeValue)
node.setAttribute('test-truncate', exceededAttributeValue)
node.setAttribute('test-truncate', dataUrlAttributeValue)
node.setAttribute('test-ignored', ignoredAttributeValue)

expect(serializeAttribute(node, NodePrivacyLevel.ALLOW, 'test-okay', DEFAULT_CONFIGURATION)).toBe(maxAttributeValue)
Expand All @@ -35,11 +41,10 @@ describe('serializeAttribute', () => {
)

expect(serializeAttribute(node, NodePrivacyLevel.ALLOW, 'test-truncate', DEFAULT_CONFIGURATION)).toBe(
'data:truncated'
)
expect(serializeAttribute(node, NodePrivacyLevel.MASK, 'test-truncate', DEFAULT_CONFIGURATION)).toBe(
'data:truncated'
truncatedValue
)
expect(serializeAttribute(node, NodePrivacyLevel.MASK, 'test-truncate', DEFAULT_CONFIGURATION)).toBe(truncatedValue)
expect(serializeAttribute(node, NodePrivacyLevel.MASK, 'test-truncate', DEFAULT_CONFIGURATION)).toBe(truncatedValue)
})

it('does not mask the privacy attribute', () => {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
import { startsWith } from '@datadog/browser-core'
import { STABLE_ATTRIBUTES } from '@datadog/browser-rum-core'
import { STABLE_ATTRIBUTES, isLongDataUrl, sanitizeDataUrl } from '@datadog/browser-rum-core'
import type { RumConfiguration } from '@datadog/browser-rum-core'
import { NodePrivacyLevel, PRIVACY_ATTR_NAME, CENSORED_STRING_MARK, CENSORED_IMG_MARK } from '../../../constants'
import { MAX_ATTRIBUTE_VALUE_CHAR_LENGTH } from '../privacy'
import { censoredImageForSize } from './serializationUtils'

export function serializeAttribute(
Expand Down Expand Up @@ -69,8 +68,8 @@ export function serializeAttribute(
}

// Minimum Fix for customer.
if (attributeValue.length > MAX_ATTRIBUTE_VALUE_CHAR_LENGTH && attributeValue.slice(0, 5) === 'data:') {
return 'data:truncated'
if (isLongDataUrl(attributeValue)) {
return sanitizeDataUrl(attributeValue)
}

return attributeValue
Expand Down