Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ref(node) refactor contextlines to use readline #12221

Merged
merged 29 commits into from
Jun 17, 2024
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
2d042a2
ref(node): use readline for context lines
JonasBa May 25, 2024
1fc2544
test(node): contextline tests
JonasBa May 26, 2024
b0df967
test(node): contextline tests
JonasBa May 26, 2024
3a785bc
ref(node): contextlines cache attempt
JonasBa May 28, 2024
28f576f
ref(node) exclude other contextlines extensions
JonasBa Jun 9, 2024
2cfda9b
test(node) fix contextlines test
JonasBa Jun 10, 2024
db2df8d
fix scoped test
JonasBa Jun 10, 2024
2be28a5
lint
JonasBa Jun 10, 2024
93e8173
fix regexp danger warning
JonasBa Jun 10, 2024
a949d73
Merge branch 'develop' into jb/node/contextlines-memory
JonasBa Jun 10, 2024
584dad2
strict undefined check
JonasBa Jun 10, 2024
00fd3fc
reshuffle order
JonasBa Jun 10, 2024
6588673
test adding error handler on input stream
JonasBa Jun 10, 2024
96a617e
lint
JonasBa Jun 10, 2024
b4a51f6
fix duplicate reads on stream error
JonasBa Jun 11, 2024
421178e
fix duplicate reads on stream error
JonasBa Jun 11, 2024
306a11b
add emplace
JonasBa Jun 11, 2024
0dca6c5
fix(node) last line is not picked up as it does not include \n
JonasBa Jun 11, 2024
dea113b
ref(contexlines) use ordered startsWith instead of regexp and store c…
JonasBa Jun 12, 2024
e684eb6
Merge branch 'develop' into jb/node/contextlines-memory
JonasBa Jun 12, 2024
9af7dfc
ref(node) remove all but cache set snipline occurences
JonasBa Jun 12, 2024
83a8d05
Merge branch 'develop' into jb/node/contextlines-memory
JonasBa Jun 13, 2024
17dea5b
Merge branch 'develop' into jb/node/contextlines-memory
JonasBa Jun 13, 2024
2e3fe8f
Merge branch 'develop' into jb/node/contextlines-memory
JonasBa Jun 14, 2024
eecbf5f
Merge branch 'develop' into jb/node/contextlines-memory
JonasBa Jun 17, 2024
e79dac2
fix(contextlines) update type assertions
JonasBa Jun 17, 2024
ef1fa79
fix(contextlines) use runtime checks instead
JonasBa Jun 17, 2024
cc4305a
ref(node) fix types in tests
JonasBa Jun 17, 2024
bd35821
test(node) fix wrong type assertion
JonasBa Jun 17, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
328 changes: 250 additions & 78 deletions packages/node/src/integrations/contextlines.ts
Original file line number Diff line number Diff line change
@@ -1,22 +1,15 @@
import { promises } from 'node:fs';
import { createReadStream } from 'node:fs';
import { createInterface } from 'node:readline';
import { defineIntegration } from '@sentry/core';
import type { Event, IntegrationFn, StackFrame } from '@sentry/types';
import { LRUMap, addContextToFrame } from '@sentry/utils';
import { logger, LRUMap, snipLine } from '@sentry/utils';

const FILE_CONTENT_CACHE = new LRUMap<string, string[] | null>(100);
import { DEBUG_BUILD } from '../debug-build';

const LRU_FILE_CONTENTS_CACHE = new LRUMap<string, Record<number, string>>(10);
const DEFAULT_LINES_OF_CONTEXT = 7;
const INTEGRATION_NAME = 'ContextLines';

const readFileAsync = promises.readFile;

/**
* Resets the file cache. Exists for testing purposes.
* @hidden
*/
export function resetFileContentCache(): void {
FILE_CONTENT_CACHE.clear();
}

interface ContextLinesOptions {
/**
* Sets the number of context lines for each frame when loading a file.
Expand All @@ -27,62 +20,186 @@
frameContextLines?: number;
}

/** Exported only for tests, as a type-safe variant. */
export const _contextLinesIntegration = ((options: ContextLinesOptions = {}) => {
const contextLines = options.frameContextLines !== undefined ? options.frameContextLines : DEFAULT_LINES_OF_CONTEXT;
/**
* Exported for testing purposes.
*/
export function resetFileContentCache(): void {
LRU_FILE_CONTENTS_CACHE.clear();
}

return {
name: INTEGRATION_NAME,
processEvent(event) {
return addSourceContext(event, contextLines);
},
};
}) satisfies IntegrationFn;
/**
* Determines if context lines should be skipped for a file.
* - .min.(mjs|cjs|js) files are and not useful since they dont point to the original source
* - node: prefixed modules are part of the runtime and cannot be resolved to a file
* - data: skip json, wasm and inline js https://nodejs.org/api/esm.html#data-imports
*/
const SKIP_CONTEXTLINES_REGEXP = /^(node|data):|\.min\.(mjs|cjs|js$)/;
Fixed Show fixed Hide fixed
function shouldSkipContextLinesForFile(path: string): boolean {
return SKIP_CONTEXTLINES_REGEXP.test(path);
JonasBa marked this conversation as resolved.
Show resolved Hide resolved
}
/**
* Checks if we have all the contents that we need in the cache.
*/
function rangeExistsInContentCache(file: string, range: ReadlineRange): boolean {
const contents = LRU_FILE_CONTENTS_CACHE.get(file);
if (!contents) return false;

for (let i = range[0]; i <= range[1]; i++) {
if (contents[i] === undefined) {
return false;
}
}

return true;
}

/**
* Capture the lines before and after the frame's context.
* Creates contiguous ranges of lines to read from a file. In the case where context lines overlap,
* the ranges are merged to create a single range.
*/
export const contextLinesIntegration = defineIntegration(_contextLinesIntegration);
function makeLineReaderRanges(lines: number[], linecontext: number): ReadlineRange[] {
if (!lines.length) {
return [];
}

let i = 0;
let current = makeContextRange(lines[i], linecontext);
const out: ReadlineRange[] = [];
// eslint-disable-next-line no-constant-condition
while (true) {
if (i === lines.length - 1) {
out.push(current);
break;
}

// If the next line falls into the current range, extend the current range to lineno + linecontext.
const next = lines[i + 1];
if (next <= current[1]) {
current[1] = next + linecontext;
} else {
out.push(current);
current = makeContextRange(next, linecontext);
}

i++;
}

return out;
}

/**
* Extracts lines from a file and stores them in a cache.
*/
function getContextLinesFromFile(path: string, ranges: ReadlineRange[], output: Record<number, string>): Promise<void> {
return new Promise((resolve, _reject) => {
// It is important *not* to have any async code between createInterface and the 'line' event listener
// as it will cause the 'line' event to
// be emitted before the listener is attached.
const fileStream = createInterface({
input: createReadStream(path),
});

// Init at zero and increment at the start of the loop because lines are 1 indexed.
let lineNumber = 0;
let currentRangeIndex = 0;
let rangeStart = ranges[currentRangeIndex][0];
let rangeEnd = ranges[currentRangeIndex][1];

fileStream.on('line', line => {
lineNumber++;
if (lineNumber < rangeStart) return;

// !Warning: Mutates the cache value
output[lineNumber] = line;

if (lineNumber >= rangeEnd) {
if (currentRangeIndex === ranges.length - 1) {
// We need to close the file stream and remove listeners, else the reader will continue to run our listener;
fileStream.close();
fileStream.removeAllListeners();
return;
}
currentRangeIndex++;
rangeStart = ranges[currentRangeIndex][0];
rangeEnd = ranges[currentRangeIndex][1];
}
});

fileStream.on('close', resolve);
// We use this inside Promise.all, so we need to resolve the promise even if there is an error
// to prevent Promise.all from short circuiting the rest.
fileStream.on('error', e => {
DEBUG_BUILD && logger.error(`Failed to read file: ${path}. Error: ${e}`);
resolve();
});
});
}

async function addSourceContext(event: Event, contextLines: number): Promise<Event> {
// keep a lookup map of which files we've already enqueued to read,
// so we don't enqueue the same file multiple times which would cause multiple i/o reads
const enqueuedReadSourceFileTasks: Record<string, number> = {};
const readSourceFileTasks: Promise<string[] | null>[] = [];
const filesToLines: Record<string, number[]> = {};

if (contextLines > 0 && event.exception?.values) {
for (const exception of event.exception.values) {
if (!exception.stacktrace?.frames) {
if (!exception.stacktrace?.frames?.length) {
continue;
}

// We want to iterate in reverse order as calling cache.get will bump the file in our LRU cache.
// This ends up prioritizes source context for frames at the top of the stack instead of the bottom.
// Maps preserve insertion order, so we iterate in reverse, starting at the
// outermost frame and closer to where the exception has occurred (poor mans priority)
for (let i = exception.stacktrace.frames.length - 1; i >= 0; i--) {
const frame = exception.stacktrace.frames[i];
// Call cache.get to bump the file to the top of the cache and ensure we have not already
// enqueued a read operation for this filename
if (frame.filename && !enqueuedReadSourceFileTasks[frame.filename] && !FILE_CONTENT_CACHE.get(frame.filename)) {
readSourceFileTasks.push(_readSourceFile(frame.filename));
enqueuedReadSourceFileTasks[frame.filename] = 1;

if (
typeof frame.filename !== 'string' ||
typeof frame.lineno !== 'number' ||
JonasBa marked this conversation as resolved.
Show resolved Hide resolved
shouldSkipContextLinesForFile(frame.filename)
) {
continue;
}

if (!filesToLines[frame.filename]) filesToLines[frame.filename] = [];
filesToLines[frame.filename].push(frame.lineno);
}
}
}

// check if files to read > 0, if so, await all of them to be read before adding source contexts.
// Normally, Promise.all here could be short circuited if one of the promises rejects, but we
// are guarding from that by wrapping the i/o read operation in a try/catch.
if (readSourceFileTasks.length > 0) {
await Promise.all(readSourceFileTasks);
const files = Object.keys(filesToLines);
if (files.length == 0) {
return event;
}

const readlinePromises: Promise<void>[] = [];
for (const file of files) {
// Sort ranges so that they are sorted by line increasing order and match how the file is read.
filesToLines[file].sort((a, b) => a - b);
const ranges = makeLineReaderRanges(filesToLines[file], contextLines);

// If the contents are already in the cache, then we dont need to read the file.
if (ranges.every(r => rangeExistsInContentCache(file, r))) {
continue;
}

let cache = LRU_FILE_CONTENTS_CACHE.get(file);
if (!cache) {
cache = {};
LRU_FILE_CONTENTS_CACHE.set(file, cache);
}
readlinePromises.push(getContextLinesFromFile(file, ranges, cache));
}

// The promise rejections are caught in order to prevent them from short circuiting Promise.all
await Promise.all(readlinePromises).catch(() => {
DEBUG_BUILD && logger.log('Failed to read one or more source files and resolve context lines');
});

// Perform the same loop as above, but this time we can assume all files are in the cache
// and attempt to add source context to frames.
if (contextLines > 0 && event.exception?.values) {
for (const exception of event.exception.values) {
if (exception.stacktrace && exception.stacktrace.frames) {
await addSourceContextToFrames(exception.stacktrace.frames, contextLines);
if (exception.stacktrace && exception.stacktrace.frames && exception.stacktrace.frames.length > 0) {
addSourceContextToFrames(exception.stacktrace.frames, contextLines, LRU_FILE_CONTENTS_CACHE);
}
}
}
Expand All @@ -91,56 +208,111 @@
}

/** Adds context lines to frames */
function addSourceContextToFrames(frames: StackFrame[], contextLines: number): void {
function addSourceContextToFrames(
frames: StackFrame[],
contextLines: number,
cache: LRUMap<string, Record<number, string>>,
): void {
for (const frame of frames) {
// Only add context if we have a filename and it hasn't already been added
if (frame.filename && frame.context_line === undefined) {
const sourceFileLines = FILE_CONTENT_CACHE.get(frame.filename);

if (sourceFileLines) {
try {
addContextToFrame(sourceFileLines, frame, contextLines);
} catch (e) {
// anomaly, being defensive in case
// unlikely to ever happen in practice but can definitely happen in theory
}
if (frame.filename && frame.context_line === undefined && typeof frame.lineno === 'number') {
const contents = cache.get(frame.filename);
if (contents) {
addContextToFrame(frame.lineno, frame, contextLines, contents);
}
}
}
}

/**
* Reads file contents and caches them in a global LRU cache.
* If reading fails, mark the file as null in the cache so we don't try again.
*
* @param filename filepath to read content from.
* Clears the context lines from a frame, used to reset a frame to its original state
* if we fail to resolve all context lines for it.
*/
async function _readSourceFile(filename: string): Promise<string[] | null> {
const cachedFile = FILE_CONTENT_CACHE.get(filename);
function clearLineContext(frame: StackFrame): void {
delete frame.pre_context;
delete frame.context_line;
delete frame.post_context;
}

// We have already attempted to read this file and failed, do not try again
if (cachedFile === null) {
return null;
/**
* Resolves context lines before and after the given line number and appends them to the frame;
*/
export function addContextToFrame(
lineno: number,
frame: StackFrame,
contextLines: number,
contents: Record<number, string> | undefined,
): void {
// When there is no line number in the frame, attaching context is nonsensical and will even break grouping.
// We already check for lineno before calling this, but since StackFrame lineno ism optional, we check it again.
if (frame.lineno === undefined || contents === undefined) {
DEBUG_BUILD && logger.error('Cannot resolve context for frame with no lineno or file contents');
return;
}

// We have a cache hit, return it
if (cachedFile !== undefined) {
return cachedFile;
frame.pre_context = [];
for (let i = makeRangeStart(lineno, contextLines); i < lineno; i++) {
// We always expect the start context as line numbers cannot be negative. If we dont find a line, then
// something went wrong somewhere. Clear the context and return without adding any linecontext.
if (contents[i] === undefined) {
clearLineContext(frame);
DEBUG_BUILD && logger.error(`Could not find line ${i} in file ${frame.filename}`);
return;
}

frame.pre_context.push(snipLine(contents[i], 0));
}

// Guard from throwing if readFile fails, this enables us to use Promise.all and
// not have it short circuiting if one of the promises rejects + since context lines are added
// on a best effort basis, we want to throw here anyways.

// If we made it to here, it means that our file is not cache nor marked as failed, so attempt to read it
let content: string[] | null = null;
try {
const rawFileContents = await readFileAsync(filename, 'utf-8');
content = rawFileContents.split('\n');
} catch (_) {
// if we fail, we will mark the file as null in the cache and short circuit next time we try to read it
// We should always have the context line. If we dont, something went wrong, so we clear the context and return
// without adding any linecontext.
if (contents[lineno] === undefined) {
clearLineContext(frame);
DEBUG_BUILD && logger.error(`Could not find line ${lineno} in file ${frame.filename}`);
return;
}
frame.context_line = snipLine(contents[lineno], frame.colno || 0);

FILE_CONTENT_CACHE.set(filename, content);
return content;
const end = makeRangeEnd(lineno, contextLines);
frame.post_context = [];
for (let i = lineno + 1; i <= end; i++) {
// Since we dont track when the file ends, we cant clear the context if we dont find a line as it could
// just be that we reached the end of the file.
if (contents[i] === undefined) {
break;
}
frame.post_context.push(snipLine(contents[i], 0));
}
}

// Helper functions for generating line context ranges. They take a line number and the number of lines of context to
// include before and after the line and generate an inclusive range of indices.
type ReadlineRange = [start: number, end: number];
// Compute inclusive end context range
function makeRangeStart(line: number, linecontext: number): number {
return Math.max(1, line - linecontext);
}
// Compute inclusive start context range
function makeRangeEnd(line: number, linecontext: number): number {
return line + linecontext;
}
// Determine start and end indices for context range (inclusive);
function makeContextRange(line: number, linecontext: number): [start: number, end: number] {
return [makeRangeStart(line, linecontext), makeRangeEnd(line, linecontext)];
}

/** Exported only for tests, as a type-safe variant. */
export const _contextLinesIntegration = ((options: ContextLinesOptions = {}) => {
const contextLines = options.frameContextLines !== undefined ? options.frameContextLines : DEFAULT_LINES_OF_CONTEXT;

return {
name: INTEGRATION_NAME,
processEvent(event) {
return addSourceContext(event, contextLines);
},
};
}) satisfies IntegrationFn;

/**
* Capture the lines before and after the frame's context.
*/
export const contextLinesIntegration = defineIntegration(_contextLinesIntegration);
Loading
Loading