From 560f427c430107de9a2785960667dddfa7785fbe Mon Sep 17 00:00:00 2001
From: Michael
Date: Tue, 11 Mar 2025 02:41:10 +0100
Subject: [PATCH] feat(utils): add find in file logic

---
 packages/utils/src/lib/file-system.ts         | 139 ++++++++++++++++++
 .../utils/src/lib/file-system.unit.test.ts    |  41 +++++
 2 files changed, 180 insertions(+)

diff --git a/packages/utils/src/lib/file-system.ts b/packages/utils/src/lib/file-system.ts
index 966dbc536..da5e47ea0 100644
--- a/packages/utils/src/lib/file-system.ts
+++ b/packages/utils/src/lib/file-system.ts
@@ -1,7 +1,10 @@
 import { bold, gray } from 'ansis';
 import { type Options, bundleRequire } from 'bundle-require';
+import * as fs from 'node:fs';
 import { mkdir, readFile, readdir, rm, stat } from 'node:fs/promises';
 import path from 'node:path';
+import * as readline from 'node:readline';
+import type { SourceFileLocation } from '@code-pushup/models';
 import { formatBytes } from './formatting.js';
 import { logMultipleResults } from './log-results.js';
 import { ui } from './logging.js';
@@ -93,6 +96,7 @@ export type CrawlFileSystemOptions = {
   pattern?: string | RegExp;
   fileTransform?: (filePath: string) => Promise | T;
 };
+
 export async function crawlFileSystem(
   options: CrawlFileSystemOptions,
 ): Promise {
@@ -159,3 +163,138 @@ export function filePathToCliArg(filePath: string): string {
 export function projectToFilename(project: string): string {
   return project.replace(/[/\\\s]+/g, '-').replace(/@/g, '');
 }
+
+/** Column span of one match within a line: 0-based start, exclusive end. */
+export type LineHit = {
+  startColumn: number;
+  endColumn: number;
+};
+
+/**
+ * Location of one match inside a file, as produced by `findInFile`.
+ * NOTE(review): the type arguments were missing in the reviewed copy of this
+ * patch and are reconstructed from the fields `findInFile` sets - confirm
+ * against `SourceFileLocation` in `@code-pushup/models`.
+ */
+export type FileHit = Pick<SourceFileLocation, 'file'> &
+  Exclude<SourceFileLocation['position'], undefined>;
+
+/** Escapes RegExp metacharacters so that `str` is matched literally. */
+const escapeRegExp = (str: string): string =>
+  str.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
+
+/** Clones `pattern`, adding the `g` flag that `String#matchAll` requires. */
+const ensureGlobalRegex = (pattern: RegExp): RegExp =>
+  new RegExp(
+    pattern.source,
+    pattern.flags.includes('g') ? pattern.flags : `${pattern.flags}g`,
+  );
+
+/**
+ * Collects the column spans of every match of `searchPattern` in one line.
+ * Strings are matched literally, regular expressions are applied globally and
+ * a callback reports its own spans (`null` is treated as "no match").
+ */
+const findAllMatches = (
+  line: string,
+  searchPattern: string | RegExp | ((line: string) => LineHit[] | null),
+): LineHit[] => {
+  if (typeof searchPattern === 'function') {
+    return searchPattern(line) ?? [];
+  }
+
+  const regex =
+    typeof searchPattern === 'string'
+      ? new RegExp(escapeRegExp(searchPattern), 'g')
+      : ensureGlobalRegex(searchPattern);
+
+  return [...line.matchAll(regex)].map(({ index = 0, 0: match }) => ({
+    startColumn: index,
+    endColumn: index + match.length,
+  }));
+};
+
+/**
+ * Reads a file line-by-line and collects the matches of the search pattern.
+ * @param file - The file path to check.
+ * @param searchPattern - A literal string, a regular expression, or a callback
+ *   returning the column spans of its matches in a given line.
+ * @param options - Additional options. If `bail` is true, the search stops
+ *   after the first hit and only that hit is returned.
+ * @returns List of hits with matching details; empty if nothing matched.
+ */
+export async function findInFile(
+  file: string,
+  searchPattern: string | RegExp | ((line: string) => LineHit[] | null),
+  options?: { bail?: boolean },
+): Promise<FileHit[]> {
+  const { bail = false } = options ?? {};
+  const hits: FileHit[] = [];
+
+  return new Promise<FileHit[]>((resolve, reject) => {
+    const stream = fs.createReadStream(file, { encoding: 'utf8' });
+    const rl = readline.createInterface({ input: stream });
+    // eslint-disable-next-line functional/no-let
+    let lineNumber = 0;
+    // eslint-disable-next-line functional/no-let
+    let isSettled = false;
+
+    // Stops reading and rejects once; anything after the first settle is ignored.
+    const fail = (error: unknown): void => {
+      if (isSettled) {
+        return;
+      }
+      isSettled = true;
+      rl.close();
+      stream.destroy();
+      reject(error);
+    };
+
+    rl.on('line', line => {
+      // Lines that readline had already buffered can still arrive after close().
+      if (isSettled) {
+        return;
+      }
+      lineNumber++;
+
+      try {
+        const matches = findAllMatches(line, searchPattern);
+        // eslint-disable-next-line functional/immutable-data
+        hits.push(
+          ...(bail ? matches.slice(0, 1) : matches).map(
+            ({ startColumn, endColumn }) => ({
+              file,
+              startLine: lineNumber,
+              startColumn,
+              endLine: lineNumber,
+              endColumn,
+            }),
+          ),
+        );
+      } catch (error) {
+        // A throwing callback pattern rejects instead of escaping the emitter.
+        fail(error);
+        return;
+      }
+
+      if (bail && hits.length > 0) {
+        isSettled = true;
+        rl.close();
+        stream.destroy();
+        resolve(hits);
+      }
+    });
+
+    rl.once('close', () => {
+      if (!isSettled) {
+        isSettled = true;
+        resolve(hits);
+      }
+    });
+
+    // Listen on both emitters: whether readline forwards errors of its input
+    // stream depends on the Node.js version (TODO confirm for supported versions).
+    stream.on('error', fail);
+    rl.on('error', fail);
+  });
+}
diff --git a/packages/utils/src/lib/file-system.unit.test.ts b/packages/utils/src/lib/file-system.unit.test.ts
index dfb76ee06..957dcb2ef 100644
--- a/packages/utils/src/lib/file-system.unit.test.ts
+++ b/packages/utils/src/lib/file-system.unit.test.ts
@@ -8,6 +8,7 @@ import {
   crawlFileSystem,
   ensureDirectoryExists,
   filePathToCliArg,
+  findInFile,
   findLineNumberInText,
   findNearestFile,
   logMultipleFileResults,
@@ -263,3 +264,43 @@ describe('projectToFilename', () => {
     expect(projectToFilename(project)).toBe(file);
   });
 });
+
+describe('findInFile', () => {
+  const file = 'file.txt';
+  const content =
+    'line 1 - even:false\nline 2 - even:true\nline 3 - even:false\nline 4 - even:true\nline 5 - even:false\n';
+  const filePath = path.join(MEMFS_VOLUME, file);
+
+  beforeEach(() => {
+    vol.reset();
+    vol.fromJSON({ [file]: content }, MEMFS_VOLUME);
+  });
+
+  it('should find pattern in a file if a string is given', async () => {
+    const result = await findInFile(filePath, 'line 3');
+    expect(result).toStrictEqual([
+      {
+        file: filePath,
+        endColumn: 6,
+        endLine: 3,
+        startColumn: 0,
+        startLine: 3,
+      },
+    ]);
+  });
+
+  // @TODO any second test will fail
+  // Error: EBADF: bad file descriptor, close
+  it.todo('should find pattern in a file if a RegEx is given', async () => {
+    const result = await findInFile('file.txt', new RegExp('line 3', 'g'));
+    expect(result).toStrictEqual([
+      {
+        file: 'file.txt',
+        endColumn: 6,
+        endLine: 3,
+        startColumn: 0,
+        startLine: 3,
+      },
+    ]);
+  });
+});