metro-file-map: Implement getOrComputeSha1 (lazy hashing 2/3)
Summary:
## Stack
In this stack we're moving towards metro-file-map being able to *lazily* compute file metadata, in particular the SHA-1 hash, only when required by the transformer.

More context in #1325 (comment)

## Implementing a lazy hashing API in metro-file-map
This diff adds a new async `getOrComputeSha1` API to the exposed `FileSystem` interface implemented by `TreeFS`. `TreeFS` is provided during construction with a means of invoking the `FileProcessor` (a usage sketch follows the list below).
 - In common with other `FileSystem` methods, it returns null if the path does not point to a watched regular file.
 - It dereferences symlinks to pass only real, absolute paths to `processFile`, so that it accepts the same range of inputs as `getSha1`.
 - Caches the SHA-1 in metro-file-map so that it may be persisted. Safely clears the value on modification, including when a modification races processing.
 - Emits a `metadata` event to inform auto-saving caches about a change to internal state.
 - This diff does not exercise the new API, except in tests.
 - `TreeFS` consumers are now required to pass `processFile`; this is non-breaking because the API is [experimental](https://github.com/facebook/metro/blob/main/packages/metro-file-map/README.md#experimental-metro-file-map).
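
To illustrate the intended shape of the API, here is a minimal usage sketch (not part of this diff). The project root, the inline `crypto` hashing, and the `sha1ForTransform` helper are illustrative only; the real wiring delegates to `FileProcessor.processRegularFile` as shown in `index.js` below, and imports of `TreeFS` and the metadata field-index constants `H` from metro-file-map's internals are omitted.

```js
// Sketch only, not part of this diff. `H` is metro-file-map's metadata
// field-index constants (H.SHA1); the inline hashing stands in for the
// real FileProcessor-based processing.
const {createHash} = require('crypto');
const {promises: fsPromises} = require('fs');

const fileSystem = new TreeFS({
  rootDir: '/project',
  // Called lazily by getOrComputeSha1; it must write the hash into `metadata`
  // and may return the file contents so callers can reuse them.
  processFile: async (absolutePath, metadata, {computeSha1}) => {
    const content = await fsPromises.readFile(absolutePath);
    if (computeSha1) {
      metadata[H.SHA1] = createHash('sha1').update(content).digest('hex');
    }
    return content;
  },
});

// e.g. from a transformer that needs the hash:
async function sha1ForTransform(filePath) {
  // Resolves to null if filePath is not a watched regular file.
  const result = await fileSystem.getOrComputeSha1(filePath);
  return result?.sha1 ?? null;
}
```

Subsequent calls for an unchanged file return the cached hash without invoking `processFile` again.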

Changelog: Internal

Differential Revision: D69123130
robhogan authored and facebook-github-bot committed Feb 16, 2025
1 parent e4cbfa7 commit a995194
Showing 6 changed files with 202 additions and 4 deletions.
@@ -125,6 +125,9 @@ describe.each(Object.keys(CRAWLERS))(
fileSystem: new TreeFS({
rootDir: FIXTURES_DIR,
files: new Map([['removed.js', ['', 123, 234, 0, '', null, 0]]]),
processFile: () => {
throw new Error('Not implemented');
},
}),
clocks: new Map(),
},
@@ -46,7 +46,12 @@ const DEFAULT_OPTIONS: CrawlerOptions = {
perfLogger: null,
previousState: {
clocks: new Map(),
fileSystem: new TreeFS({rootDir: systemPath('/roots')}),
fileSystem: new TreeFS({
rootDir: systemPath('/roots'),
processFile: () => {
throw new Error('Not implemented');
},
}),
},
rootDir: systemPath('/roots'),
roots: [
7 changes: 7 additions & 0 deletions packages/metro-file-map/src/flow-types.js
@@ -248,6 +248,7 @@ export interface FileSystem {
getModuleName(file: Path): ?string;
getSerializableSnapshot(): CacheData['fileSystemData'];
getSha1(file: Path): ?string;
getOrComputeSha1(file: Path): Promise<?{sha1: string, content?: Buffer}>;

/**
* Given a start path (which need not exist), a subpath and type, and
@@ -382,6 +383,12 @@ export interface MutableFileSystem extends FileSystem {

export type Path = string;

export type ProcessFileFunction = (
absolutePath: string,
metadata: FileMetaData,
request: $ReadOnly<{computeSha1: boolean}>,
) => Promise<?Buffer>;

export type RawMockMap = $ReadOnly<{
duplicates: Map<string, Set<string>>,
mocks: Map<string, Path>,
28 changes: 27 additions & 1 deletion packages/metro-file-map/src/index.js
@@ -34,6 +34,7 @@ import type {
Path,
PerfLogger,
PerfLoggerFactory,
ProcessFileFunction,
WatcherBackendChangeEvent,
WatchmanClocks,
} from './flow-types';
@@ -355,6 +356,25 @@ export default class FileMap extends EventEmitter {

const rootDir = this._options.rootDir;
this._startupPerfLogger?.point('constructFileSystem_start');
const processFile: ProcessFileFunction = async (
absolutePath,
metadata,
opts,
) => {
const result = await this._fileProcessor.processRegularFile(
absolutePath,
metadata,
{
computeSha1: opts.computeSha1,
computeDependencies: false,
maybeReturnContent: true,
},
);
debug('Lazily processed file: %s', absolutePath);
// Emit an event to inform caches that there is new data to save.
this.emit('metadata');
return result?.content;
};
const fileSystem =
initialData != null
? TreeFS.fromDeserializedSnapshot({
@@ -364,8 +384,9 @@
// trust our cache manager that this is correct.
// $FlowIgnore
fileSystemData: initialData.fileSystemData,
processFile,
})
: new TreeFS({rootDir});
: new TreeFS({rootDir, processFile});
this._startupPerfLogger?.point('constructFileSystem_end');

const hastePlugin = new HastePlugin({
@@ -700,9 +721,14 @@
changedSinceCacheRead: changed.size + removed.size > 0,
eventSource: {
onChange: cb => {
// Inform the cache about changes to internal state, including:
// - File system changes
this.on('change', cb);
// - Changes to stored metadata, e.g. on lazy processing.
this.on('metadata', cb);
return () => {
this.removeListener('change', cb);
this.removeListener('metadata', cb);
};
},
},
58 changes: 56 additions & 2 deletions packages/metro-file-map/src/lib/TreeFS.js
@@ -16,6 +16,7 @@ import type {
LookupResult,
MutableFileSystem,
Path,
ProcessFileFunction,
} from '../flow-types';

import H from '../constants';
@@ -96,10 +97,20 @@ export default class TreeFS implements MutableFileSystem {
+#rootDir: Path;
#rootNode: DirectoryNode = new Map();
#pathUtils: RootPathUtils;
#processFile: ProcessFileFunction;

constructor({rootDir, files}: {rootDir: Path, files?: FileData}) {
constructor({
rootDir,
files,
processFile,
}: {
rootDir: Path,
files?: FileData,
processFile: ProcessFileFunction,
}) {
this.#rootDir = rootDir;
this.#pathUtils = new RootPathUtils(rootDir);
this.#processFile = processFile;
if (files != null) {
this.bulkAddOrModify(files);
}
@@ -112,11 +123,13 @@ export default class TreeFS implements MutableFileSystem {
static fromDeserializedSnapshot({
rootDir,
fileSystemData,
processFile,
}: {
rootDir: string,
fileSystemData: DirectoryNode,
processFile: ProcessFileFunction,
}): TreeFS {
const tfs = new TreeFS({rootDir});
const tfs = new TreeFS({rootDir, processFile});
tfs.#rootNode = fileSystemData;
return tfs;
}
@@ -192,6 +205,47 @@ export default class TreeFS implements MutableFileSystem {
return (fileMetadata && fileMetadata[H.SHA1]) ?? null;
}

async getOrComputeSha1(
mixedPath: Path,
): Promise<?{sha1: string, content?: Buffer}> {
const normalPath = this._normalizePath(mixedPath);
const result = this._lookupByNormalPath(normalPath, {
followLeaf: true,
});
if (!result.exists || isDirectory(result.node)) {
return null;
}
const {canonicalPath, node: fileMetadata} = result;

// Empty strings are treated as missing SHA-1 values; only return a non-empty, precomputed hash.
const existing = fileMetadata[H.SHA1];
if (existing != null && existing.length > 0) {
return {sha1: existing};
}
const absolutePath = this.#pathUtils.normalToAbsolute(canonicalPath);

// Mutate the metadata we first retrieved. This may be orphaned or about
// to be overwritten if the file changes while we are processing it -
// by only mutating the original metadata, we don't risk caching a stale
// SHA-1 after a change event.
const maybeContent = await this.#processFile(absolutePath, fileMetadata, {
computeSha1: true,
});
const sha1 = fileMetadata[H.SHA1];
invariant(
sha1 != null && sha1.length > 0,
"File processing didn't populate a SHA-1 hash for %s",
absolutePath,
);

return maybeContent
? {
sha1,
content: maybeContent,
}
: {sha1};
}

exists(mixedPath: Path): boolean {
const result = this._getFileData(mixedPath);
return result != null;
103 changes: 103 additions & 0 deletions packages/metro-file-map/src/lib/__tests__/TreeFS-test.js
@@ -12,6 +12,8 @@
import type {FileData} from '../../flow-types';
import type TreeFSType from '../TreeFS';

import H from '../../constants';

let mockPathModule;
jest.mock('path', () => mockPathModule);

@@ -49,6 +51,9 @@ describe.each([['win32'], ['posix']])('TreeFS on %s', platform => {
[p('node_modules/pkg/a.js'), ['a', 123, 0, 0, '', '', 0]],
[p('node_modules/pkg/package.json'), ['pkg', 123, 0, 0, '', '', 0]],
]),
processFile: () => {
throw new Error('Not implemented');
},
});
});

@@ -182,6 +187,9 @@ describe.each([['win32'], ['posix']])('TreeFS on %s', platform => {
[p('foo/index.js'), ['', 123, 0, 0, '', '', 0]],
[p('link-up'), ['', 123, 0, 0, '', '', p('..')]],
]),
processFile: () => {
throw new Error('Not implemented');
},
});
expect(tfs.lookup(p('/deep/missing/bar.js'))).toMatchObject({
exists: false,
@@ -339,6 +347,9 @@ describe.each([['win32'], ['posix']])('TreeFS on %s', platform => {
].map(posixPath => [p(posixPath), ['', 0, 0, 0, '', '', 0]]),
),
),
processFile: () => {
throw new Error('Not implemented');
},
});
});

@@ -859,4 +870,96 @@ describe.each([['win32'], ['posix']])('TreeFS on %s', platform => {
);
});
});

describe('getOrComputeSha1', () => {
const mockProcessFile = jest.fn();

beforeEach(() => {
tfs = new TreeFS({
rootDir: p('/project'),
files: new Map([
[p('foo.js'), ['', 123, 0, 0, '', 'def456', 0]],
[p('bar.js'), ['', 123, 0, 0, '', '', 0]],
[p('link-to-bar'), ['', 456, 0, 0, '', '', p('./bar.js')]],
]),
processFile: mockProcessFile,
});
mockProcessFile.mockImplementation(async (filePath, metadata) => {
metadata[H.SHA1] = 'abc123';
return;
});
mockProcessFile.mockClear();
});

test('returns the precomputed SHA-1 of a file if set', async () => {
expect(await tfs.getOrComputeSha1(p('foo.js'))).toEqual({sha1: 'def456'});
expect(mockProcessFile).not.toHaveBeenCalled();
});

test('calls processFile exactly once if SHA-1 not initially set', async () => {
expect(await tfs.getOrComputeSha1(p('bar.js'))).toEqual({sha1: 'abc123'});
expect(mockProcessFile).toHaveBeenCalledWith(
p('/project/bar.js'),
expect.any(Array),
{computeSha1: true},
);
mockProcessFile.mockClear();
expect(await tfs.getOrComputeSha1(p('bar.js'))).toEqual({sha1: 'abc123'});
expect(mockProcessFile).not.toHaveBeenCalled();
});

test('returns file contents alongside SHA-1 if processFile provides it', async () => {
mockProcessFile.mockImplementationOnce(async (filePath, metadata) => {
metadata[H.SHA1] = 'bcd234';
return Buffer.from('content');
});
expect(await tfs.getOrComputeSha1(p('bar.js'))).toEqual({
sha1: 'bcd234',
content: Buffer.from('content'),
});
expect(mockProcessFile).toHaveBeenCalledWith(
p('/project/bar.js'),
expect.any(Array),
{computeSha1: true},
);
mockProcessFile.mockClear();
expect(await tfs.getOrComputeSha1(p('bar.js'))).toEqual({
sha1: 'bcd234',
content: undefined,
});
expect(mockProcessFile).not.toHaveBeenCalled();
});

test('calls processFile on resolved symlink targets', async () => {
expect(await tfs.getOrComputeSha1(p('link-to-bar'))).toEqual({
sha1: 'abc123',
});
expect(mockProcessFile).toHaveBeenCalledWith(
p('/project/bar.js'),
expect.any(Array),
{computeSha1: true},
);
});

test('clears stored SHA-1 on modification', async () => {
let resolve: (sha1: string) => void;
const processPromise = new Promise(r => (resolve = r));
mockProcessFile.mockImplementationOnce(async (filePath, metadata) => {
metadata[H.SHA1] = await processPromise;
});
const getOrComputePromise = tfs.getOrComputeSha1(p('bar.js'));
expect(mockProcessFile).toHaveBeenCalledWith(
p('/project/bar.js'),
expect.any(Array),
{computeSha1: true},
);
// Simulate the file being modified while we're waiting for the SHA1.
tfs.addOrModify(p('bar.js'), ['', 123, 0, 0, '', '', 0]);
resolve?.('newsha1');
expect(await getOrComputePromise).toEqual({sha1: 'newsha1'});
// A second call re-computes, because the modification cleared the cached SHA-1.
expect(await tfs.getOrComputeSha1(p('bar.js'))).toEqual({sha1: 'abc123'});
expect(mockProcessFile).toHaveBeenCalledTimes(2);
});
});
});
