4.5 KB140 lines
Blame
1/**
2 * Copyright (c) Meta Platforms, Inc. and affiliates.
3 *
4 * This source code is licensed under the MIT license found in the
5 * LICENSE file in the root directory of this source tree.
6 */
7
8import type {AbsolutePath, RepoRelativePath} from 'isl/src/types';
9import type {Repository} from './Repository';
10import type {RepositoryContext} from './serverTypes';
11
12import {GeneratedStatus} from 'isl/src/types';
13import {promises as fs} from 'node:fs';
14import pathMod from 'node:path';
15import {LRU} from 'shared/LRU';
16import {group} from 'shared/utils';
17import {Internal} from './Internal';
18
/**
 * Marker searched for to recognize a fully generated file.
 * NOTE(review): presumably built from two pieces so that this source file never
 * contains the contiguous marker itself — keep it split.
 */
export const GENERATED_TAG = '@'.concat('generated');
/**
 * Marker searched for to recognize a partially generated file.
 * Built from pieces in the same way as the marker above.
 */
export const PARTIALLY_GENERATED_TAG = '@'.concat('partially-generated');
21
/**
 * Built-in path pattern: lockfiles, `node_modules` contents, and compiled/binary
 * artifacts are treated as generated based on their path alone.
 */
const builtinGeneratedPathRegex =
  /(yarn\.lock$|package-lock\.json$|node_modules\/.*$|\.py[odc]$|\.class$|\.[oa]$|\.so$|Gemfile\.lock$|go\.sum$|Cargo\.lock$|\.dll$|\.exe$|\.pdb$|composer\.lock$|Podfile\.lock$)/;

/**
 * Path pattern used when the repository config supplies no usable override.
 * `Internal.generatedFilesRegex` takes precedence over the built-in pattern when it is set.
 */
const defaultGeneratedPathRegex = Internal.generatedFilesRegex ?? builtinGeneratedPathRegex;
25
/**
 * Resolve the regex used to classify files as generated purely from their path.
 *
 * Reads `isl.generated-files-regex` from the repo config. When that value is unset/empty,
 * or when it fails to compile as a regular expression (which is logged as an error),
 * the default path regex is returned instead.
 */
async function getGeneratedFilePathRegex(
  repo: Repository,
  ctx: RepositoryContext,
): Promise<RegExp> {
  const pattern = await repo.getConfig(ctx, 'isl.generated-files-regex');
  if (!pattern) {
    return defaultGeneratedPathRegex;
  }
  try {
    return new RegExp(pattern);
  } catch (err) {
    // A bad user-supplied pattern must not break the query; report it and use the default.
    repo.initialConnectionContext.logger.error(
      'Configured generated files regex is invalid',
      err,
    );
    return defaultGeneratedPathRegex;
  }
}
44
/**
 * Read the beginning of each file and classify it by the marker found there.
 *
 * At most 1024 bytes are requested per file. A file whose head contains `GENERATED_TAG`
 * is Generated; otherwise one containing `PARTIALLY_GENERATED_TAG` is PartiallyGenerated;
 * anything else is Manual.
 *
 * @param cwd directory onto which each entry of `files` is joined before opening
 * @param files paths to inspect
 * @returns one `[path, status]` pair per input path, in input order. Files that cannot be
 *   opened, read, or closed are reported as Manual rather than rejecting the whole batch.
 */
function readFilesLookingForGeneratedTag(
  cwd: AbsolutePath,
  files: Array<string>,
): Promise<Array<[RepoRelativePath, GeneratedStatus]>> {
  return Promise.all(
    files.map(async (path): Promise<[RepoRelativePath, GeneratedStatus]> => {
      let head;
      try {
        const handle = await fs.open(pathMod.join(cwd, path));
        try {
          const {buffer, bytesRead} = await handle.read({length: 1024});
          // Only search the bytes that were actually read, not the unused tail of the buffer.
          head = buffer.subarray(0, bytesRead);
        } finally {
          // Always release the descriptor, even when the read fails, and wait for it so a
          // close failure is handled here instead of surfacing as an unhandled rejection.
          await handle.close();
        }
      } catch (e) {
        // e.g. missing files are considered Manual. This can happen when querying files in
        // non-head commits. More accurate would be to `sl cat`, but that's expensive.
        return [path, GeneratedStatus.Manual];
      }
      if (head.includes(GENERATED_TAG)) {
        return [path, GeneratedStatus.Generated];
      } else if (head.includes(PARTIALLY_GENERATED_TAG)) {
        return [path, GeneratedStatus.PartiallyGenerated];
      }
      return [path, GeneratedStatus.Manual];
    }),
  );
}
70
/**
 * Classifies files as Generated, PartiallyGenerated, or Manual, and remembers the
 * answers that required reading file contents.
 */
export class GeneratedFilesDetector {
  // We assume the same file path doesn't switch generated status, so we can cache aggressively.
  private cache = new LRU<RepoRelativePath, GeneratedStatus>(1500);

  /**
   * Given a list of files, return an object mapping path to Generated Status.
   *
   * Each path is resolved by the first of these that yields an answer:
   *  1. a previously cached status for that path;
   *  2. the generated-files path regex (a match means Generated);
   *  3. reading the start of the file (up to 1024 bytes) and looking for @ + generated
   *     (Generated) or @ + partially-generated (PartiallyGenerated); otherwise Manual.
   * Only statuses obtained in step 3 are written to the cache.
   *
   * @param repo repository whose config may override the generated-files path regex
   * @param ctx context used for the config lookup and for logging
   * @param root directory that the entries of `files` are resolved against when read
   * @param files paths to classify
   * @returns an object with one entry per distinct input path; `{}` for an empty input
   */
  public async queryFilesGenerated(
    repo: Repository,
    ctx: RepositoryContext,
    root: AbsolutePath,
    files: Array<RepoRelativePath>,
  ): Promise<Record<RepoRelativePath, GeneratedStatus>> {
    if (files.length === 0) {
      return {};
    }
    const t1 = performance.now();
    const {logger} = ctx;

    const regex = await getGeneratedFilePathRegex(repo, ctx);

    const results = group(
      files,
      // (1) try the cache
      // (2) test if it matches the generated file regex, if so, it's generated
      // (3) if the regex doesn't match and it's not in the cache, we need to try reading the file
      file =>
        this.cache.get(file) ??
        (regex.test(file) ? GeneratedStatus.Generated : undefined) ??
        'notCached',
    );

    const needsCheck = results.notCached ?? [];

    const checkResult =
      needsCheck.length === 0 ? [] : await readFilesLookingForGeneratedTag(root, needsCheck);

    for (const [path, st] of checkResult) {
      this.cache.set(path, st);
    }

    const t2 = performance.now();
    // Elapsed time rounded down to one decimal place.
    const elapsedMs = Math.floor(10 * (t2 - t1)) / 10;
    logger.info(
      `Generated file query took ${elapsedMs}ms for ${files.length} files. (${needsCheck.length} not cached)`,
    );

    return Object.fromEntries([
      ...(results[GeneratedStatus.Manual]?.map(p => [p, GeneratedStatus.Manual]) ?? []),
      ...(results[GeneratedStatus.Generated]?.map(p => [p, GeneratedStatus.Generated]) ?? []),
      ...(results[GeneratedStatus.PartiallyGenerated]?.map(p => [
        p,
        GeneratedStatus.PartiallyGenerated,
      ]) ?? []),
      ...checkResult,
    ]);
  }

  /** Clear the status cache. */
  public clear() {
    this.cache.clear();
  }
}

/** Module-level detector instance exported for shared use. */
export const generatedFilesDetector = new GeneratedFilesDetector();
140