addons/isl-server/src/GeneratedFiles.tsblame
View source
/**
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */
b69ab317
b69ab318import type {AbsolutePath, RepoRelativePath} from 'isl/src/types';
b69ab319import type {Repository} from './Repository';
b69ab3110import type {RepositoryContext} from './serverTypes';
b69ab3111
b69ab3112import {GeneratedStatus} from 'isl/src/types';
b69ab3113import {promises as fs} from 'node:fs';
b69ab3114import pathMod from 'node:path';
b69ab3115import {LRU} from 'shared/LRU';
b69ab3116import {group} from 'shared/utils';
b69ab3117import {Internal} from './Internal';
b69ab3118
/**
 * Marker searched for in file contents to classify a file as fully generated.
 * Built by concatenation, presumably so that this source file does not itself
 * contain the literal marker.
 */
export const GENERATED_TAG = '@' + 'generated';
/**
 * Marker searched for in file contents to classify a file as partially generated.
 * Built by concatenation in the same way as the marker above.
 */
export const PARTIALLY_GENERATED_TAG = '@' + 'partially-generated';
b69ab3121
/**
 * Path pattern used to treat a file as generated when no valid
 * `isl.generated-files-regex` config value is available.
 * `Internal.generatedFilesRegex` takes precedence when it is defined; otherwise the
 * built-in pattern covers common lockfiles, `node_modules/` contents and compiled artifacts.
 */
const defaultGeneratedPathRegex =
  Internal.generatedFilesRegex ??
  /(yarn\.lock$|package-lock\.json$|node_modules\/.*$|\.py[odc]$|\.class$|\.[oa]$|\.so$|Gemfile\.lock$|go\.sum$|Cargo\.lock$|\.dll$|\.exe$|\.pdb$|composer\.lock$|Podfile\.lock$)/;
b69ab3125
/**
 * Resolve the regex used to decide, from its path alone, that a file is generated.
 *
 * Reads the `isl.generated-files-regex` config value through `repo.getConfig`.
 * When the value is unset (or empty) the default regex is returned. When the value
 * is set but does not compile, the error is logged and the default regex is returned.
 *
 * @param repo repository whose config is queried and whose logger receives compile errors.
 * @param ctx context forwarded to `repo.getConfig`.
 * @returns the configured regex, or the default one as a fallback.
 */
async function getGeneratedFilePathRegex(
  repo: Repository,
  ctx: RepositoryContext,
): Promise<RegExp> {
  const configuredPathRegex = await repo.getConfig(ctx, 'isl.generated-files-regex');
  if (!configuredPathRegex) {
    return defaultGeneratedPathRegex;
  }
  try {
    return new RegExp(configuredPathRegex);
  } catch (err) {
    // A malformed user-supplied pattern must not break detection: log it and use the default.
    repo.initialConnectionContext.logger.error(
      'Configured generated files regex is invalid',
      err,
    );
    return defaultGeneratedPathRegex;
  }
}
b69ab3144
/**
 * Classify files by reading the start of each one from disk and searching for a marker.
 *
 * Up to 1024 bytes are requested from each file. A file whose leading bytes contain
 * `GENERATED_TAG` is Generated; one containing `PARTIALLY_GENERATED_TAG` is
 * PartiallyGenerated; anything else is Manual. All files are processed concurrently.
 *
 * @param cwd directory each entry of `files` is joined onto before opening.
 * @param files paths to inspect, relative to `cwd`.
 * @returns one `[path, status]` tuple per input path, in input order. Files that cannot
 *   be opened or read are reported as `GeneratedStatus.Manual`.
 */
function readFilesLookingForGeneratedTag(
  cwd: AbsolutePath,
  files: Array<string>,
): Promise<Array<[RepoRelativePath, GeneratedStatus]>> {
  return Promise.all(
    files.map(async (path): Promise<[RepoRelativePath, GeneratedStatus]> => {
      let head: Buffer;
      try {
        const handle = await fs.open(pathMod.join(cwd, path));
        try {
          const {buffer, bytesRead} = await handle.read({length: 1024});
          // Only the bytes actually read are meaningful; the rest of the buffer is padding.
          head = buffer.subarray(0, bytesRead);
        } finally {
          // Release the descriptor even when the read throws, and wait for the close so it
          // cannot surface as an unhandled rejection. A failed close does not invalidate
          // the bytes already read, so it is deliberately ignored.
          await handle.close().catch(() => undefined);
        }
      } catch {
        // e.g. missing files considered Manual. This can happen when querying files in non-head commits.
        // More accurate would be to `sl cat`, but that's expensive.
        return [path, GeneratedStatus.Manual];
      }
      if (head.includes(GENERATED_TAG)) {
        return [path, GeneratedStatus.Generated];
      }
      if (head.includes(PARTIALLY_GENERATED_TAG)) {
        return [path, GeneratedStatus.PartiallyGenerated];
      }
      return [path, GeneratedStatus.Manual];
    }),
  );
}
b69ab3170
/**
 * Answers "is this file generated?" for batches of repo-relative paths, remembering
 * the answers obtained by reading files in an LRU cache.
 */
export class GeneratedFilesDetector {
  // We assume the same file path doesn't switch generated status, so we can cache aggressively.
  private cache = new LRU<RepoRelativePath, GeneratedStatus>(1500);

  /**
   * Given a list of files, return an object mapping path to Generated Status.
   *
   * For each path, in order of preference:
   *  1. a cached status is reused;
   *  2. a path matching the generated-files regex is Generated;
   *  3. otherwise the file is read from disk: it is Generated if the first 1024 bytes
   *     contain @ + generated, PartiallyGenerated if they contain @ + partially-generated,
   *     and Manual otherwise (including when the file cannot be read).
   *
   * Only statuses obtained by reading files (step 3) are written to the cache.
   *
   * @param repo repository used to look up the configured generated-files regex.
   * @param ctx context providing the logger and forwarded to the config lookup.
   * @param root directory the repo-relative paths are resolved against when reading.
   * @param files repo-relative paths to classify.
   * @returns a record from each input path to its status; empty when `files` is empty.
   */
  public async queryFilesGenerated(
    repo: Repository,
    ctx: RepositoryContext,
    root: AbsolutePath,
    files: Array<RepoRelativePath>,
  ): Promise<Record<RepoRelativePath, GeneratedStatus>> {
    if (files.length === 0) {
      return {};
    }
    const t1 = performance.now();
    const {logger} = ctx;

    const regex = await getGeneratedFilePathRegex(repo, ctx);

    const results = group(
      files,
      // (1) try the cache
      // (2) test if it matches the generated file regex, if so, it's generated
      // (3) if the regex fails or it's not in cache, we need to try reading the file
      file =>
        this.cache.get(file) ??
        (regex.test(file) ? GeneratedStatus.Generated : undefined) ??
        'notCached',
    );

    const needsCheck = results.notCached ?? [];

    const checkResult =
      needsCheck.length === 0 ? [] : await readFilesLookingForGeneratedTag(root, needsCheck);

    // Remember what was learned from disk so later queries skip the read.
    for (const [path, st] of checkResult) {
      this.cache.set(path, st);
    }

    const t2 = performance.now();
    // NOTE(review): the multiply and divide by 10 cancel out inside the floor, so this
    // logs whole milliseconds; confirm whether one decimal place was intended.
    logger.info(
      `Generated file query took ${Math.floor((10 * (t2 - t1)) / 10)}ms for ${
        files.length
      } files. (${needsCheck.length} not cached)`,
    );

    // Merge the already-known groups with the freshly read results.
    return Object.fromEntries([
      ...(results[GeneratedStatus.Manual]?.map(p => [p, GeneratedStatus.Manual]) ?? []),
      ...(results[GeneratedStatus.Generated]?.map(p => [p, GeneratedStatus.Generated]) ?? []),
      ...(results[GeneratedStatus.PartiallyGenerated]?.map(p => [
        p,
        GeneratedStatus.PartiallyGenerated,
      ]) ?? []),
      ...checkResult,
    ]);
  }

  /** Drop every cached status so subsequent queries re-evaluate each path. */
  public clear() {
    this.cache.clear();
  }
}
b69ab31138
/** Module-level detector instance, so importers of this module share one cache. */
export const generatedFilesDetector = new GeneratedFilesDetector();