addons/shared/patch/parse.tsblame
View source
/**
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */
b69ab317
b69ab318import {diffBlocks} from '../diff';
b69ab319import {stringifyPatch} from './stringify';
b69ab3110import type {Hunk, ParsedDiff} from './types';
b69ab3111import {DiffType} from './types';
b69ab3112
// Line patterns for the git extended diff format. All of them are matched
// against a single line with its line delimiter already removed.

/** `diff --git <old> <new>`: group 1 is the old path, group 2 the new path. */
const DIFF = /^diff --git (.*) (.*)$/;

/** `rename from <path>` / `rename to <path>` header pair. */
const RENAME_FROM = /^rename from (.*)$/;
const RENAME_TO = /^rename to (.*)$/;

/** `copy from <path>` / `copy to <path>` header pair. */
const COPY_FROM = /^copy from (.*)$/;
const COPY_TO = /^copy to (.*)$/;

/** File mode headers: group 1 is the six-digit mode. */
const NEW_FILE_MODE = /^new file mode (\d{6})$/;
const DELETED_FILE_MODE = /^deleted file mode (\d{6})$/;
const OLD_MODE = /^old mode (\d{6})$/;
const NEW_MODE = /^new mode (\d{6})$/;

/**
 * `@@ -oldStart[,oldLines] +newStart[,newLines] @@` hunk header.
 * Groups 2 and 4 (the line counts) are optional and are `undefined` when absent.
 */
const HUNK_HEADER = /@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@/;

/** `--- <old path>` and `+++ <new path>` file header lines. */
const OLD_FILE_HEADER = /^--- (.*)$/;
const NEW_FILE_HEADER = /^\+\+\+ (.*)$/;

/**
 * Every line delimiter recognized when splitting a patch into lines.
 * `\r\n` is listed first so that a CRLF pair counts as one delimiter.
 */
const DELIMITERS = /\r\n|[\n\v\f\r\x85]/g;
b69ab3127
/**
 * Throw an `Error` with message `msg` unless `condition` is truthy.
 *
 * The `asserts condition` return type lets TypeScript narrow on the caller's
 * side (e.g. `assert(arr !== null, ...)` makes `arr` non-null afterwards), so
 * the runtime check has to reject every falsy value, not only the literal
 * `false`; otherwise the narrowing would be unsound for `null`, `undefined`,
 * `0` or `''`.
 *
 * @param condition Value that must be truthy.
 * @param msg Message of the thrown error.
 * @throws Error when `condition` is falsy.
 */
function assert(condition: unknown, msg?: string): asserts condition {
  if (!condition) {
    throw new Error(msg);
  }
}
b69ab3133
/**
 * Parse a patch in git diff format into one `ParsedDiff` per file section.
 *
 * The diff library we were using does not support git diff format (rename,
 * copy, empty file, file mode change etc). This function extends the original
 * `parsePatch` function [1] so that it understands git diff format [2].
 *
 * Parsing rules, as implemented below:
 * - Each pass first skips forward to the next `diff --git <old> <new>` line, so
 *   text without such a header is skipped rather than parsed. An entry is
 *   created at the start of every pass; input with no `diff --git` line at all
 *   (including the empty string) therefore yields a single entry with no hunks.
 * - Recognized extended header lines (`old mode`, `new mode`,
 *   `deleted file mode`, `new file mode`, `copy ...`, `rename ...`, `--- ...`)
 *   set the entry's modes and type; other lines before the hunks are ignored.
 * - Hunks are read until a line that cannot belong to a hunk is found.
 *
 * [1] https://github.com/DefinitelyTyped/DefinitelyTyped/blob/master/types/diff/index.d.ts#L388
 * [2] https://github.com/git/git-scm.com/blob/main/spec/data/diff-generate-patch.txt
 *
 * @param patch Full patch text; any delimiter in `DELIMITERS` ends a line.
 * @returns The parsed file sections, in input order.
 * @throws Error (`invalid format '<line>'`) when a recognized extended header
 *   line, or the line expected to follow it, is malformed.
 */
export function parsePatch(patch: string): ParsedDiff[] {
  // `lines[k]` is the k-th line of the patch and `lineEndings[k]` the delimiter
  // that followed it (the final line has none).
  const lines: string[] = patch.split(DELIMITERS);
  const lineEndings: string[] = patch.match(DELIMITERS) ?? [];
  const parsed: ParsedDiff[] = [];
  // Index of the next unconsumed line; shared by all helpers below.
  let cursor = 0;

  /** Error message quoting the line at index `at`. */
  const invalid = (at: number): string => `invalid format '${lines[at]}'`;

  /** Match the current line against a mode pattern and return the captured mode. */
  const captureMode = (pattern: RegExp): string => {
    const found = pattern.exec(lines[cursor]);
    assert(found !== null, invalid(cursor));
    return found[1];
  };

  /** Require the current line and the one after it to match `first` and `second`. */
  const expectPair = (first: RegExp, second: RegExp): void => {
    assert(first.test(lines[cursor]), invalid(cursor));
    assert(second.test(lines[cursor + 1]), invalid(cursor + 1));
  };

  /** Consume lines up to and including the next `diff --git` line, recording its paths. */
  const skipToDiffHeader = (entry: ParsedDiff): void => {
    while (cursor < lines.length) {
      const header = DIFF.exec(lines[cursor]);
      cursor++;
      if (header) {
        entry.oldFileName = header[1];
        entry.newFileName = header[2];
        return;
      }
    }
  };

  /** Consume the extended header lines between `diff --git` and the first hunk. */
  const readExtendedHeaders = (entry: ParsedDiff): void => {
    while (cursor < lines.length) {
      const line = lines[cursor];
      if (line.startsWith('old mode')) {
        entry.oldMode = captureMode(OLD_MODE);
        entry.type = DiffType.Modified;
        cursor++;
      } else if (line.startsWith('new mode')) {
        entry.newMode = captureMode(NEW_MODE);
        entry.type = DiffType.Modified;
        cursor++;
      } else if (line.startsWith('deleted file mode')) {
        // The mode of a deleted file is recorded in `newMode`.
        entry.newMode = captureMode(DELETED_FILE_MODE);
        entry.type = DiffType.Removed;
        cursor++;
      } else if (line.startsWith('new file mode')) {
        entry.newMode = captureMode(NEW_FILE_MODE);
        entry.type = DiffType.Added;
        cursor++;
      } else if (line.startsWith('copy ')) {
        expectPair(COPY_FROM, COPY_TO);
        entry.type = DiffType.Copied;
        cursor += 2;
      } else if (line.startsWith('rename ')) {
        expectPair(RENAME_FROM, RENAME_TO);
        entry.type = DiffType.Renamed;
        cursor += 2;
      } else if (line.startsWith('--- ')) {
        // `---` / `+++` pair: the last header before the hunks. Only default the
        // type to Modified when no earlier header decided it.
        expectPair(OLD_FILE_HEADER, NEW_FILE_HEADER);
        if (entry.type === undefined) {
          entry.type = DiffType.Modified;
        }
        cursor += 2;
        return;
      } else if (line.startsWith('diff --git')) {
        // A new file section starts; leave the line for the next pass.
        return;
      } else {
        // Ignore other header types (e.g. similarity etc).
        cursor++;
      }
    }
  };

  /*
   * Parses a hunk. This is copied from jsdiff library:
   * https://github.com/kpdecker/jsdiff/blob/master/src/patch/parse.js
   */
  const readHunk = (): Hunk => {
    const [, oldStart, oldCount, newStart, newCount] = lines[cursor++].split(HUNK_HEADER);

    const hunk: Hunk = {
      oldStart: Number(oldStart),
      oldLines: oldCount === undefined ? 1 : Number(oldCount),
      newStart: Number(newStart),
      newLines: newCount === undefined ? 1 : Number(newCount),
      lines: [],
      linedelimiters: [],
    };

    // Unified Diff Format quirk: If the hunk size is 0,
    // the first number is one lower than one would expect.
    // https://www.artima.com/weblogs/viewpost.jsp?thread=164293
    if (hunk.oldLines === 0) {
      hunk.oldStart += 1;
    }
    if (hunk.newLines === 0) {
      hunk.newStart += 1;
    }

    let added = 0;
    let removed = 0;
    while (cursor < lines.length) {
      const text = lines[cursor];

      // A line starting with '--- ' could be a removed line, but followed by
      // '+++ ' and '@@' it is the header of the next file: stop before it.
      const startsNextFile =
        text.startsWith('--- ') &&
        cursor + 2 < lines.length &&
        lines[cursor + 1].startsWith('+++ ') &&
        lines[cursor + 2].startsWith('@@');
      if (startsNextFile) {
        break;
      }

      // An empty line counts as context unless it is the very last line.
      const marker = text.length === 0 && cursor !== lines.length - 1 ? ' ' : text[0];
      if (marker !== '+' && marker !== '-' && marker !== ' ' && marker !== '\\') {
        break;
      }

      hunk.lines.push(text);
      hunk.linedelimiters.push(lineEndings[cursor] || '\n');
      switch (marker) {
        case '+':
          added++;
          break;
        case '-':
          removed++;
          break;
        case ' ':
          added++;
          removed++;
          break;
        default:
          // '\' marker lines affect neither count.
          break;
      }
      cursor++;
    }

    // Handle the empty block count case
    if (added === 0 && hunk.newLines === 1) {
      hunk.newLines = 0;
    }
    if (removed === 0 && hunk.oldLines === 1) {
      hunk.oldLines = 0;
    }

    return hunk;
  };

  /** Collect hunks until the next `diff --git` line or the end of input. */
  const readHunks = (entry: ParsedDiff): void => {
    while (cursor < lines.length) {
      const line = lines[cursor];
      if (DIFF.test(line)) {
        return;
      }
      if (line.startsWith('@@')) {
        entry.hunks.push(readHunk());
      } else {
        // ignore unexpected content
        cursor++;
      }
    }
  };

  while (cursor < lines.length) {
    const entry: ParsedDiff = {hunks: []};
    parsed.push(entry);
    skipToDiffHeader(entry);
    readExtendedHeaders(entry);
    readHunks(entry);
  }

  return parsed;
}
b69ab31246
/**
 * Guess whether a parsed file diff describes a submodule, judging by the shape
 * of its hunks. sl diff doesn't have file mode in the outputs yet.
 *
 * Diff pattern for a submodule change:
 *
 *   diff --git a/path/to/submodule b/path/to/submodule
 *   --- a/path/to/submodule
 *   +++ b/path/to/submodule
 *   @@ -1,1 +1,1 @@
 *   -Subproject commit <hash>
 *   +Subproject commit <hash>
 *
 * Diff pattern for adding a submodule:
 *
 *   diff --git a/path/to/submodule b/path/to/submodule
 *   new file mode 160000
 *   --- /dev/null
 *   +++ b/path/to/submodule
 *   @@ -0,0 +1,1 @@
 *   +Subproject commit <hash>
 *
 * @param patch One parsed file diff.
 * @returns `true` when the diff has exactly one hunk and is either an added
 *   file with mode `160000`, or a modification replacing a single
 *   `Subproject commit <hash>` line on line 1; `false` otherwise.
 */
export function guessIsSubmodule(patch: ParsedDiff): boolean {
  if (patch.hunks.length !== 1) {
    return false;
  }

  // Added submodule: the mode alone is decisive.
  if (patch.type === DiffType.Added && patch.newMode === '160000') {
    return true;
  }
  if (patch.type !== DiffType.Modified) {
    return false;
  }

  // Changed submodule: a one-line file whose only line is replaced.
  const [hunk] = patch.hunks;
  const replacesFirstLineOnly =
    hunk.oldStart === 1 && hunk.oldLines === 1 && hunk.newStart === 1 && hunk.newLines === 1;
  if (!replacesFirstLineOnly || hunk.lines.length !== 2) {
    return false;
  }

  const [removedLine, addedLine] = hunk.lines;
  return (
    /^-Subproject commit [0-9A-Fa-f]{7,64}$/.test(removedLine) &&
    /^\+Subproject commit [0-9A-Fa-f]{7,64}$/.test(addedLine)
  );
}
b69ab31288
/**
 * Build a `ParsedDiff` by diffing two arrays of lines with `diffBlocks`.
 *
 * Every block that `diffBlocks` does not mark as equal (`'='`) becomes one hunk
 * holding the old lines prefixed with `-` followed by the new lines prefixed
 * with `+`. No context lines are emitted, and every line delimiter is `'\n'`.
 *
 * @param oldCodeLines Lines of the old text.
 * @param newCodeLines Lines of the new text.
 * @param lineNumber Offset added to each block's start index to produce
 *   `oldStart` / `newStart`.
 *   NOTE(review): presumably the line number of the first element of the
 *   arrays within the full file; confirm against callers.
 * @param oldFileName Stored as `oldFileName` on the result.
 * @param newFileName Stored as `newFileName` on the result.
 * @returns A `ParsedDiff` with the file names and generated hunks; no other
 *   field is set.
 */
export function parseParsedDiff(
  oldCodeLines: string[],
  newCodeLines: string[],
  lineNumber: number,
  oldFileName?: string,
  newFileName?: string,
): ParsedDiff {
  const hunks: Hunk[] = [];

  for (const [sign, [oldBegin, oldEnd, newBegin, newEnd]] of diffBlocks(
    oldCodeLines,
    newCodeLines,
  )) {
    // Unchanged blocks produce no hunk.
    if (sign === '=') {
      continue;
    }

    const removed = oldCodeLines.slice(oldBegin, oldEnd).map(text => `-${text}`);
    const added = newCodeLines.slice(newBegin, newEnd).map(text => `+${text}`);

    hunks.push({
      oldStart: lineNumber + oldBegin,
      oldLines: removed.length,
      newStart: lineNumber + newBegin,
      newLines: added.length,
      lines: [...removed, ...added],
      linedelimiters: Array.from({length: removed.length + added.length}, () => '\n'),
    });
  }

  return {oldFileName, newFileName, hunks} as ParsedDiff;
}
b69ab31329
/**
 * Given a diff patch, drop the file sections that touch any of the listed
 * files and return the remaining sections as patch text.
 *
 * A leading `a/` or `b/` is removed from every entry of `files` before
 * comparing. On the patch side, `a/` is removed from the old name and `b/`
 * from the new name. A section is dropped when either of its names equals a
 * listed file.
 *
 * NOTE(review): the prefix is stripped from `files` unconditionally, so an
 * entry for a real path under a top-level `a/` or `b/` directory is compared
 * without that directory.
 *
 * @param patch Patch text in git diff format.
 * @param files Paths to exclude, with or without the `a/` / `b/` prefix.
 * @returns The output of `stringifyPatch` for the sections that were kept.
 */
export function filterFilesFromPatch(patch: string, files: string[]): string {
  const sections = parsePatch(patch);

  const excluded = new Set(files.map(file => file.replace(/^[ab]\//, '')));
  const isExcluded = (name: string | undefined): boolean =>
    name !== undefined && excluded.has(name);

  const kept = sections.filter(
    section =>
      !isExcluded(section.oldFileName?.replace(/^a\//, '')) &&
      !isExcluded(section.newFileName?.replace(/^b\//, '')),
  );

  return stringifyPatch(kept);
}