10.3 KB355 lines
Blame
1/**
2 * Copyright (c) Meta Platforms, Inc. and affiliates.
3 *
4 * This source code is licensed under the MIT license found in the
5 * LICENSE file in the root directory of this source tree.
6 */
7
8import {diffBlocks} from '../diff';
9import {stringifyPatch} from './stringify';
10import type {Hunk, ParsedDiff} from './types';
11import {DiffType} from './types';
12
/**
 * Line terminators used to split a patch into lines: CRLF, LF, vertical tab,
 * form feed, lone CR and NEL (U+0085). Global so that `String.match` returns
 * every terminator, not just the first.
 */
const DELIMITERS = /\r\n|[\n\v\f\r\x85]/g;

/** `diff --git <old> <new>`: captures the two space-separated path tokens. */
const DIFF = /^diff --git (.*) (.*)$/;

// Extended header lines carrying a six-digit file mode (captured as a string).
const OLD_MODE = /^old mode (\d{6})$/;
const NEW_MODE = /^new mode (\d{6})$/;
const NEW_FILE_MODE = /^new file mode (\d{6})$/;
const DELETED_FILE_MODE = /^deleted file mode (\d{6})$/;

// Extended header lines that come in from/to pairs; the path is captured.
const COPY_FROM = /^copy from (.*)$/;
const COPY_TO = /^copy to (.*)$/;
const RENAME_FROM = /^rename from (.*)$/;
const RENAME_TO = /^rename to (.*)$/;

// Unified-diff file header pair that precedes the hunks of a file.
const OLD_FILE_HEADER = /^--- (.*)$/;
const NEW_FILE_HEADER = /^\+\+\+ (.*)$/;

/**
 * `@@ -oldStart[,oldLines] +newStart[,newLines] @@`. The two line counts are
 * optional capture groups and are `undefined` when omitted.
 */
const HUNK_HEADER = /@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@/;
27
/**
 * Throw an `Error` carrying `msg` unless `condition` is truthy.
 *
 * The `asserts condition` signature tells the compiler that `condition` is
 * truthy after the call returns, so the runtime check rejects every falsy
 * value (`false`, `null`, `undefined`, `0`, `''`, `NaN`), not only the
 * literal `false`.
 *
 * @param condition value that must be truthy.
 * @param msg message for the thrown error.
 * @throws Error when `condition` is falsy.
 */
function assert(condition: unknown, msg?: string): asserts condition {
  if (!condition) {
    throw new Error(msg);
  }
}
33
/**
 * Parse git diff format string.
 *
 * The diff library we were using does not support git diff format (rename,
 * copy, empty file, file mode change etc). This function is to extend the
 * original `parsePatch` function [1] and make it support git diff format [2].
 *
 * The patch is split into lines and consumed in a single forward pass. Each
 * pass of the top-level loop appends one entry to the result: it scans to the
 * next `diff --git` line, reads the extended header lines that follow it and
 * then collects hunks until the next `diff --git` line. Input that contains
 * no `diff --git` line therefore still yields one entry with no file names
 * and no hunks.
 *
 * [1] https://github.com/DefinitelyTyped/DefinitelyTyped/blob/master/types/diff/index.d.ts#L388
 * [2] https://github.com/git/git-scm.com/blob/main/spec/data/diff-generate-patch.txt
 *
 * @param patch text of the patch to parse.
 * @returns the parsed entries, in input order.
 * @throws Error (`invalid format '<line>'`) when a mode line, a copy/rename
 *   pair, a `---`/`+++` pair or an `@@` hunk header is malformed.
 */
export function parsePatch(patch: string): ParsedDiff[] {
  // `diffstr[k]` is line k without its terminator and `delimiters[k]` is the
  // terminator that followed it (absent for an unterminated last line).
  const diffstr: string[] = patch.split(DELIMITERS);
  const delimiters: string[] = patch.match(DELIMITERS) || [];
  const list: ParsedDiff[] = [];
  // Cursor into `diffstr`, shared and advanced by every nested parser below.
  let i = 0;

  /** Parse one file section: header, extended header lines, then hunks. */
  function parseIndex() {
    const index: ParsedDiff = {hunks: []};
    list.push(index);

    parseHeader(index);

    // Parse one or more extended header lines
    while (i < diffstr.length) {
      const line = diffstr[i];
      if (line.startsWith('old mode')) {
        parseOldMode(index);
      } else if (line.startsWith('new mode')) {
        parseNewMode(index);
      } else if (line.startsWith('deleted file mode')) {
        parseDeletedFileMode(index);
      } else if (line.startsWith('new file mode')) {
        parseNewFileMode(index);
      } else if (line.startsWith('copy ')) {
        parseCopy(index);
      } else if (line.startsWith('rename ')) {
        parseRename(index);
      } else if (line.startsWith('--- ')) {
        // The `---`/`+++` pair is the last thing before the hunks.
        parseFileHeader(index);
        break;
      } else if (line.startsWith('diff --git')) {
        // a new index starts
        break;
      } else {
        // ignore other types (e.g. similarity etc)
        i++;
      }
    }

    parseHunks(index);
  }

  /**
   * Skip lines until a `diff --git` line is found, record its two path
   * tokens and move past it. Consumes the rest of the input if none is found.
   */
  function parseHeader(index: ParsedDiff) {
    while (i < diffstr.length) {
      const header = DIFF.exec(diffstr[i]);
      i++;
      if (header) {
        index.oldFileName = header[1];
        index.newFileName = header[2];
        break;
      }
    }
  }

  /** `old mode <mode>`: record the old mode and mark the entry as modified. */
  function parseOldMode(index: ParsedDiff) {
    const arr = OLD_MODE.exec(diffstr[i]);
    assert(arr !== null, `invalid format '${diffstr[i]}'`);
    index.oldMode = arr[1];
    index.type = DiffType.Modified;
    i++;
  }

  /** `new mode <mode>`: record the new mode and mark the entry as modified. */
  function parseNewMode(index: ParsedDiff) {
    const arr = NEW_MODE.exec(diffstr[i]);
    assert(arr !== null, `invalid format '${diffstr[i]}'`);
    index.newMode = arr[1];
    index.type = DiffType.Modified;
    i++;
  }

  /**
   * `deleted file mode <mode>`: mark the entry as removed. The mode is stored
   * in `newMode`, the same field used by `new file mode`.
   */
  function parseDeletedFileMode(index: ParsedDiff) {
    const arr = DELETED_FILE_MODE.exec(diffstr[i]);
    assert(arr !== null, `invalid format '${diffstr[i]}'`);
    index.newMode = arr[1];
    index.type = DiffType.Removed;
    i++;
  }

  /** `new file mode <mode>`: record the mode and mark the entry as added. */
  function parseNewFileMode(index: ParsedDiff) {
    const arr = NEW_FILE_MODE.exec(diffstr[i]);
    assert(arr !== null, `invalid format '${diffstr[i]}'`);
    index.newMode = arr[1];
    index.type = DiffType.Added;
    i++;
  }

  /** Consume a `copy from` / `copy to` pair and mark the entry as copied. */
  function parseCopy(index: ParsedDiff) {
    assert(COPY_FROM.test(diffstr[i]), `invalid format '${diffstr[i]}'`);
    assert(COPY_TO.test(diffstr[i + 1]), `invalid format '${diffstr[i + 1]}'`);
    index.type = DiffType.Copied;
    i += 2;
  }

  /** Consume a `rename from` / `rename to` pair and mark the entry as renamed. */
  function parseRename(index: ParsedDiff) {
    assert(RENAME_FROM.test(diffstr[i]), `invalid format '${diffstr[i]}'`);
    assert(RENAME_TO.test(diffstr[i + 1]), `invalid format '${diffstr[i + 1]}'`);
    index.type = DiffType.Renamed;
    i += 2;
  }

  /**
   * Consume the `---` / `+++` pair. An entry whose type was not set by an
   * extended header line is a plain modification.
   */
  function parseFileHeader(index: ParsedDiff) {
    assert(OLD_FILE_HEADER.test(diffstr[i]), `invalid format '${diffstr[i]}'`);
    assert(NEW_FILE_HEADER.test(diffstr[i + 1]), `invalid format '${diffstr[i + 1]}'`);
    if (index.type === undefined) {
      index.type = DiffType.Modified;
    }
    i += 2;
  }

  /** Collect hunks until the next `diff --git` line or the end of input. */
  function parseHunks(index: ParsedDiff) {
    while (i < diffstr.length) {
      const line = diffstr[i];
      if (DIFF.test(line)) {
        break;
      } else if (line.startsWith('@@')) {
        index.hunks.push(parseHunk());
      } else {
        // ignore unexpected content
        i++;
      }
    }
  }

  /*
   * Parses a hunk. This is copied from jsdiff library:
   * https://github.com/kpdecker/jsdiff/blob/master/src/patch/parse.js
   *
   * Unlike the jsdiff version, a line that starts with `@@` but does not
   * match the hunk header grammar is rejected instead of producing a hunk
   * whose line numbers are NaN.
   */
  function parseHunk(): Hunk {
    const hunkHeaderLine = diffstr[i++];
    const hunkHeader = HUNK_HEADER.exec(hunkHeaderLine);
    assert(hunkHeader !== null, `invalid format '${hunkHeaderLine}'`);

    const hunk: Hunk = {
      oldStart: +hunkHeader[1],
      // An omitted line count means exactly one line.
      oldLines: typeof hunkHeader[2] === 'undefined' ? 1 : +hunkHeader[2],
      newStart: +hunkHeader[3],
      newLines: typeof hunkHeader[4] === 'undefined' ? 1 : +hunkHeader[4],
      lines: [],
      linedelimiters: [],
    };

    // Unified Diff Format quirk: If the hunk size is 0,
    // the first number is one lower than one would expect.
    // https://www.artima.com/weblogs/viewpost.jsp?thread=164293
    if (hunk.oldLines === 0) {
      hunk.oldStart += 1;
    }
    if (hunk.newLines === 0) {
      hunk.newStart += 1;
    }

    let addCount = 0;
    let removeCount = 0;
    for (; i < diffstr.length; i++) {
      const line = diffstr[i];
      // Lines starting with '---' could be mistaken for the "remove line" operation
      // But they could be the header for the next file. Therefore prune such cases out.
      if (
        line.startsWith('--- ') &&
        i + 2 < diffstr.length &&
        diffstr[i + 1].startsWith('+++ ') &&
        diffstr[i + 2].startsWith('@@')
      ) {
        break;
      }
      // An empty line that is not the very last line counts as a context line.
      const operation = line.length === 0 && i !== diffstr.length - 1 ? ' ' : line[0];

      if (operation === '+' || operation === '-' || operation === ' ' || operation === '\\') {
        hunk.lines.push(line);
        // An unterminated last line has no recorded delimiter; use '\n'.
        hunk.linedelimiters.push(delimiters[i] || '\n');

        if (operation === '+') {
          addCount++;
        } else if (operation === '-') {
          removeCount++;
        } else if (operation === ' ') {
          addCount++;
          removeCount++;
        }
      } else {
        // Anything else ends the hunk; the caller decides what to do with it.
        break;
      }
    }

    // Handle the empty block count case
    if (!addCount && hunk.newLines === 1) {
      hunk.newLines = 0;
    }
    if (!removeCount && hunk.oldLines === 1) {
      hunk.oldLines = 0;
    }

    return hunk;
  }

  while (i < diffstr.length) {
    parseIndex();
  }

  return list;
}
246
/**
 * Guess if it's a submodule change by the shape of the hunks.
 * sl diff doesn't have file mode in the outputs yet.
 *
 * Only patches with exactly one hunk are considered. Such a patch is reported
 * as a submodule change when either of the following holds:
 *
 * - it is an added file whose recorded mode is `160000`;
 * - it is a modification whose hunk replaces line 1 with line 1 and consists
 *   of one removed and one added `Subproject commit <hash>` line.
 *
 * Diff pattern for a submodule change:
 *
 *   diff --git a/path/to/submodule b/path/to/submodule
 *   --- a/path/to/submodule
 *   +++ b/path/to/submodule
 *   @@ -1,1 +1,1 @@
 *   -Subproject commit <hash>
 *   +Subproject commit <hash>
 *
 * Diff pattern for adding a submodule:
 *
 *   diff --git a/path/to/submodule b/path/to/submodule
 *   new file mode 160000
 *   --- /dev/null
 *   +++ b/path/to/submodule
 *   @@ -0,0 +1,1 @@
 *   +Subproject commit <hash>
 */
export function guessIsSubmodule(patch: ParsedDiff): boolean {
  const {hunks, type, newMode} = patch;
  if (hunks.length !== 1) {
    return false;
  }

  // Added submodule: the gitlink file mode alone is enough.
  if (type === DiffType.Added && newMode === '160000') {
    return true;
  }
  if (type !== DiffType.Modified) {
    return false;
  }

  // Modified submodule: a single-line replacement at line 1 on both sides.
  const {oldStart, oldLines, newStart, newLines, lines} = hunks[0];
  const replacesFirstLineOnly =
    newLines === 1 && newStart === 1 && oldLines === 1 && oldStart === 1;
  if (!replacesFirstLineOnly || lines.length !== 2) {
    return false;
  }

  const removedCommit = /^-Subproject commit [0-9A-Fa-f]{7,64}$/;
  const addedCommit = /^\+Subproject commit [0-9A-Fa-f]{7,64}$/;
  return removedCommit.test(lines[0]) && addedCommit.test(lines[1]);
}
288
/**
 * Build a `ParsedDiff` describing how `oldCodeLines` turns into
 * `newCodeLines`.
 *
 * The two line arrays are compared with `diffBlocks`. Every block whose tag
 * is not `'='` becomes one hunk made of the old lines prefixed with `-`
 * followed by the new lines prefixed with `+`; no context lines are emitted.
 * (Presumably `'='` marks an unchanged region; confirm against `diffBlocks`.)
 *
 * @param oldCodeLines lines of the old version.
 * @param newCodeLines lines of the new version.
 * @param lineNumber offset added to each block's start index to obtain the
 *   hunk's `oldStart` / `newStart`.
 * @param oldFileName copied to the result unchanged.
 * @param newFileName copied to the result unchanged.
 * @returns a `ParsedDiff` holding the file names and the generated hunks.
 */
export function parseParsedDiff(
  oldCodeLines: string[],
  newCodeLines: string[],
  lineNumber: number,
  oldFileName?: string,
  newFileName?: string,
): ParsedDiff {
  const hunks: Hunk[] = [];

  for (const block of diffBlocks(oldCodeLines, newCodeLines)) {
    if (block[0] === '=') {
      continue;
    }

    // block[1] holds [oldBegin, oldEnd, newBegin, newEnd], used as slice bounds.
    const range = block[1];
    const oldBegin = range[0];
    const newBegin = range[2];

    const removed = oldCodeLines.slice(oldBegin, range[1]).map(text => `-${text}`);
    const added = newCodeLines.slice(newBegin, range[3]).map(text => `+${text}`);
    const lines = [...removed, ...added];

    hunks.push({
      oldStart: lineNumber + oldBegin,
      oldLines: removed.length,
      newStart: lineNumber + newBegin,
      newLines: added.length,
      lines,
      // Every generated line is terminated with '\n'.
      linedelimiters: lines.map(() => '\n'),
    });
  }

  return {
    oldFileName,
    newFileName,
    hunks,
  } as ParsedDiff;
}
329
/**
 * Given a diff patch, filter out changes to files that are in the list.
 *
 * Each entry of `files` may be written with or without the `a/` / `b/` prefix
 * used in `diff --git` headers. An entry such as `a/foo.ts` is ambiguous: it
 * can mean the prefixed form of `foo.ts` or a file inside a real top-level
 * `a` directory. Both readings are honoured, so an entry matches when either
 * its literal text or its prefix-stripped text equals the old or new path of
 * a diff (those paths are compared after removing their own `a/` / `b/`
 * header prefix).
 *
 * @param patch git-format patch text.
 * @param files paths whose diffs must be dropped.
 * @returns the patch re-serialised without the diffs of the listed files.
 */
export function filterFilesFromPatch(patch: string, files: string[]): string {
  const parsedDiffs = parsePatch(patch);

  // Keep both spellings of every entry so that paths under a genuine `a/` or
  // `b/` directory are not lost when the prefix is stripped.
  const excluded = new Set<string>();
  for (const file of files) {
    excluded.add(file);
    excluded.add(file.replace(/^[ab]\//, ''));
  }

  // Filter out diffs where the filename matches any in the files array
  const filteredDiffs = parsedDiffs.filter(diff => {
    // Extract filenames without a/ and b/ prefixes
    const oldFile = diff.oldFileName?.replace(/^a\//, '');
    const newFile = diff.newFileName?.replace(/^b\//, '');

    const shouldFilter =
      (oldFile !== undefined && excluded.has(oldFile)) ||
      (newFile !== undefined && excluded.has(newFile));

    return !shouldFilter;
  });

  return stringifyPatch(filteredDiffs);
}
355