parse.ts · grove

10.3 KB · 355 lines

1	/**
2	* Copyright (c) Meta Platforms, Inc. and affiliates.
3	*
4	* This source code is licensed under the MIT license found in the
5	* LICENSE file in the root directory of this source tree.
6	*/
7
8	import {diffBlocks} from '../diff';
9	import {stringifyPatch} from './stringify';
10	import type {Hunk, ParsedDiff} from './types';
11	import {DiffType} from './types';
12
// Opening line of a per-file section: `diff --git <old> <new>`.
// Both captures are greedy, so when a path contains spaces the line is split
// at its last space.
const DIFF = /^diff --git (.*) (.*)$/;

// Extended header lines that may follow the `diff --git` line.
const RENAME_FROM = /^rename from (.*)$/;
const RENAME_TO = /^rename to (.*)$/;
const COPY_FROM = /^copy from (.*)$/;
const COPY_TO = /^copy to (.*)$/;
// File modes are captured as six-digit strings (e.g. `100644`, `160000`).
const NEW_FILE_MODE = /^new file mode (\d{6})$/;
const DELETED_FILE_MODE = /^deleted file mode (\d{6})$/;
const OLD_MODE = /^old mode (\d{6})$/;
const NEW_MODE = /^new mode (\d{6})$/;

// `@@ -oldStart[,oldLines] +newStart[,newLines] @@`. Deliberately unanchored:
// it is used with `String.prototype.split`, which places the four captures at
// indices 1-4 of the result.
const HUNK_HEADER = /@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@/;

// The `---` / `+++` pair that precedes the hunks of a file.
const OLD_FILE_HEADER = /^--- (.*)$/;
const NEW_FILE_HEADER = /^\+\+\+ (.*)$/;

// Line terminators recognized when splitting a patch. CRLF is listed first so
// that it is consumed as a single delimiter rather than as CR followed by LF.
const DELIMITERS = /\r\n|[\n\v\f\r\x85]/g;
27
/**
 * Throw unless `condition` is truthy.
 *
 * The `asserts condition` return type tells the compiler that `condition` is
 * truthy once this call returns, so the runtime check has to reject every
 * falsy value (`null`, `undefined`, `0`, `''`, `NaN`), not only a literal
 * `false`.
 *
 * @param condition value that must be truthy
 * @param msg message of the thrown error
 * @throws Error when `condition` is falsy
 */
function assert(condition: unknown, msg?: string): asserts condition {
  if (!condition) {
    throw new Error(msg);
  }
}
33
/**
 * Parse a git-format diff string into one `ParsedDiff` per file section.
 *
 * The diff library we were using does not support git diff format (rename,
 * copy, empty file, file mode change etc). This function extends the
 * original `parsePatch` function [1] to make it support git diff format [2].
 *
 * The text is split into lines that are consumed through a shared cursor.
 * Each section starts at a `diff --git` line, continues through optional
 * extended headers and the `---`/`+++` file header, and ends with zero or more
 * hunks. Lines that are not recognized are skipped. Text that contains no
 * `diff --git` line still yields a single entry with no file names and no
 * hunks.
 *
 * [1] https://github.com/DefinitelyTyped/DefinitelyTyped/blob/master/types/diff/index.d.ts#L388
 * [2] https://github.com/git/git-scm.com/blob/main/spec/data/diff-generate-patch.txt
 *
 * @param patch text of the diff to parse
 * @returns the parsed sections, in input order
 * @throws Error if a mode, copy, rename, or file header line is malformed
 */
export function parsePatch(patch: string): ParsedDiff[] {
  // `split` and `match` share one pattern, so `delimiters[k]` is the terminator
  // that followed `diffstr[k]`; the final line has no terminator.
  const diffstr: string[] = patch.split(DELIMITERS);
  const delimiters: string[] = patch.match(DELIMITERS) ?? [];
  const list: ParsedDiff[] = [];
  // Cursor into `diffstr`, advanced by every helper below.
  let i = 0;

  /** Parse one file section: header, extended headers, then hunks. */
  function parseIndex() {
    const index: ParsedDiff = {hunks: []};
    list.push(index);

    parseHeader(index);

    // Parse one or more extended header lines
    while (i < diffstr.length) {
      const line = diffstr[i];
      if (/^old mode/.test(line)) {
        parseOldMode(index);
      } else if (/^new mode/.test(line)) {
        parseNewMode(index);
      } else if (/^deleted file mode/.test(line)) {
        parseDeletedFileMode(index);
      } else if (/^new file mode/.test(line)) {
        parseNewFileMode(index);
      } else if (/^copy /.test(line)) {
        parseCopy(index);
      } else if (/^rename /.test(line)) {
        parseRename(index);
      } else if (/^--- /.test(line)) {
        // The file header is always the last thing before the hunks.
        parseFileHeader(index);
        break;
      } else if (/^diff --git/.test(line)) {
        // a new index starts
        break;
      } else {
        // ignore other types (e.g. similarity etc)
        i++;
      }
    }

    parseHunks(index);
  }

  /** Skip forward to the next `diff --git` line and record its two names. */
  function parseHeader(index: ParsedDiff) {
    while (i < diffstr.length) {
      const line = diffstr[i];
      // Diff index
      const header = DIFF.exec(line);
      if (header) {
        index.oldFileName = header[1];
        index.newFileName = header[2];
        i++;
        break;
      }
      i++;
    }
  }

  /** `old mode NNNNNN`: record the old mode and mark the file as modified. */
  function parseOldMode(index: ParsedDiff) {
    const arr = OLD_MODE.exec(diffstr[i]);
    assert(arr !== null, `invalid format '${diffstr[i]}'`);
    index.oldMode = arr[1];
    index.type = DiffType.Modified;
    i++;
  }

  /** `new mode NNNNNN`: record the new mode and mark the file as modified. */
  function parseNewMode(index: ParsedDiff) {
    const arr = NEW_MODE.exec(diffstr[i]);
    assert(arr !== null, `invalid format '${diffstr[i]}'`);
    index.newMode = arr[1];
    index.type = DiffType.Modified;
    i++;
  }

  /** `deleted file mode NNNNNN`: the mode is stored in `newMode`. */
  function parseDeletedFileMode(index: ParsedDiff) {
    const arr = DELETED_FILE_MODE.exec(diffstr[i]);
    assert(arr !== null, `invalid format '${diffstr[i]}'`);
    index.newMode = arr[1];
    index.type = DiffType.Removed;
    i++;
  }

  /** `new file mode NNNNNN`: record the mode and mark the file as added. */
  function parseNewFileMode(index: ParsedDiff) {
    const arr = NEW_FILE_MODE.exec(diffstr[i]);
    assert(arr !== null, `invalid format '${diffstr[i]}'`);
    index.newMode = arr[1];
    index.type = DiffType.Added;
    i++;
  }

  /** `copy from` / `copy to` pair; only the diff type is recorded. */
  function parseCopy(index: ParsedDiff) {
    assert(COPY_FROM.test(diffstr[i]), `invalid format '${diffstr[i]}'`);
    assert(COPY_TO.test(diffstr[i + 1]), `invalid format '${diffstr[i + 1]}'`);
    index.type = DiffType.Copied;
    i += 2;
  }

  /** `rename from` / `rename to` pair; only the diff type is recorded. */
  function parseRename(index: ParsedDiff) {
    assert(RENAME_FROM.test(diffstr[i]), `invalid format '${diffstr[i]}'`);
    assert(RENAME_TO.test(diffstr[i + 1]), `invalid format '${diffstr[i + 1]}'`);
    index.type = DiffType.Renamed;
    i += 2;
  }

  /** `---` / `+++` pair; defaults the type to modified when nothing set it. */
  function parseFileHeader(index: ParsedDiff) {
    assert(OLD_FILE_HEADER.test(diffstr[i]), `invalid format '${diffstr[i]}'`);
    assert(NEW_FILE_HEADER.test(diffstr[i + 1]), `invalid format '${diffstr[i + 1]}'`);
    if (index.type === undefined) {
      index.type = DiffType.Modified;
    }
    i += 2;
  }

  /** Collect hunks until the next `diff --git` line or the end of input. */
  function parseHunks(index: ParsedDiff) {
    while (i < diffstr.length) {
      const line = diffstr[i];
      if (DIFF.test(line)) {
        break;
      } else if (/^@@/.test(line)) {
        index.hunks.push(parseHunk());
      } else {
        // ignore unexpected content
        i++;
      }
    }
  }

  /*
   * Parses a hunk. This is copied from jsdiff library:
   * https://github.com/kpdecker/jsdiff/blob/master/src/patch/parse.js
   */
  function parseHunk(): Hunk {
    const hunkHeaderLine = diffstr[i++];
    // Splitting on a pattern with capture groups puts the four captured numbers
    // at indices 1-4. NOTE: a line that starts with `@@` but does not match
    // leaves them undefined, which yields NaN start positions.
    const hunkHeader = hunkHeaderLine.split(HUNK_HEADER);

    const hunk: Hunk = {
      oldStart: +hunkHeader[1],
      oldLines: typeof hunkHeader[2] === 'undefined' ? 1 : +hunkHeader[2],
      newStart: +hunkHeader[3],
      newLines: typeof hunkHeader[4] === 'undefined' ? 1 : +hunkHeader[4],
      lines: [],
      linedelimiters: [],
    };

    // Unified Diff Format quirk: If the hunk size is 0,
    // the first number is one lower than one would expect.
    // https://www.artima.com/weblogs/viewpost.jsp?thread=164293
    if (hunk.oldLines === 0) {
      hunk.oldStart += 1;
    }
    if (hunk.newLines === 0) {
      hunk.newStart += 1;
    }

    let addCount = 0;
    let removeCount = 0;
    for (; i < diffstr.length; i++) {
      // Lines starting with '---' could be mistaken for the "remove line" operation
      // But they could be the header for the next file. Therefore prune such cases out.
      if (
        diffstr[i].indexOf('--- ') === 0 &&
        i + 2 < diffstr.length &&
        diffstr[i + 1].indexOf('+++ ') === 0 &&
        diffstr[i + 2].indexOf('@@') === 0
      ) {
        break;
      }

      // An empty line counts as a context line unless it is the very last
      // element, which is what `split` leaves after a trailing terminator.
      const operation =
        diffstr[i].length === 0 && i !== diffstr.length - 1 ? ' ' : diffstr[i][0];

      if (operation === '+' || operation === '-' || operation === ' ' || operation === '\\') {
        hunk.lines.push(diffstr[i]);
        hunk.linedelimiters.push(delimiters[i] ?? '\n');

        if (operation === '+') {
          addCount++;
        } else if (operation === '-') {
          removeCount++;
        } else if (operation === ' ') {
          addCount++;
          removeCount++;
        }
      } else {
        // Anything else ends the hunk.
        break;
      }
    }

    // Handle the empty block count case
    if (!addCount && hunk.newLines === 1) {
      hunk.newLines = 0;
    }
    if (!removeCount && hunk.oldLines === 1) {
      hunk.oldLines = 0;
    }

    return hunk;
  }

  while (i < diffstr.length) {
    parseIndex();
  }

  return list;
}
246
/**
 * Guess if it's a submodule change by the shape of the hunks.
 * sl diff doesn't have file mode in the outputs yet.
 *
 * Diff pattern for a submodule change:
 *
 *   diff --git a/path/to/submodule b/path/to/submodule
 *   --- a/path/to/submodule
 *   +++ b/path/to/submodule
 *   @@ -1,1 +1,1 @@
 *   -Subproject commit <hash>
 *   +Subproject commit <hash>
 *
 * Diff pattern for adding a submodule:
 *
 *   diff --git a/path/to/submodule b/path/to/submodule
 *   new file mode 160000
 *   --- /dev/null
 *   +++ b/path/to/submodule
 *   @@ -0,0 +1,1 @@
 *   +Subproject commit <hash>
 *
 * @param patch a single parsed file section
 * @returns true when the section has exactly one hunk and matches one of the
 *   two patterns above
 */
export function guessIsSubmodule(patch: ParsedDiff): boolean {
  if (patch.hunks.length !== 1) {
    return false;
  }
  const hunk = patch.hunks[0];
  const oldLine = /^-Subproject commit [0-9A-Fa-f]{7,64}$/;
  const newLine = /^\+Subproject commit [0-9A-Fa-f]{7,64}$/;

  // An existing submodule moved to another commit: a one-line replacement at
  // line 1 whose two lines are both `Subproject commit <hash>`.
  const isCommitChange =
    patch.type === DiffType.Modified &&
    hunk.newLines === 1 &&
    hunk.newStart === 1 &&
    hunk.oldLines === 1 &&
    hunk.oldStart === 1 &&
    hunk.lines.length === 2 &&
    oldLine.test(hunk.lines[0]) &&
    newLine.test(hunk.lines[1]);

  // A newly added submodule is identified by its file mode alone; the hunk
  // content is not inspected.
  const isNewSubmodule = patch.type === DiffType.Added && patch.newMode === '160000';

  return isCommitChange || isNewSubmodule;
}
288
/**
 * Build a `ParsedDiff` by diffing two arrays of code lines.
 *
 * Every block reported by `diffBlocks` whose tag is not `'=' ` becomes one
 * hunk that lists all removed lines (prefixed with `-`) followed by all added
 * lines (prefixed with `+`). No context lines are emitted, and every line
 * delimiter is `'\n'`.
 *
 * @param oldCodeLines lines of the old content, without terminators
 * @param newCodeLines lines of the new content, without terminators
 * @param lineNumber offset added to each block's start index to produce
 *   `oldStart` / `newStart`
 * @param oldFileName copied to the result unchanged
 * @param newFileName copied to the result unchanged
 * @returns a diff holding the generated hunks and the two file names
 */
export function parseParsedDiff(
  oldCodeLines: string[],
  newCodeLines: string[],
  lineNumber: number,
  oldFileName?: string,
  newFileName?: string,
): ParsedDiff {
  const hunks: Hunk[] = [];

  for (const [tag, range] of diffBlocks(oldCodeLines, newCodeLines)) {
    // presumably '=' marks a block that is identical on both sides — confirm
    // against `diffBlocks`
    if (tag === '=') {
      continue;
    }

    // The range is used as [oldBegin, oldEnd, newBegin, newEnd] slice bounds.
    const [oldBegin, oldEnd, newBegin, newEnd] = range;
    const removed = oldCodeLines.slice(oldBegin, oldEnd).map(text => '-' + text);
    const added = newCodeLines.slice(newBegin, newEnd).map(text => '+' + text);
    const lines = removed.concat(added);

    hunks.push({
      oldStart: lineNumber + oldBegin,
      oldLines: removed.length,
      newStart: lineNumber + newBegin,
      newLines: added.length,
      lines,
      linedelimiters: lines.map(() => '\n'),
    });
  }

  return {oldFileName, newFileName, hunks};
}
329
/**
 * Given a diff patch, filter out changes to files that are in the list.
 *
 * A file section is dropped when either its old name (without the leading
 * `a/`) or its new name (without the leading `b/`) equals one of `files`.
 * Entries of `files` may be given with or without an `a/` or `b/` prefix; one
 * such prefix is stripped before comparing. As a consequence, a real path
 * whose first directory is `a` or `b` must be passed with the diff prefix
 * (e.g. `a/a/file.ts`) to be matched.
 *
 * @param patch text of the diff
 * @param files paths of the files whose sections should be removed
 * @returns the patch re-serialized without the matching sections
 */
export function filterFilesFromPatch(patch: string, files: string[]): string {
  const parsedDiffs = parsePatch(patch);

  // Strip a single leading `a/` or `b/`; a Set keeps each lookup constant-time.
  const excluded = new Set(files.map(file => file.replace(/^[ab]\//, '')));
  const isExcluded = (name: string | undefined): boolean =>
    name !== undefined && excluded.has(name);

  const keptDiffs = parsedDiffs.filter(
    diff =>
      !isExcluded(diff.oldFileName?.replace(/^a\//, '')) &&
      !isExcluded(diff.newFileName?.replace(/^b\//, '')),
  );

  return stringifyPatch(keptDiffs);
}
355