Blame · parse.ts

b69ab31	1	/**
b69ab31	2	* Copyright (c) Meta Platforms, Inc. and affiliates.
b69ab31	3	*
b69ab31	4	* This source code is licensed under the MIT license found in the
b69ab31	5	* LICENSE file in the root directory of this source tree.
b69ab31	6	*/
b69ab31	7
b69ab31	8	import {diffBlocks} from '../diff';
b69ab31	9	import {stringifyPatch} from './stringify';
b69ab31	10	import type {Hunk, ParsedDiff} from './types';
b69ab31	11	import {DiffType} from './types';
b69ab31	12
// Section header: `diff --git <old path> <new path>`. Both captures are greedy,
// so paths of any length are accepted.
const DIFF = /^diff --git (.*) (.*)$/;

// Git extended header lines that may follow the section header.
const RENAME_FROM = /^rename from (.*)$/;
const RENAME_TO = /^rename to (.*)$/;
const COPY_FROM = /^copy from (.*)$/;
const COPY_TO = /^copy to (.*)$/;
const NEW_FILE_MODE = /^new file mode (\d{6})$/;
const DELETED_FILE_MODE = /^deleted file mode (\d{6})$/;
const OLD_MODE = /^old mode (\d{6})$/;
const NEW_MODE = /^new mode (\d{6})$/;

// Hunk header `@@ -oldStart[,oldLines] +newStart[,newLines] @@`; the two line
// counts are optional capture groups.
const HUNK_HEADER = /@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@/;

// Unified diff file header lines.
const OLD_FILE_HEADER = /^--- (.*)$/;
const NEW_FILE_HEADER = /^\+\+\+ (.*)$/;

// Line terminators recognised when splitting a patch into lines. `\r\n` is
// listed first so that a CRLF pair is consumed as a single delimiter.
const DELIMITERS = /\r\n|[\n\v\f\r\x85]/g;
b69ab31	27
/**
 * Throws an `Error` carrying `msg` unless `condition` is truthy.
 *
 * The `asserts condition` return type tells the compiler that `condition` is
 * truthy after the call, so the runtime check rejects every falsy value
 * (`false`, `null`, `undefined`, `0`, `''`, `NaN`) rather than only the
 * literal `false`.
 *
 * @param condition Value that must be truthy.
 * @param msg Message for the thrown error.
 * @throws Error when `condition` is falsy.
 */
function assert(condition: unknown, msg?: string): asserts condition {
  if (!condition) {
    throw new Error(msg);
  }
}
b69ab31	33
/**
 * Parse git diff format string.
 *
 * The diff library we were using does not support git diff format (rename,
 * copy, empty file, file mode change etc). This function extends the
 * original `parsePatch` function [1] so that it supports git diff format [2].
 *
 * [1] https://github.com/DefinitelyTyped/DefinitelyTyped/blob/master/types/diff/index.d.ts#L388
 * [2] https://github.com/git/git-scm.com/blob/main/spec/data/diff-generate-patch.txt
 *
 * @param patch Diff text. Lines are split on any delimiter matched by
 *   `DELIMITERS`, and each hunk line remembers the delimiter that followed it.
 * @returns One `ParsedDiff` per `diff --git` section, in input order. At least
 *   one entry is always produced: input without any `diff --git` line yields a
 *   single entry with no hunks.
 * @throws Error when a line that starts like a known header (`old mode`,
 *   `new mode`, `deleted file mode`, `new file mode`, `copy `, `rename `,
 *   `--- `) does not match the full expected format.
 */
export function parsePatch(patch: string): ParsedDiff[] {
  // `diffstr[k]` is the k-th line; `delimiters[k]` is the delimiter after it.
  const diffstr: string[] = patch.split(DELIMITERS);
  const delimiters: string[] = patch.match(DELIMITERS) ?? [];
  const list: ParsedDiff[] = [];
  // Cursor into `diffstr`, shared and advanced by every nested parser below.
  let i = 0;

  /** Returns the match of `pattern` against line `at`, or throws if there is none. */
  function expectLine(pattern: RegExp, at: number): RegExpExecArray {
    const match = pattern.exec(diffstr[at]);
    assert(match !== null, `invalid format '${diffstr[at]}'`);
    return match;
  }

  /** Parses one `diff --git` section (header, extended headers, hunks). */
  function parseIndex() {
    const index: ParsedDiff = {hunks: []};
    list.push(index);

    parseHeader(index);

    // Parse one or more extended header lines
    while (i < diffstr.length) {
      const line = diffstr[i];
      if (line.startsWith('old mode')) {
        parseOldMode(index);
      } else if (line.startsWith('new mode')) {
        parseNewMode(index);
      } else if (line.startsWith('deleted file mode')) {
        parseDeletedFileMode(index);
      } else if (line.startsWith('new file mode')) {
        parseNewFileMode(index);
      } else if (line.startsWith('copy ')) {
        parseCopy(index);
      } else if (line.startsWith('rename ')) {
        parseRename(index);
      } else if (line.startsWith('--- ')) {
        // The `---`/`+++` pair ends the extended header; hunks follow.
        parseFileHeader(index);
        break;
      } else if (line.startsWith('diff --git')) {
        // a new index starts
        break;
      } else {
        // ignore other types (e.g. similarity etc)
        i++;
      }
    }

    parseHunks(index);
  }

  /** Skips lines until a `diff --git` header is found and records its two paths. */
  function parseHeader(index: ParsedDiff) {
    while (i < diffstr.length) {
      const header = DIFF.exec(diffstr[i]);
      i++;
      if (header) {
        index.oldFileName = header[1];
        index.newFileName = header[2];
        break;
      }
    }
  }

  function parseOldMode(index: ParsedDiff) {
    index.oldMode = expectLine(OLD_MODE, i)[1];
    index.type = DiffType.Modified;
    i++;
  }

  function parseNewMode(index: ParsedDiff) {
    index.newMode = expectLine(NEW_MODE, i)[1];
    index.type = DiffType.Modified;
    i++;
  }

  function parseDeletedFileMode(index: ParsedDiff) {
    // The mode of a deleted file is stored in `newMode`.
    // NOTE(review): presumably the stringifier reads it from there - confirm
    // before moving it to `oldMode`.
    index.newMode = expectLine(DELETED_FILE_MODE, i)[1];
    index.type = DiffType.Removed;
    i++;
  }

  function parseNewFileMode(index: ParsedDiff) {
    index.newMode = expectLine(NEW_FILE_MODE, i)[1];
    index.type = DiffType.Added;
    i++;
  }

  /** Consumes a `copy from` / `copy to` pair; only the diff type is recorded. */
  function parseCopy(index: ParsedDiff) {
    expectLine(COPY_FROM, i);
    expectLine(COPY_TO, i + 1);
    index.type = DiffType.Copied;
    i += 2;
  }

  /** Consumes a `rename from` / `rename to` pair; only the diff type is recorded. */
  function parseRename(index: ParsedDiff) {
    expectLine(RENAME_FROM, i);
    expectLine(RENAME_TO, i + 1);
    index.type = DiffType.Renamed;
    i += 2;
  }

  /** Consumes the `---` / `+++` pair and defaults the type to Modified if still unset. */
  function parseFileHeader(index: ParsedDiff) {
    expectLine(OLD_FILE_HEADER, i);
    expectLine(NEW_FILE_HEADER, i + 1);
    if (index.type === undefined) {
      index.type = DiffType.Modified;
    }
    i += 2;
  }

  /** Collects hunks until the next `diff --git` header or the end of input. */
  function parseHunks(index: ParsedDiff) {
    while (i < diffstr.length) {
      const line = diffstr[i];
      if (DIFF.test(line)) {
        break;
      } else if (line.startsWith('@@')) {
        index.hunks.push(parseHunk());
      } else {
        // ignore unexpected content
        i++;
      }
    }
  }

  /*
   * Parses a hunk. This is copied from jsdiff library:
   * https://github.com/kpdecker/jsdiff/blob/master/src/patch/parse.js
   */
  function parseHunk(): Hunk {
    const hunkHeaderLine = diffstr[i++];
    // Splitting on the pattern leaves the capture groups at indices 1..4.
    // A line that starts with `@@` but does not match is not rejected here;
    // its start fields simply become NaN.
    const hunkHeader = hunkHeaderLine.split(HUNK_HEADER);

    const hunk: Hunk = {
      oldStart: +hunkHeader[1],
      oldLines: typeof hunkHeader[2] === 'undefined' ? 1 : +hunkHeader[2],
      newStart: +hunkHeader[3],
      newLines: typeof hunkHeader[4] === 'undefined' ? 1 : +hunkHeader[4],
      lines: [],
      linedelimiters: [],
    };

    // Unified Diff Format quirk: If the hunk size is 0,
    // the first number is one lower than one would expect.
    // https://www.artima.com/weblogs/viewpost.jsp?thread=164293
    if (hunk.oldLines === 0) {
      hunk.oldStart += 1;
    }
    if (hunk.newLines === 0) {
      hunk.newStart += 1;
    }

    let addCount = 0;
    let removeCount = 0;
    for (; i < diffstr.length; i++) {
      const line = diffstr[i];

      // Lines starting with '---' could be mistaken for the "remove line" operation
      // But they could be the header for the next file. Therefore prune such cases out.
      if (
        line.startsWith('--- ') &&
        i + 2 < diffstr.length &&
        diffstr[i + 1].startsWith('+++ ') &&
        diffstr[i + 2].startsWith('@@')
      ) {
        break;
      }

      // An empty line counts as context unless it is the very last line, which
      // is just the remainder after the final delimiter.
      const operation = line.length === 0 && i !== diffstr.length - 1 ? ' ' : line[0];

      if (operation === '+' || operation === '-' || operation === ' ' || operation === '\\') {
        hunk.lines.push(line);
        hunk.linedelimiters.push(delimiters[i] ?? '\n');

        if (operation === '+') {
          addCount++;
        } else if (operation === '-') {
          removeCount++;
        } else if (operation === ' ') {
          addCount++;
          removeCount++;
        }
      } else {
        // Anything else ends the hunk.
        break;
      }
    }

    // Handle the empty block count case
    if (!addCount && hunk.newLines === 1) {
      hunk.newLines = 0;
    }
    if (!removeCount && hunk.oldLines === 1) {
      hunk.oldLines = 0;
    }

    return hunk;
  }

  while (i < diffstr.length) {
    parseIndex();
  }

  return list;
}
b69ab31	246
/**
 * Guess if it's a submodule change by the shape of the hunks.
 * sl diff doesn't have file mode in the outputs yet.
 *
 * Diff pattern for a submodule change:
 *
 *   diff --git a/path/to/submodule b/path/to/submodule
 *   --- a/path/to/submodule
 *   +++ b/path/to/submodule
 *   @@ -1,1 +1,1 @@
 *   -Subproject commit <hash>
 *   +Subproject commit <hash>
 *
 * Diff pattern for adding a submodule:
 *
 *   diff --git a/path/to/submodule b/path/to/submodule
 *   new file mode 160000
 *   --- /dev/null
 *   +++ b/path/to/submodule
 *   @@ -0,0 +1,1 @@
 *   +Subproject commit <hash>
 *
 * @param patch A single parsed file diff.
 * @returns `true` when the diff has exactly one hunk and is either an added
 *   file with mode `160000`, or a modified file whose hunk replaces line 1
 *   with one `Subproject commit <hex>` line for another.
 */
export function guessIsSubmodule(patch: ParsedDiff): boolean {
  if (patch.hunks.length !== 1) {
    return false;
  }

  // Added submodule: the file mode alone identifies it.
  if (patch.type === DiffType.Added) {
    return patch.newMode === '160000';
  }
  if (patch.type !== DiffType.Modified) {
    return false;
  }

  // Modified submodule: a single one-line replacement of the commit pointer.
  const oldLine = /^-Subproject commit [0-9A-Fa-f]{7,64}$/;
  const newLine = /^\+Subproject commit [0-9A-Fa-f]{7,64}$/;
  const hunk = patch.hunks[0];
  return (
    hunk.newLines === 1 &&
    hunk.newStart === 1 &&
    hunk.oldLines === 1 &&
    hunk.oldStart === 1 &&
    hunk.lines.length === 2 &&
    oldLine.test(hunk.lines[0]) &&
    newLine.test(hunk.lines[1])
  );
}
b69ab31	288
/**
 * Builds a `ParsedDiff` by diffing two arrays of lines.
 *
 * Every non-equal block reported by `diffBlocks` becomes one hunk. A hunk
 * lists all removed lines (prefixed with `-`) followed by all added lines
 * (prefixed with `+`); no context lines are emitted and every line delimiter
 * is `\n`. The returned diff has no `type` set.
 *
 * @param oldCodeLines Lines of the old content.
 * @param newCodeLines Lines of the new content.
 * @param lineNumber Offset added to each block's start index to produce the
 *   hunk's `oldStart` / `newStart`.
 * @param oldFileName Copied to the result as-is.
 * @param newFileName Copied to the result as-is.
 * @returns The diff with one hunk per changed block, in block order.
 */
export function parseParsedDiff(
  oldCodeLines: string[],
  newCodeLines: string[],
  lineNumber: number,
  oldFileName?: string,
  newFileName?: string,
): ParsedDiff {
  const hunks: Hunk[] = [];

  for (const block of diffBlocks(oldCodeLines, newCodeLines)) {
    // '=' marks an unchanged block; only changed blocks produce hunks.
    if (block[0] === '=') {
      continue;
    }

    const [oldBegin, oldEnd, newBegin, newEnd] = block[1];
    const removed = oldCodeLines.slice(oldBegin, oldEnd).map(codeStr => '-' + codeStr);
    const added = newCodeLines.slice(newBegin, newEnd).map(codeStr => '+' + codeStr);

    hunks.push({
      oldStart: lineNumber + oldBegin,
      oldLines: removed.length,
      newStart: lineNumber + newBegin,
      newLines: added.length,
      lines: removed.concat(added),
      linedelimiters: new Array<string>(removed.length + added.length).fill('\n'),
    });
  }

  const parsed: ParsedDiff = {oldFileName, newFileName, hunks};
  return parsed;
}
b69ab31	329
/**
 * Given a diff patch, filter out changes to files that are in the list.
 *
 * Each entry of `files` is matched both verbatim and with one leading `a/` or
 * `b/` removed, so callers may pass either repo-relative paths or the prefixed
 * names that appear in the diff header. Matching verbatim as well means a real
 * file under a top-level `a/` or `b/` directory can be filtered too.
 *
 * @param patch Diff text in git format.
 * @param files Paths whose diffs should be dropped.
 * @returns The patch re-serialized without the sections whose old or new file
 *   name (after removing the diff's own `a/` / `b/` prefix) is in `files`.
 */
export function filterFilesFromPatch(patch: string, files: string[]): string {
  const excluded = new Set<string>();
  for (const file of files) {
    excluded.add(file);
    excluded.add(file.replace(/^[ab]\//, ''));
  }

  const keptDiffs = parsePatch(patch).filter(diff => {
    // Extract filenames without a/ and b/ prefixes
    const oldFile = diff.oldFileName?.replace(/^a\//, '');
    const newFile = diff.newFileName?.replace(/^b\//, '');

    const isExcluded =
      (oldFile !== undefined && excluded.has(oldFile)) ||
      (newFile !== undefined && excluded.has(newFile));
    return !isExcluded;
  });

  return stringifyPatch(keptDiffs);
}