diff.ts · grove

13.2 KB403 lines

1	/**
2	* Portions Copyright (c) Meta Platforms, Inc. and affiliates.
3	*
4	* This source code is licensed under the MIT license found in the
5	* LICENSE file in the root directory of this source tree.
6	*/
7
8	/*
9
10	Copyright (c) 2020 Jun Wu
11
12	Permission is hereby granted, free of charge, to any person obtaining a copy
13	of this software and associated documentation files (the "Software"), to deal
14	in the Software without restriction, including without limitation the rights
15	to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
16	copies of the Software, and to permit persons to whom the Software is
17	furnished to do so, subject to the following conditions:
18
19	The above copyright notice and this permission notice shall be included in all
20	copies or substantial portions of the Software.
21
22	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
23	IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
24	FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
25	AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
26	LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
27	OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
28	SOFTWARE.
29
30	*/
31
32	// Read D43857949 about the choice of the diff library.
33	import diffSequences from 'diff-sequences';
34
35	/** Index of a line. Starts from 0. */
36	export type LineIdx = number;
37
38	/**
39	* Calculate the line differences. For performance, this function only
40	* returns the line indexes for different chunks. The line contents
41	* are not returned.
42	*
43	* @param aLines lines on the "a" side.
44	* @param bLines lines on the "b" side.
45	* @param readable whether to use `readableDiffBlocks`.
46	* @returns A list of `(a1, a2, b1, b2)` tuples for the line ranges that
47	* are different between "a" and "b".
48	*/
49	export function diffLines(
50	aLines: string[],
51	bLines: string[],
52	readable = true,
53	): [LineIdx, LineIdx, LineIdx, LineIdx][] {
54	return (readable ? readableDiffBlocks : diffBlocks)(aLines, bLines)
55	.filter(([sign, _range]) => sign === '!')
56	.map(([_sign, range]) => range);
57	}
58
59	/**
60	* Calculate the line differences. For performance, this function returns
61	* line ranges not line contents.
62	*
63	* Similar to Mercurial's `mdiff.allblocks`.
64	*
65	* @param aLines lines on the "a" side.
66	* @param bLines lines on the "b" side.
67	* @returns A list of `[sign, [a1, a2, b1, b2]]` tuples for the line ranges.
68	* If `sign` is `'='`, the a1 to a2 range on the a side, and b1 to b2 range
69	* on the b side are the same on both sides. Otherwise, `sign` is `'!'`
70	* meaning the ranges are different.
71	*/
72	export function diffBlocks(aLines: string[], bLines: string[]): Array<Block> {
73	// Avoid O(string length) comparison.
74	const [aList, bList] = stringsToInts([aLines, bLines]);
75	return diffIntBlocks(aList, bList);
76	}
77
78	/**
79	* Similar to `diffBlocks`, but attempt to produce more human-readable diff.
80	*
81	* Most of the time, a diff is less human-readable because "insignificant"
82	* lines like "{" or "}" are matched. Practically, matching the actual code
83	* (significant) lines should have high priority for "readability".
84	*
85	* To prioritize significant lines, run the main diff algorithm on the
86	* significant lines first, force match the equal lines. Those equal lines
87	* will split the insignificant lines into smaller regions. Run the diff
88	* algorithm on those regions.
89	*
90	* Because insignificant lines are skipped, this function might not always
91	* produce the theoretical minimal diff like `diffBlocks`.
92	*/
93	export function readableDiffBlocks(aLines: string[], bLines: string[]): Array<Block> {
94	const aIsSignificant = aLines.map(isSignificantLine);
95	const bIsSignificant = bLines.map(isSignificantLine);
96	const insignificantCount =
97	aIsSignificant.reduce((a, b) => a + (b ? 0 : 1), 0) +
98	bIsSignificant.reduce((a, b) => a + (b ? 0 : 1), 0);
99	if (insignificantCount > (aLines.length + bLines.length + 3) / 2) {
100	// Too many insignificant lines. The algorithm might be a bad fit.
101	// Use regular diff instead.
102	return diffBlocks(aLines, bLines);
103	}
104
105	// Assign integer ids to lines.
106	const [aFull, bFull] = stringsToInts([aLines, bLines]);
107
108	// Significant lines.
109	const aSignificant = aFull.filter((_l, i) => aIsSignificant[i]);
110	const bSignificant = bFull.filter((_l, i) => bIsSignificant[i]);
111
112	// Index offset. aSignificant[i] == aFull[aSigToFull[i]].
113	const aSigToFull = calculateSigToFull(aIsSignificant);
114	const bSigToFull = calculateSigToFull(bIsSignificant);
115
116	let aLast = 0;
117	let bLast = 0;
118	const result: Array<Block> = [];
119	const push = (blocks: ReadonlyArray<Block>, aOffset = 0, bOffset = 0) => {
120	blocks.forEach(([sign, [origA1, origA2, origB1, origB2]]) => {
121	const a1 = origA1 + aOffset;
122	const a2 = origA2 + aOffset;
123	const b1 = origB1 + bOffset;
124	const b2 = origB2 + bOffset;
125	const last = result.at(-1);
126	if (last?.[0] === sign && last[1][1] === a1 && last[1][3] === b1) {
127	last[1][1] = a2;
128	last[1][3] = b2;
129	} else {
130	result.push([sign, [a1, a2, b1, b2]]);
131	}
132	});
133	};
134
135	diffIntBlocks(aSignificant, bSignificant).forEach(
136	([sign, [aSigStart, aSigEnd, bSigStart, _bSigEnd]]) => {
137	if (sign === '=') {
138	for (let a1Sig = aSigStart; a1Sig < aSigEnd; a1Sig++) {
139	const b1Sig = bSigStart + (a1Sig - aSigStart);
140	const [a1, b1] = [aSigToFull[a1Sig], bSigToFull[b1Sig]];
141	const [a2, b2] = [a1 + 1, b1 + 1];
142	// Force match the two sides. Run regular diff on the insignificant region.
143	// .. aLast \| aLast .. a1 \| a1 .. a2 \| a2 ...
144	// .. aLast \| bLast .. b1 \| b1 .. b2 \| b2 ...
145	// ----------------------------------------------------------------
146	// already in \| insignificant \| significant = \| rest
147	// result \| to diff \| force matched \| to figure out later
148	push(diffIntBlocks(aFull.slice(aLast, a1), bFull.slice(bLast, b1)), aLast, bLast);
149	push([['=', [a1, a2, b1, b2]]]);
150	[aLast, bLast] = [a2, b2];
151	}
152	}
153	},
154	);
155	if (aLast < aFull.length \|\| bLast < bFull.length) {
156	push(diffIntBlocks(aFull.slice(aLast), bFull.slice(bLast)), aLast, bLast);
157	}
158	return result;
159	}
160
161	/**
162	* Given a boolean array a[i], calculate b[i] so b[i] is the i-th `true` index in a[i],
163	* aka. a.slice(0, b[i]) has a total of `i` `true` values.
164	*/
165	function calculateSigToFull(isSignificant: ReadonlyArray<boolean>): ReadonlyArray<number> {
166	const result: number[] = [];
167	for (let i = 0; i < isSignificant.length; i++) {
168	if (isSignificant[i]) {
169	result.push(i);
170	}
171	}
172	result.push(isSignificant.length);
173	return result;
174	}
175
176	/**
177	* Test if a line is "significant". Used by `readableDiffBlocks`.
178	* Blank lines, "{", "}", "/**" are insignificant.
179	*/
180	function isSignificantLine(line: string): boolean {
181	return line.match(/^[\s{}/]$/) == null;
182	}
183
184	/**
185	* Similar to `diffBlocks`, but takes integer array instead.
186	*/
187	function diffIntBlocks(aList: ReadonlyArray<number>, bList: ReadonlyArray<number>): Array<Block> {
188	// Skip common prefix and suffix.
189	let aLen = aList.length;
190	let bLen = bList.length;
191	const minLen = Math.min(aLen, bLen);
192	let commonPrefixLen = 0;
193	let commonSuffixLen = 0;
194	while (commonPrefixLen < minLen && aList[commonPrefixLen] === bList[commonPrefixLen]) {
195	commonPrefixLen += 1;
196	}
197	while (aLen > commonPrefixLen && bLen > commonPrefixLen && aList[aLen - 1] === bList[bLen - 1]) {
198	aLen -= 1;
199	bLen -= 1;
200	commonSuffixLen += 1;
201	}
202	aLen -= commonPrefixLen;
203	bLen -= commonPrefixLen;
204
205	const blocks: Array<Block> = [];
206	if (commonPrefixLen > 0) {
207	blocks.push(['=', [0, commonPrefixLen, 0, commonPrefixLen]]);
208	}
209
210	// Run the diff algorithm.
211	let a1 = 0;
212	let b1 = 0;
213
214	function isCommon(aIndex: number, bIndex: number) {
215	return aList[aIndex + commonPrefixLen] === bList[bIndex + commonPrefixLen];
216	}
217
218	function foundSequence(n: LineIdx, a2: LineIdx, b2: LineIdx) {
219	if (a1 !== a2 \|\| b1 !== b2) {
220	blocks.push([
221	'!',
222	[a1 + commonPrefixLen, a2 + commonPrefixLen, b1 + commonPrefixLen, b2 + commonPrefixLen],
223	]);
224	}
225	if (n > 0) {
226	blocks.push([
227	'=',
228	[
229	a2 + commonPrefixLen,
230	a2 + n + commonPrefixLen,
231	b2 + commonPrefixLen,
232	b2 + n + commonPrefixLen,
233	],
234	]);
235	}
236	a1 = a2 + n;
237	b1 = b2 + n;
238	}
239
240	diffSequences(aLen, bLen, isCommon, foundSequence);
241	foundSequence(commonSuffixLen, aLen, bLen);
242
243	return blocks;
244	}
245
246	/**
247	* Post process `blocks` from `diffBlocks` to collapse unchanged lines.
248	* `contextLineCount` lines before or after a `!` (changed) block are
249	* not collapsed.
250	*
251	* If `isALineExpanded(aLine, bLine)` returns `true`, then the _block_
252	* is expanded.
253	*
254	* Split `=` blocks into `=` and `~` blocks. The `~` blocks are expected
255	* to be collapsed.
256	*/
257	export function collapseContextBlocks(
258	blocks: Array<Block>,
259	isLineExpanded: (aLine: LineIdx, bLine: LineIdx) => boolean,
260	contextLineCount = 3,
261	): Array<ContextBlock> {
262	const collapsedBlocks: Array<ContextBlock> = [];
263	blocks.forEach((block, i) => {
264	const [sign, [a1, a2, b1, b2]] = block;
265	if (sign === '!') {
266	collapsedBlocks.push(block);
267	} else if (sign === '=') {
268	// a1 ... a1 + topContext ... a2 - bottomContext ... a2
269	// ^^^ collapse this range (c1 to c2)
270	// The topContext and bottomContext can be 0 lines if they are not adjacent
271	// to a diff block.
272	const topContext = i == 0 \|\| blocks[i - 1][0] !== '!' ? 0 : contextLineCount;
273	const bottomContext =
274	i + 1 == blocks.length \|\| blocks[i + 1][0] !== '!' ? 0 : contextLineCount;
275	const c1 = Math.min(a1 + topContext, a2);
276	const c2 = Math.max(c1, a2 - bottomContext);
277	const aToB = b1 - a1;
278	if (c1 >= c2 \|\| isLineExpanded(c1, c1 + aToB) \|\| isLineExpanded(c2 - 1, c2 - 1 + aToB)) {
279	// Nothing to collapse.
280	collapsedBlocks.push(block);
281	} else {
282	// Split. Collapse c1 .. c2 range.
283	if (c1 > a1) {
284	collapsedBlocks.push(['=', [a1, c1, b1, c1 + aToB]]);
285	}
286	collapsedBlocks.push(['~', [c1, c2, c1 + aToB, c2 + aToB]]);
287	if (c2 < a2) {
288	collapsedBlocks.push(['=', [c2, a2, c2 + aToB, b2]]);
289	}
290	}
291	}
292	});
293	return collapsedBlocks;
294	}
295
296	/**
297	* Merge diffBlocks(a, b) and diffBlocks(c, b).
298	* Any difference (between a and b, or c and b) generates a `!` block.
299	* The (a1, a2) line numbers in the output blocks are changed to (b1, b2).
300	* Preserve empty (a1 == a2, b1 == b2) '!' blocks for context line calculation.
301	*/
302	export function mergeBlocks(abBlocks: Array<Block>, cbBlocks: Array<Block>): Array<Block> {
303	let i = 0; // Index of abBlocks.
304	let j = 0; // Index of cbBlocks.
305	let start = 0; // "Current" line index of b.
306	const result: Array<Block> = [];
307
308	const push = (sign: Sign, end: number) => {
309	const last = result.at(-1);
310	if (last?.[0] === sign) {
311	last[1][1] = end;
312	last[1][3] = end;
313	} else {
314	result.push([sign, [start, end, start, end]]);
315	}
316	start = end;
317	};
318
319	while (i < abBlocks.length && j < cbBlocks.length) {
320	const [sign1, [, , b11, b12]] = abBlocks[i];
321	if (b11 === b12 && b12 === start && sign1 === '!') {
322	push(sign1, start);
323	}
324	if (b12 <= start) {
325	++i;
326	continue;
327	}
328	const [sign2, [, , b21, b22]] = cbBlocks[j];
329	if (b21 === b22 && b21 === start && sign2 === '!') {
330	push(sign2, start);
331	}
332	if (b22 <= start) {
333	++j;
334	continue;
335	}
336
337	// Minimal "end" so there cannot be 2 different signs in the start-end range
338	// on either side. Note 2 sides might have different signs.
339	const end = Math.min(...[b11, b12, b21, b22].filter(i => i > start));
340
341	// Figure out the sign of the start-end range.
342	let sign: Sign = '=';
343	if (
344	(start >= b11 && end <= b12 && sign1 === '!') \|\|
345	(start >= b21 && end <= b22 && sign2 === '!')
346	) {
347	sign = '!';
348	}
349	push(sign, end);
350	}
351
352	return result;
353	}
354
355	/** Indicates whether a block is same or different on both sides. */
356	export type Sign = '=' \| '!';
357
358	/** Return type of `diffBlocks`. */
359	export type Block = [Sign, [LineIdx, LineIdx, LineIdx, LineIdx]];
360
361	/** Return type of `collapseContextLines`. */
362	export type ContextBlock = [Sign \| '~', [LineIdx, LineIdx, LineIdx, LineIdx]];
363
364	/**
365	* Split lines by `\n`. Preserve the end of lines.
366	*/
367	export function splitLines(s: string): string[] {
368	let pos = 0;
369	let nextPos = 0;
370	const result = [];
371	while (pos < s.length) {
372	nextPos = s.indexOf('\n', pos);
373	if (nextPos === -1) {
374	nextPos = s.length - 1;
375	}
376	result.push(s.slice(pos, nextPos + 1));
377	pos = nextPos + 1;
378	}
379	return result;
380	}
381
382	/**
383	* Make strings with the same content use the same integer
384	* for fast comparison.
385	*/
386	function stringsToInts(linesArray: string[][]): number[][] {
387	// This is similar to diff-match-patch's diff_linesToChars_ but is not
388	// limited to 65536 unique lines.
389	const lineMap = new Map<string, number>();
390	return linesArray.map(lines =>
391	lines.map(line => {
392	const existingId = lineMap.get(line);
393	if (existingId != null) {
394	return existingId;
395	} else {
396	const id = lineMap.size;
397	lineMap.set(line, id);
398	return id;
399	}
400	}),
401	);
402	}
403