13.2 KB403 lines
Blame
1/**
2 * Portions Copyright (c) Meta Platforms, Inc. and affiliates.
3 *
4 * This source code is licensed under the MIT license found in the
5 * LICENSE file in the root directory of this source tree.
6 */
7
8/*
9
10Copyright (c) 2020 Jun Wu
11
12Permission is hereby granted, free of charge, to any person obtaining a copy
13of this software and associated documentation files (the "Software"), to deal
14in the Software without restriction, including without limitation the rights
15to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
16copies of the Software, and to permit persons to whom the Software is
17furnished to do so, subject to the following conditions:
18
19The above copyright notice and this permission notice shall be included in all
20copies or substantial portions of the Software.
21
22THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
23IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
24FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
25AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
26LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
27OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
28SOFTWARE.
29
30*/
31
32// Read D43857949 about the choice of the diff library.
33import diffSequences from 'diff-sequences';
34
35/** Index of a line. Starts from 0. */
36export type LineIdx = number;
37
38/**
39 * Calculate the line differences. For performance, this function only
40 * returns the line indexes for different chunks. The line contents
41 * are not returned.
42 *
43 * @param aLines lines on the "a" side.
44 * @param bLines lines on the "b" side.
45 * @param readable whether to use `readableDiffBlocks`.
46 * @returns A list of `(a1, a2, b1, b2)` tuples for the line ranges that
47 * are different between "a" and "b".
48 */
49export function diffLines(
50 aLines: string[],
51 bLines: string[],
52 readable = true,
53): [LineIdx, LineIdx, LineIdx, LineIdx][] {
54 return (readable ? readableDiffBlocks : diffBlocks)(aLines, bLines)
55 .filter(([sign, _range]) => sign === '!')
56 .map(([_sign, range]) => range);
57}
58
59/**
60 * Calculate the line differences. For performance, this function returns
61 * line ranges not line contents.
62 *
63 * Similar to Mercurial's `mdiff.allblocks`.
64 *
65 * @param aLines lines on the "a" side.
66 * @param bLines lines on the "b" side.
67 * @returns A list of `[sign, [a1, a2, b1, b2]]` tuples for the line ranges.
68 * If `sign` is `'='`, the a1 to a2 range on the a side, and b1 to b2 range
69 * on the b side are the same on both sides. Otherwise, `sign` is `'!'`
70 * meaning the ranges are different.
71 */
72export function diffBlocks(aLines: string[], bLines: string[]): Array<Block> {
73 // Avoid O(string length) comparison.
74 const [aList, bList] = stringsToInts([aLines, bLines]);
75 return diffIntBlocks(aList, bList);
76}
77
78/**
79 * Similar to `diffBlocks`, but attempt to produce more human-readable diff.
80 *
81 * Most of the time, a diff is less human-readable because "insignificant"
82 * lines like "{" or "}" are matched. Practically, matching the actual code
83 * (significant) lines should have high priority for "readability".
84 *
85 * To prioritize significant lines, run the main diff algorithm on the
86 * significant lines first, force match the equal lines. Those equal lines
87 * will split the insignificant lines into smaller regions. Run the diff
88 * algorithm on those regions.
89 *
90 * Because insignificant lines are skipped, this function might not always
91 * produce the theoretical minimal diff like `diffBlocks`.
92 */
93export function readableDiffBlocks(aLines: string[], bLines: string[]): Array<Block> {
94 const aIsSignificant = aLines.map(isSignificantLine);
95 const bIsSignificant = bLines.map(isSignificantLine);
96 const insignificantCount =
97 aIsSignificant.reduce((a, b) => a + (b ? 0 : 1), 0) +
98 bIsSignificant.reduce((a, b) => a + (b ? 0 : 1), 0);
99 if (insignificantCount > (aLines.length + bLines.length + 3) / 2) {
100 // Too many insignificant lines. The algorithm might be a bad fit.
101 // Use regular diff instead.
102 return diffBlocks(aLines, bLines);
103 }
104
105 // Assign integer ids to lines.
106 const [aFull, bFull] = stringsToInts([aLines, bLines]);
107
108 // Significant lines.
109 const aSignificant = aFull.filter((_l, i) => aIsSignificant[i]);
110 const bSignificant = bFull.filter((_l, i) => bIsSignificant[i]);
111
112 // Index offset. aSignificant[i] == aFull[aSigToFull[i]].
113 const aSigToFull = calculateSigToFull(aIsSignificant);
114 const bSigToFull = calculateSigToFull(bIsSignificant);
115
116 let aLast = 0;
117 let bLast = 0;
118 const result: Array<Block> = [];
119 const push = (blocks: ReadonlyArray<Block>, aOffset = 0, bOffset = 0) => {
120 blocks.forEach(([sign, [origA1, origA2, origB1, origB2]]) => {
121 const a1 = origA1 + aOffset;
122 const a2 = origA2 + aOffset;
123 const b1 = origB1 + bOffset;
124 const b2 = origB2 + bOffset;
125 const last = result.at(-1);
126 if (last?.[0] === sign && last[1][1] === a1 && last[1][3] === b1) {
127 last[1][1] = a2;
128 last[1][3] = b2;
129 } else {
130 result.push([sign, [a1, a2, b1, b2]]);
131 }
132 });
133 };
134
135 diffIntBlocks(aSignificant, bSignificant).forEach(
136 ([sign, [aSigStart, aSigEnd, bSigStart, _bSigEnd]]) => {
137 if (sign === '=') {
138 for (let a1Sig = aSigStart; a1Sig < aSigEnd; a1Sig++) {
139 const b1Sig = bSigStart + (a1Sig - aSigStart);
140 const [a1, b1] = [aSigToFull[a1Sig], bSigToFull[b1Sig]];
141 const [a2, b2] = [a1 + 1, b1 + 1];
142 // Force match the two sides. Run regular diff on the insignificant region.
143 // .. aLast | aLast .. a1 | a1 .. a2 | a2 ...
144 // .. aLast | bLast .. b1 | b1 .. b2 | b2 ...
145 // ----------------------------------------------------------------
146 // already in | insignificant | significant = | rest
147 // result | to diff | force matched | to figure out later
148 push(diffIntBlocks(aFull.slice(aLast, a1), bFull.slice(bLast, b1)), aLast, bLast);
149 push([['=', [a1, a2, b1, b2]]]);
150 [aLast, bLast] = [a2, b2];
151 }
152 }
153 },
154 );
155 if (aLast < aFull.length || bLast < bFull.length) {
156 push(diffIntBlocks(aFull.slice(aLast), bFull.slice(bLast)), aLast, bLast);
157 }
158 return result;
159}
160
161/**
162 * Given a boolean array a[i], calculate b[i] so b[i] is the i-th `true` index in a[i],
163 * aka. a.slice(0, b[i]) has a total of `i` `true` values.
164 */
165function calculateSigToFull(isSignificant: ReadonlyArray<boolean>): ReadonlyArray<number> {
166 const result: number[] = [];
167 for (let i = 0; i < isSignificant.length; i++) {
168 if (isSignificant[i]) {
169 result.push(i);
170 }
171 }
172 result.push(isSignificant.length);
173 return result;
174}
175
176/**
177 * Test if a line is "significant". Used by `readableDiffBlocks`.
178 * Blank lines, "{", "}", "/**" are insignificant.
179 */
180function isSignificantLine(line: string): boolean {
181 return line.match(/^[\s{}/*]*$/) == null;
182}
183
184/**
185 * Similar to `diffBlocks`, but takes integer array instead.
186 */
187function diffIntBlocks(aList: ReadonlyArray<number>, bList: ReadonlyArray<number>): Array<Block> {
188 // Skip common prefix and suffix.
189 let aLen = aList.length;
190 let bLen = bList.length;
191 const minLen = Math.min(aLen, bLen);
192 let commonPrefixLen = 0;
193 let commonSuffixLen = 0;
194 while (commonPrefixLen < minLen && aList[commonPrefixLen] === bList[commonPrefixLen]) {
195 commonPrefixLen += 1;
196 }
197 while (aLen > commonPrefixLen && bLen > commonPrefixLen && aList[aLen - 1] === bList[bLen - 1]) {
198 aLen -= 1;
199 bLen -= 1;
200 commonSuffixLen += 1;
201 }
202 aLen -= commonPrefixLen;
203 bLen -= commonPrefixLen;
204
205 const blocks: Array<Block> = [];
206 if (commonPrefixLen > 0) {
207 blocks.push(['=', [0, commonPrefixLen, 0, commonPrefixLen]]);
208 }
209
210 // Run the diff algorithm.
211 let a1 = 0;
212 let b1 = 0;
213
214 function isCommon(aIndex: number, bIndex: number) {
215 return aList[aIndex + commonPrefixLen] === bList[bIndex + commonPrefixLen];
216 }
217
218 function foundSequence(n: LineIdx, a2: LineIdx, b2: LineIdx) {
219 if (a1 !== a2 || b1 !== b2) {
220 blocks.push([
221 '!',
222 [a1 + commonPrefixLen, a2 + commonPrefixLen, b1 + commonPrefixLen, b2 + commonPrefixLen],
223 ]);
224 }
225 if (n > 0) {
226 blocks.push([
227 '=',
228 [
229 a2 + commonPrefixLen,
230 a2 + n + commonPrefixLen,
231 b2 + commonPrefixLen,
232 b2 + n + commonPrefixLen,
233 ],
234 ]);
235 }
236 a1 = a2 + n;
237 b1 = b2 + n;
238 }
239
240 diffSequences(aLen, bLen, isCommon, foundSequence);
241 foundSequence(commonSuffixLen, aLen, bLen);
242
243 return blocks;
244}
245
246/**
247 * Post process `blocks` from `diffBlocks` to collapse unchanged lines.
248 * `contextLineCount` lines before or after a `!` (changed) block are
249 * not collapsed.
250 *
251 * If `isALineExpanded(aLine, bLine)` returns `true`, then the _block_
252 * is expanded.
253 *
254 * Split `=` blocks into `=` and `~` blocks. The `~` blocks are expected
255 * to be collapsed.
256 */
257export function collapseContextBlocks(
258 blocks: Array<Block>,
259 isLineExpanded: (aLine: LineIdx, bLine: LineIdx) => boolean,
260 contextLineCount = 3,
261): Array<ContextBlock> {
262 const collapsedBlocks: Array<ContextBlock> = [];
263 blocks.forEach((block, i) => {
264 const [sign, [a1, a2, b1, b2]] = block;
265 if (sign === '!') {
266 collapsedBlocks.push(block);
267 } else if (sign === '=') {
268 // a1 ... a1 + topContext ... a2 - bottomContext ... a2
269 // ^^^ collapse this range (c1 to c2)
270 // The topContext and bottomContext can be 0 lines if they are not adjacent
271 // to a diff block.
272 const topContext = i == 0 || blocks[i - 1][0] !== '!' ? 0 : contextLineCount;
273 const bottomContext =
274 i + 1 == blocks.length || blocks[i + 1][0] !== '!' ? 0 : contextLineCount;
275 const c1 = Math.min(a1 + topContext, a2);
276 const c2 = Math.max(c1, a2 - bottomContext);
277 const aToB = b1 - a1;
278 if (c1 >= c2 || isLineExpanded(c1, c1 + aToB) || isLineExpanded(c2 - 1, c2 - 1 + aToB)) {
279 // Nothing to collapse.
280 collapsedBlocks.push(block);
281 } else {
282 // Split. Collapse c1 .. c2 range.
283 if (c1 > a1) {
284 collapsedBlocks.push(['=', [a1, c1, b1, c1 + aToB]]);
285 }
286 collapsedBlocks.push(['~', [c1, c2, c1 + aToB, c2 + aToB]]);
287 if (c2 < a2) {
288 collapsedBlocks.push(['=', [c2, a2, c2 + aToB, b2]]);
289 }
290 }
291 }
292 });
293 return collapsedBlocks;
294}
295
296/**
297 * Merge diffBlocks(a, b) and diffBlocks(c, b).
298 * Any difference (between a and b, or c and b) generates a `!` block.
299 * The (a1, a2) line numbers in the output blocks are changed to (b1, b2).
300 * Preserve empty (a1 == a2, b1 == b2) '!' blocks for context line calculation.
301 */
302export function mergeBlocks(abBlocks: Array<Block>, cbBlocks: Array<Block>): Array<Block> {
303 let i = 0; // Index of abBlocks.
304 let j = 0; // Index of cbBlocks.
305 let start = 0; // "Current" line index of b.
306 const result: Array<Block> = [];
307
308 const push = (sign: Sign, end: number) => {
309 const last = result.at(-1);
310 if (last?.[0] === sign) {
311 last[1][1] = end;
312 last[1][3] = end;
313 } else {
314 result.push([sign, [start, end, start, end]]);
315 }
316 start = end;
317 };
318
319 while (i < abBlocks.length && j < cbBlocks.length) {
320 const [sign1, [, , b11, b12]] = abBlocks[i];
321 if (b11 === b12 && b12 === start && sign1 === '!') {
322 push(sign1, start);
323 }
324 if (b12 <= start) {
325 ++i;
326 continue;
327 }
328 const [sign2, [, , b21, b22]] = cbBlocks[j];
329 if (b21 === b22 && b21 === start && sign2 === '!') {
330 push(sign2, start);
331 }
332 if (b22 <= start) {
333 ++j;
334 continue;
335 }
336
337 // Minimal "end" so there cannot be 2 different signs in the start-end range
338 // on either side. Note 2 sides might have different signs.
339 const end = Math.min(...[b11, b12, b21, b22].filter(i => i > start));
340
341 // Figure out the sign of the start-end range.
342 let sign: Sign = '=';
343 if (
344 (start >= b11 && end <= b12 && sign1 === '!') ||
345 (start >= b21 && end <= b22 && sign2 === '!')
346 ) {
347 sign = '!';
348 }
349 push(sign, end);
350 }
351
352 return result;
353}
354
355/** Indicates whether a block is same or different on both sides. */
356export type Sign = '=' | '!';
357
358/** Return type of `diffBlocks`. */
359export type Block = [Sign, [LineIdx, LineIdx, LineIdx, LineIdx]];
360
361/** Return type of `collapseContextLines`. */
362export type ContextBlock = [Sign | '~', [LineIdx, LineIdx, LineIdx, LineIdx]];
363
364/**
365 * Split lines by `\n`. Preserve the end of lines.
366 */
367export function splitLines(s: string): string[] {
368 let pos = 0;
369 let nextPos = 0;
370 const result = [];
371 while (pos < s.length) {
372 nextPos = s.indexOf('\n', pos);
373 if (nextPos === -1) {
374 nextPos = s.length - 1;
375 }
376 result.push(s.slice(pos, nextPos + 1));
377 pos = nextPos + 1;
378 }
379 return result;
380}
381
382/**
383 * Make strings with the same content use the same integer
384 * for fast comparison.
385 */
386function stringsToInts(linesArray: string[][]): number[][] {
387 // This is similar to diff-match-patch's diff_linesToChars_ but is not
388 // limited to 65536 unique lines.
389 const lineMap = new Map<string, number>();
390 return linesArray.map(lines =>
391 lines.map(line => {
392 const existingId = lineMap.get(line);
393 if (existingId != null) {
394 return existingId;
395 } else {
396 const id = lineMap.size;
397 lineMap.set(line, id);
398 return id;
399 }
400 }),
401 );
402}
403