| 1 | /** |
| 2 | * Portions Copyright (c) Meta Platforms, Inc. and affiliates. |
| 3 | * |
| 4 | * This source code is licensed under the MIT license found in the |
| 5 | * LICENSE file in the root directory of this source tree. |
| 6 | */ |
| 7 | |
| 8 | /* |
| 9 | |
| 10 | Copyright (c) 2020 Jun Wu |
| 11 | |
| 12 | Permission is hereby granted, free of charge, to any person obtaining a copy |
| 13 | of this software and associated documentation files (the "Software"), to deal |
| 14 | in the Software without restriction, including without limitation the rights |
| 15 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
| 16 | copies of the Software, and to permit persons to whom the Software is |
| 17 | furnished to do so, subject to the following conditions: |
| 18 | |
| 19 | The above copyright notice and this permission notice shall be included in all |
| 20 | copies or substantial portions of the Software. |
| 21 | |
| 22 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| 23 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| 24 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
| 25 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| 26 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
| 27 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
| 28 | SOFTWARE. |
| 29 | |
| 30 | */ |
| 31 | |
| 32 | // Read D43857949 about the choice of the diff library. |
| 33 | import diffSequences from 'diff-sequences'; |
| 34 | |
| 35 | /** Index of a line. Starts from 0. */ |
| 36 | export type LineIdx = number; |
| 37 | |
| 38 | /** |
| 39 | * Calculate the line differences. For performance, this function only |
| 40 | * returns the line indexes for different chunks. The line contents |
| 41 | * are not returned. |
| 42 | * |
| 43 | * @param aLines lines on the "a" side. |
| 44 | * @param bLines lines on the "b" side. |
| 45 | * @param readable whether to use `readableDiffBlocks`. |
| 46 | * @returns A list of `(a1, a2, b1, b2)` tuples for the line ranges that |
| 47 | * are different between "a" and "b". |
| 48 | */ |
| 49 | export function diffLines( |
| 50 | aLines: string[], |
| 51 | bLines: string[], |
| 52 | readable = true, |
| 53 | ): [LineIdx, LineIdx, LineIdx, LineIdx][] { |
| 54 | return (readable ? readableDiffBlocks : diffBlocks)(aLines, bLines) |
| 55 | .filter(([sign, _range]) => sign === '!') |
| 56 | .map(([_sign, range]) => range); |
| 57 | } |
| 58 | |
| 59 | /** |
| 60 | * Calculate the line differences. For performance, this function returns |
| 61 | * line ranges not line contents. |
| 62 | * |
| 63 | * Similar to Mercurial's `mdiff.allblocks`. |
| 64 | * |
| 65 | * @param aLines lines on the "a" side. |
| 66 | * @param bLines lines on the "b" side. |
| 67 | * @returns A list of `[sign, [a1, a2, b1, b2]]` tuples for the line ranges. |
| 68 | * If `sign` is `'='`, the a1 to a2 range on the a side, and b1 to b2 range |
| 69 | * on the b side are the same on both sides. Otherwise, `sign` is `'!'` |
| 70 | * meaning the ranges are different. |
| 71 | */ |
| 72 | export function diffBlocks(aLines: string[], bLines: string[]): Array<Block> { |
| 73 | // Avoid O(string length) comparison. |
| 74 | const [aList, bList] = stringsToInts([aLines, bLines]); |
| 75 | return diffIntBlocks(aList, bList); |
| 76 | } |
| 77 | |
| 78 | /** |
| 79 | * Similar to `diffBlocks`, but attempt to produce more human-readable diff. |
| 80 | * |
| 81 | * Most of the time, a diff is less human-readable because "insignificant" |
| 82 | * lines like "{" or "}" are matched. Practically, matching the actual code |
| 83 | * (significant) lines should have high priority for "readability". |
| 84 | * |
| 85 | * To prioritize significant lines, run the main diff algorithm on the |
| 86 | * significant lines first, force match the equal lines. Those equal lines |
| 87 | * will split the insignificant lines into smaller regions. Run the diff |
| 88 | * algorithm on those regions. |
| 89 | * |
| 90 | * Because insignificant lines are skipped, this function might not always |
| 91 | * produce the theoretical minimal diff like `diffBlocks`. |
| 92 | */ |
| 93 | export function readableDiffBlocks(aLines: string[], bLines: string[]): Array<Block> { |
| 94 | const aIsSignificant = aLines.map(isSignificantLine); |
| 95 | const bIsSignificant = bLines.map(isSignificantLine); |
| 96 | const insignificantCount = |
| 97 | aIsSignificant.reduce((a, b) => a + (b ? 0 : 1), 0) + |
| 98 | bIsSignificant.reduce((a, b) => a + (b ? 0 : 1), 0); |
| 99 | if (insignificantCount > (aLines.length + bLines.length + 3) / 2) { |
| 100 | // Too many insignificant lines. The algorithm might be a bad fit. |
| 101 | // Use regular diff instead. |
| 102 | return diffBlocks(aLines, bLines); |
| 103 | } |
| 104 | |
| 105 | // Assign integer ids to lines. |
| 106 | const [aFull, bFull] = stringsToInts([aLines, bLines]); |
| 107 | |
| 108 | // Significant lines. |
| 109 | const aSignificant = aFull.filter((_l, i) => aIsSignificant[i]); |
| 110 | const bSignificant = bFull.filter((_l, i) => bIsSignificant[i]); |
| 111 | |
| 112 | // Index offset. aSignificant[i] == aFull[aSigToFull[i]]. |
| 113 | const aSigToFull = calculateSigToFull(aIsSignificant); |
| 114 | const bSigToFull = calculateSigToFull(bIsSignificant); |
| 115 | |
| 116 | let aLast = 0; |
| 117 | let bLast = 0; |
| 118 | const result: Array<Block> = []; |
| 119 | const push = (blocks: ReadonlyArray<Block>, aOffset = 0, bOffset = 0) => { |
| 120 | blocks.forEach(([sign, [origA1, origA2, origB1, origB2]]) => { |
| 121 | const a1 = origA1 + aOffset; |
| 122 | const a2 = origA2 + aOffset; |
| 123 | const b1 = origB1 + bOffset; |
| 124 | const b2 = origB2 + bOffset; |
| 125 | const last = result.at(-1); |
| 126 | if (last?.[0] === sign && last[1][1] === a1 && last[1][3] === b1) { |
| 127 | last[1][1] = a2; |
| 128 | last[1][3] = b2; |
| 129 | } else { |
| 130 | result.push([sign, [a1, a2, b1, b2]]); |
| 131 | } |
| 132 | }); |
| 133 | }; |
| 134 | |
| 135 | diffIntBlocks(aSignificant, bSignificant).forEach( |
| 136 | ([sign, [aSigStart, aSigEnd, bSigStart, _bSigEnd]]) => { |
| 137 | if (sign === '=') { |
| 138 | for (let a1Sig = aSigStart; a1Sig < aSigEnd; a1Sig++) { |
| 139 | const b1Sig = bSigStart + (a1Sig - aSigStart); |
| 140 | const [a1, b1] = [aSigToFull[a1Sig], bSigToFull[b1Sig]]; |
| 141 | const [a2, b2] = [a1 + 1, b1 + 1]; |
| 142 | // Force match the two sides. Run regular diff on the insignificant region. |
| 143 | // .. aLast | aLast .. a1 | a1 .. a2 | a2 ... |
| 144 | // .. aLast | bLast .. b1 | b1 .. b2 | b2 ... |
| 145 | // ---------------------------------------------------------------- |
| 146 | // already in | insignificant | significant = | rest |
| 147 | // result | to diff | force matched | to figure out later |
| 148 | push(diffIntBlocks(aFull.slice(aLast, a1), bFull.slice(bLast, b1)), aLast, bLast); |
| 149 | push([['=', [a1, a2, b1, b2]]]); |
| 150 | [aLast, bLast] = [a2, b2]; |
| 151 | } |
| 152 | } |
| 153 | }, |
| 154 | ); |
| 155 | if (aLast < aFull.length || bLast < bFull.length) { |
| 156 | push(diffIntBlocks(aFull.slice(aLast), bFull.slice(bLast)), aLast, bLast); |
| 157 | } |
| 158 | return result; |
| 159 | } |
| 160 | |
| 161 | /** |
| 162 | * Given a boolean array a[i], calculate b[i] so b[i] is the i-th `true` index in a[i], |
| 163 | * aka. a.slice(0, b[i]) has a total of `i` `true` values. |
| 164 | */ |
| 165 | function calculateSigToFull(isSignificant: ReadonlyArray<boolean>): ReadonlyArray<number> { |
| 166 | const result: number[] = []; |
| 167 | for (let i = 0; i < isSignificant.length; i++) { |
| 168 | if (isSignificant[i]) { |
| 169 | result.push(i); |
| 170 | } |
| 171 | } |
| 172 | result.push(isSignificant.length); |
| 173 | return result; |
| 174 | } |
| 175 | |
| 176 | /** |
| 177 | * Test if a line is "significant". Used by `readableDiffBlocks`. |
| 178 | * Blank lines, "{", "}", "/**" are insignificant. |
| 179 | */ |
| 180 | function isSignificantLine(line: string): boolean { |
| 181 | return line.match(/^[\s{}/*]*$/) == null; |
| 182 | } |
| 183 | |
| 184 | /** |
| 185 | * Similar to `diffBlocks`, but takes integer array instead. |
| 186 | */ |
| 187 | function diffIntBlocks(aList: ReadonlyArray<number>, bList: ReadonlyArray<number>): Array<Block> { |
| 188 | // Skip common prefix and suffix. |
| 189 | let aLen = aList.length; |
| 190 | let bLen = bList.length; |
| 191 | const minLen = Math.min(aLen, bLen); |
| 192 | let commonPrefixLen = 0; |
| 193 | let commonSuffixLen = 0; |
| 194 | while (commonPrefixLen < minLen && aList[commonPrefixLen] === bList[commonPrefixLen]) { |
| 195 | commonPrefixLen += 1; |
| 196 | } |
| 197 | while (aLen > commonPrefixLen && bLen > commonPrefixLen && aList[aLen - 1] === bList[bLen - 1]) { |
| 198 | aLen -= 1; |
| 199 | bLen -= 1; |
| 200 | commonSuffixLen += 1; |
| 201 | } |
| 202 | aLen -= commonPrefixLen; |
| 203 | bLen -= commonPrefixLen; |
| 204 | |
| 205 | const blocks: Array<Block> = []; |
| 206 | if (commonPrefixLen > 0) { |
| 207 | blocks.push(['=', [0, commonPrefixLen, 0, commonPrefixLen]]); |
| 208 | } |
| 209 | |
| 210 | // Run the diff algorithm. |
| 211 | let a1 = 0; |
| 212 | let b1 = 0; |
| 213 | |
| 214 | function isCommon(aIndex: number, bIndex: number) { |
| 215 | return aList[aIndex + commonPrefixLen] === bList[bIndex + commonPrefixLen]; |
| 216 | } |
| 217 | |
| 218 | function foundSequence(n: LineIdx, a2: LineIdx, b2: LineIdx) { |
| 219 | if (a1 !== a2 || b1 !== b2) { |
| 220 | blocks.push([ |
| 221 | '!', |
| 222 | [a1 + commonPrefixLen, a2 + commonPrefixLen, b1 + commonPrefixLen, b2 + commonPrefixLen], |
| 223 | ]); |
| 224 | } |
| 225 | if (n > 0) { |
| 226 | blocks.push([ |
| 227 | '=', |
| 228 | [ |
| 229 | a2 + commonPrefixLen, |
| 230 | a2 + n + commonPrefixLen, |
| 231 | b2 + commonPrefixLen, |
| 232 | b2 + n + commonPrefixLen, |
| 233 | ], |
| 234 | ]); |
| 235 | } |
| 236 | a1 = a2 + n; |
| 237 | b1 = b2 + n; |
| 238 | } |
| 239 | |
| 240 | diffSequences(aLen, bLen, isCommon, foundSequence); |
| 241 | foundSequence(commonSuffixLen, aLen, bLen); |
| 242 | |
| 243 | return blocks; |
| 244 | } |
| 245 | |
| 246 | /** |
| 247 | * Post process `blocks` from `diffBlocks` to collapse unchanged lines. |
| 248 | * `contextLineCount` lines before or after a `!` (changed) block are |
| 249 | * not collapsed. |
| 250 | * |
| 251 | * If `isALineExpanded(aLine, bLine)` returns `true`, then the _block_ |
| 252 | * is expanded. |
| 253 | * |
| 254 | * Split `=` blocks into `=` and `~` blocks. The `~` blocks are expected |
| 255 | * to be collapsed. |
| 256 | */ |
| 257 | export function collapseContextBlocks( |
| 258 | blocks: Array<Block>, |
| 259 | isLineExpanded: (aLine: LineIdx, bLine: LineIdx) => boolean, |
| 260 | contextLineCount = 3, |
| 261 | ): Array<ContextBlock> { |
| 262 | const collapsedBlocks: Array<ContextBlock> = []; |
| 263 | blocks.forEach((block, i) => { |
| 264 | const [sign, [a1, a2, b1, b2]] = block; |
| 265 | if (sign === '!') { |
| 266 | collapsedBlocks.push(block); |
| 267 | } else if (sign === '=') { |
| 268 | // a1 ... a1 + topContext ... a2 - bottomContext ... a2 |
| 269 | // ^^^ collapse this range (c1 to c2) |
| 270 | // The topContext and bottomContext can be 0 lines if they are not adjacent |
| 271 | // to a diff block. |
| 272 | const topContext = i == 0 || blocks[i - 1][0] !== '!' ? 0 : contextLineCount; |
| 273 | const bottomContext = |
| 274 | i + 1 == blocks.length || blocks[i + 1][0] !== '!' ? 0 : contextLineCount; |
| 275 | const c1 = Math.min(a1 + topContext, a2); |
| 276 | const c2 = Math.max(c1, a2 - bottomContext); |
| 277 | const aToB = b1 - a1; |
| 278 | if (c1 >= c2 || isLineExpanded(c1, c1 + aToB) || isLineExpanded(c2 - 1, c2 - 1 + aToB)) { |
| 279 | // Nothing to collapse. |
| 280 | collapsedBlocks.push(block); |
| 281 | } else { |
| 282 | // Split. Collapse c1 .. c2 range. |
| 283 | if (c1 > a1) { |
| 284 | collapsedBlocks.push(['=', [a1, c1, b1, c1 + aToB]]); |
| 285 | } |
| 286 | collapsedBlocks.push(['~', [c1, c2, c1 + aToB, c2 + aToB]]); |
| 287 | if (c2 < a2) { |
| 288 | collapsedBlocks.push(['=', [c2, a2, c2 + aToB, b2]]); |
| 289 | } |
| 290 | } |
| 291 | } |
| 292 | }); |
| 293 | return collapsedBlocks; |
| 294 | } |
| 295 | |
| 296 | /** |
| 297 | * Merge diffBlocks(a, b) and diffBlocks(c, b). |
| 298 | * Any difference (between a and b, or c and b) generates a `!` block. |
| 299 | * The (a1, a2) line numbers in the output blocks are changed to (b1, b2). |
| 300 | * Preserve empty (a1 == a2, b1 == b2) '!' blocks for context line calculation. |
| 301 | */ |
| 302 | export function mergeBlocks(abBlocks: Array<Block>, cbBlocks: Array<Block>): Array<Block> { |
| 303 | let i = 0; // Index of abBlocks. |
| 304 | let j = 0; // Index of cbBlocks. |
| 305 | let start = 0; // "Current" line index of b. |
| 306 | const result: Array<Block> = []; |
| 307 | |
| 308 | const push = (sign: Sign, end: number) => { |
| 309 | const last = result.at(-1); |
| 310 | if (last?.[0] === sign) { |
| 311 | last[1][1] = end; |
| 312 | last[1][3] = end; |
| 313 | } else { |
| 314 | result.push([sign, [start, end, start, end]]); |
| 315 | } |
| 316 | start = end; |
| 317 | }; |
| 318 | |
| 319 | while (i < abBlocks.length && j < cbBlocks.length) { |
| 320 | const [sign1, [, , b11, b12]] = abBlocks[i]; |
| 321 | if (b11 === b12 && b12 === start && sign1 === '!') { |
| 322 | push(sign1, start); |
| 323 | } |
| 324 | if (b12 <= start) { |
| 325 | ++i; |
| 326 | continue; |
| 327 | } |
| 328 | const [sign2, [, , b21, b22]] = cbBlocks[j]; |
| 329 | if (b21 === b22 && b21 === start && sign2 === '!') { |
| 330 | push(sign2, start); |
| 331 | } |
| 332 | if (b22 <= start) { |
| 333 | ++j; |
| 334 | continue; |
| 335 | } |
| 336 | |
| 337 | // Minimal "end" so there cannot be 2 different signs in the start-end range |
| 338 | // on either side. Note 2 sides might have different signs. |
| 339 | const end = Math.min(...[b11, b12, b21, b22].filter(i => i > start)); |
| 340 | |
| 341 | // Figure out the sign of the start-end range. |
| 342 | let sign: Sign = '='; |
| 343 | if ( |
| 344 | (start >= b11 && end <= b12 && sign1 === '!') || |
| 345 | (start >= b21 && end <= b22 && sign2 === '!') |
| 346 | ) { |
| 347 | sign = '!'; |
| 348 | } |
| 349 | push(sign, end); |
| 350 | } |
| 351 | |
| 352 | return result; |
| 353 | } |
| 354 | |
| 355 | /** Indicates whether a block is same or different on both sides. */ |
| 356 | export type Sign = '=' | '!'; |
| 357 | |
| 358 | /** Return type of `diffBlocks`. */ |
| 359 | export type Block = [Sign, [LineIdx, LineIdx, LineIdx, LineIdx]]; |
| 360 | |
| 361 | /** Return type of `collapseContextLines`. */ |
| 362 | export type ContextBlock = [Sign | '~', [LineIdx, LineIdx, LineIdx, LineIdx]]; |
| 363 | |
| 364 | /** |
| 365 | * Split lines by `\n`. Preserve the end of lines. |
| 366 | */ |
| 367 | export function splitLines(s: string): string[] { |
| 368 | let pos = 0; |
| 369 | let nextPos = 0; |
| 370 | const result = []; |
| 371 | while (pos < s.length) { |
| 372 | nextPos = s.indexOf('\n', pos); |
| 373 | if (nextPos === -1) { |
| 374 | nextPos = s.length - 1; |
| 375 | } |
| 376 | result.push(s.slice(pos, nextPos + 1)); |
| 377 | pos = nextPos + 1; |
| 378 | } |
| 379 | return result; |
| 380 | } |
| 381 | |
| 382 | /** |
| 383 | * Make strings with the same content use the same integer |
| 384 | * for fast comparison. |
| 385 | */ |
| 386 | function stringsToInts(linesArray: string[][]): number[][] { |
| 387 | // This is similar to diff-match-patch's diff_linesToChars_ but is not |
| 388 | // limited to 65536 unique lines. |
| 389 | const lineMap = new Map<string, number>(); |
| 390 | return linesArray.map(lines => |
| 391 | lines.map(line => { |
| 392 | const existingId = lineMap.get(line); |
| 393 | if (existingId != null) { |
| 394 | return existingId; |
| 395 | } else { |
| 396 | const id = lineMap.size; |
| 397 | lineMap.set(line, id); |
| 398 | return id; |
| 399 | } |
| 400 | }), |
| 401 | ); |
| 402 | } |
| 403 | |