addons/shared/diff.tsblame
View source
b69ab311/**
b69ab312 * Portions Copyright (c) Meta Platforms, Inc. and affiliates.
b69ab313 *
b69ab314 * This source code is licensed under the MIT license found in the
b69ab315 * LICENSE file in the root directory of this source tree.
b69ab316 */
b69ab317
b69ab318/*
b69ab319
b69ab3110Copyright (c) 2020 Jun Wu
b69ab3111
b69ab3112Permission is hereby granted, free of charge, to any person obtaining a copy
b69ab3113of this software and associated documentation files (the "Software"), to deal
b69ab3114in the Software without restriction, including without limitation the rights
b69ab3115to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
b69ab3116copies of the Software, and to permit persons to whom the Software is
b69ab3117furnished to do so, subject to the following conditions:
b69ab3118
b69ab3119The above copyright notice and this permission notice shall be included in all
b69ab3120copies or substantial portions of the Software.
b69ab3121
b69ab3122THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
b69ab3123IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
b69ab3124FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
b69ab3125AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
b69ab3126LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
b69ab3127OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
b69ab3128SOFTWARE.
b69ab3129
b69ab3130*/
b69ab3131
b69ab3132// Read D43857949 about the choice of the diff library.
b69ab3133import diffSequences from 'diff-sequences';
b69ab3134
b69ab3135/** Index of a line. Starts from 0. */
b69ab3136export type LineIdx = number;
b69ab3137
b69ab3138/**
b69ab3139 * Calculate the line differences. For performance, this function only
b69ab3140 * returns the line indexes for different chunks. The line contents
b69ab3141 * are not returned.
b69ab3142 *
b69ab3143 * @param aLines lines on the "a" side.
b69ab3144 * @param bLines lines on the "b" side.
b69ab3145 * @param readable whether to use `readableDiffBlocks`.
b69ab3146 * @returns A list of `(a1, a2, b1, b2)` tuples for the line ranges that
b69ab3147 * are different between "a" and "b".
b69ab3148 */
b69ab3149export function diffLines(
b69ab3150 aLines: string[],
b69ab3151 bLines: string[],
b69ab3152 readable = true,
b69ab3153): [LineIdx, LineIdx, LineIdx, LineIdx][] {
b69ab3154 return (readable ? readableDiffBlocks : diffBlocks)(aLines, bLines)
b69ab3155 .filter(([sign, _range]) => sign === '!')
b69ab3156 .map(([_sign, range]) => range);
b69ab3157}
b69ab3158
b69ab3159/**
b69ab3160 * Calculate the line differences. For performance, this function returns
b69ab3161 * line ranges not line contents.
b69ab3162 *
b69ab3163 * Similar to Mercurial's `mdiff.allblocks`.
b69ab3164 *
b69ab3165 * @param aLines lines on the "a" side.
b69ab3166 * @param bLines lines on the "b" side.
b69ab3167 * @returns A list of `[sign, [a1, a2, b1, b2]]` tuples for the line ranges.
b69ab3168 * If `sign` is `'='`, the a1 to a2 range on the a side, and b1 to b2 range
b69ab3169 * on the b side are the same on both sides. Otherwise, `sign` is `'!'`
b69ab3170 * meaning the ranges are different.
b69ab3171 */
b69ab3172export function diffBlocks(aLines: string[], bLines: string[]): Array<Block> {
b69ab3173 // Avoid O(string length) comparison.
b69ab3174 const [aList, bList] = stringsToInts([aLines, bLines]);
b69ab3175 return diffIntBlocks(aList, bList);
b69ab3176}
b69ab3177
b69ab3178/**
b69ab3179 * Similar to `diffBlocks`, but attempt to produce more human-readable diff.
b69ab3180 *
b69ab3181 * Most of the time, a diff is less human-readable because "insignificant"
b69ab3182 * lines like "{" or "}" are matched. Practically, matching the actual code
b69ab3183 * (significant) lines should have high priority for "readability".
b69ab3184 *
b69ab3185 * To prioritize significant lines, run the main diff algorithm on the
b69ab3186 * significant lines first, force match the equal lines. Those equal lines
b69ab3187 * will split the insignificant lines into smaller regions. Run the diff
b69ab3188 * algorithm on those regions.
b69ab3189 *
b69ab3190 * Because insignificant lines are skipped, this function might not always
b69ab3191 * produce the theoretical minimal diff like `diffBlocks`.
b69ab3192 */
b69ab3193export function readableDiffBlocks(aLines: string[], bLines: string[]): Array<Block> {
b69ab3194 const aIsSignificant = aLines.map(isSignificantLine);
b69ab3195 const bIsSignificant = bLines.map(isSignificantLine);
b69ab3196 const insignificantCount =
b69ab3197 aIsSignificant.reduce((a, b) => a + (b ? 0 : 1), 0) +
b69ab3198 bIsSignificant.reduce((a, b) => a + (b ? 0 : 1), 0);
b69ab3199 if (insignificantCount > (aLines.length + bLines.length + 3) / 2) {
b69ab31100 // Too many insignificant lines. The algorithm might be a bad fit.
b69ab31101 // Use regular diff instead.
b69ab31102 return diffBlocks(aLines, bLines);
b69ab31103 }
b69ab31104
b69ab31105 // Assign integer ids to lines.
b69ab31106 const [aFull, bFull] = stringsToInts([aLines, bLines]);
b69ab31107
b69ab31108 // Significant lines.
b69ab31109 const aSignificant = aFull.filter((_l, i) => aIsSignificant[i]);
b69ab31110 const bSignificant = bFull.filter((_l, i) => bIsSignificant[i]);
b69ab31111
b69ab31112 // Index offset. aSignificant[i] == aFull[aSigToFull[i]].
b69ab31113 const aSigToFull = calculateSigToFull(aIsSignificant);
b69ab31114 const bSigToFull = calculateSigToFull(bIsSignificant);
b69ab31115
b69ab31116 let aLast = 0;
b69ab31117 let bLast = 0;
b69ab31118 const result: Array<Block> = [];
b69ab31119 const push = (blocks: ReadonlyArray<Block>, aOffset = 0, bOffset = 0) => {
b69ab31120 blocks.forEach(([sign, [origA1, origA2, origB1, origB2]]) => {
b69ab31121 const a1 = origA1 + aOffset;
b69ab31122 const a2 = origA2 + aOffset;
b69ab31123 const b1 = origB1 + bOffset;
b69ab31124 const b2 = origB2 + bOffset;
b69ab31125 const last = result.at(-1);
b69ab31126 if (last?.[0] === sign && last[1][1] === a1 && last[1][3] === b1) {
b69ab31127 last[1][1] = a2;
b69ab31128 last[1][3] = b2;
b69ab31129 } else {
b69ab31130 result.push([sign, [a1, a2, b1, b2]]);
b69ab31131 }
b69ab31132 });
b69ab31133 };
b69ab31134
b69ab31135 diffIntBlocks(aSignificant, bSignificant).forEach(
b69ab31136 ([sign, [aSigStart, aSigEnd, bSigStart, _bSigEnd]]) => {
b69ab31137 if (sign === '=') {
b69ab31138 for (let a1Sig = aSigStart; a1Sig < aSigEnd; a1Sig++) {
b69ab31139 const b1Sig = bSigStart + (a1Sig - aSigStart);
b69ab31140 const [a1, b1] = [aSigToFull[a1Sig], bSigToFull[b1Sig]];
b69ab31141 const [a2, b2] = [a1 + 1, b1 + 1];
b69ab31142 // Force match the two sides. Run regular diff on the insignificant region.
b69ab31143 // .. aLast | aLast .. a1 | a1 .. a2 | a2 ...
b69ab31144 // .. aLast | bLast .. b1 | b1 .. b2 | b2 ...
b69ab31145 // ----------------------------------------------------------------
b69ab31146 // already in | insignificant | significant = | rest
b69ab31147 // result | to diff | force matched | to figure out later
b69ab31148 push(diffIntBlocks(aFull.slice(aLast, a1), bFull.slice(bLast, b1)), aLast, bLast);
b69ab31149 push([['=', [a1, a2, b1, b2]]]);
b69ab31150 [aLast, bLast] = [a2, b2];
b69ab31151 }
b69ab31152 }
b69ab31153 },
b69ab31154 );
b69ab31155 if (aLast < aFull.length || bLast < bFull.length) {
b69ab31156 push(diffIntBlocks(aFull.slice(aLast), bFull.slice(bLast)), aLast, bLast);
b69ab31157 }
b69ab31158 return result;
b69ab31159}
b69ab31160
b69ab31161/**
b69ab31162 * Given a boolean array a[i], calculate b[i] so b[i] is the i-th `true` index in a[i],
b69ab31163 * aka. a.slice(0, b[i]) has a total of `i` `true` values.
b69ab31164 */
b69ab31165function calculateSigToFull(isSignificant: ReadonlyArray<boolean>): ReadonlyArray<number> {
b69ab31166 const result: number[] = [];
b69ab31167 for (let i = 0; i < isSignificant.length; i++) {
b69ab31168 if (isSignificant[i]) {
b69ab31169 result.push(i);
b69ab31170 }
b69ab31171 }
b69ab31172 result.push(isSignificant.length);
b69ab31173 return result;
b69ab31174}
b69ab31175
b69ab31176/**
b69ab31177 * Test if a line is "significant". Used by `readableDiffBlocks`.
b69ab31178 * Blank lines, "{", "}", "/**" are insignificant.
b69ab31179 */
b69ab31180function isSignificantLine(line: string): boolean {
b69ab31181 return line.match(/^[\s{}/*]*$/) == null;
b69ab31182}
b69ab31183
b69ab31184/**
b69ab31185 * Similar to `diffBlocks`, but takes integer array instead.
b69ab31186 */
b69ab31187function diffIntBlocks(aList: ReadonlyArray<number>, bList: ReadonlyArray<number>): Array<Block> {
b69ab31188 // Skip common prefix and suffix.
b69ab31189 let aLen = aList.length;
b69ab31190 let bLen = bList.length;
b69ab31191 const minLen = Math.min(aLen, bLen);
b69ab31192 let commonPrefixLen = 0;
b69ab31193 let commonSuffixLen = 0;
b69ab31194 while (commonPrefixLen < minLen && aList[commonPrefixLen] === bList[commonPrefixLen]) {
b69ab31195 commonPrefixLen += 1;
b69ab31196 }
b69ab31197 while (aLen > commonPrefixLen && bLen > commonPrefixLen && aList[aLen - 1] === bList[bLen - 1]) {
b69ab31198 aLen -= 1;
b69ab31199 bLen -= 1;
b69ab31200 commonSuffixLen += 1;
b69ab31201 }
b69ab31202 aLen -= commonPrefixLen;
b69ab31203 bLen -= commonPrefixLen;
b69ab31204
b69ab31205 const blocks: Array<Block> = [];
b69ab31206 if (commonPrefixLen > 0) {
b69ab31207 blocks.push(['=', [0, commonPrefixLen, 0, commonPrefixLen]]);
b69ab31208 }
b69ab31209
b69ab31210 // Run the diff algorithm.
b69ab31211 let a1 = 0;
b69ab31212 let b1 = 0;
b69ab31213
b69ab31214 function isCommon(aIndex: number, bIndex: number) {
b69ab31215 return aList[aIndex + commonPrefixLen] === bList[bIndex + commonPrefixLen];
b69ab31216 }
b69ab31217
b69ab31218 function foundSequence(n: LineIdx, a2: LineIdx, b2: LineIdx) {
b69ab31219 if (a1 !== a2 || b1 !== b2) {
b69ab31220 blocks.push([
b69ab31221 '!',
b69ab31222 [a1 + commonPrefixLen, a2 + commonPrefixLen, b1 + commonPrefixLen, b2 + commonPrefixLen],
b69ab31223 ]);
b69ab31224 }
b69ab31225 if (n > 0) {
b69ab31226 blocks.push([
b69ab31227 '=',
b69ab31228 [
b69ab31229 a2 + commonPrefixLen,
b69ab31230 a2 + n + commonPrefixLen,
b69ab31231 b2 + commonPrefixLen,
b69ab31232 b2 + n + commonPrefixLen,
b69ab31233 ],
b69ab31234 ]);
b69ab31235 }
b69ab31236 a1 = a2 + n;
b69ab31237 b1 = b2 + n;
b69ab31238 }
b69ab31239
b69ab31240 diffSequences(aLen, bLen, isCommon, foundSequence);
b69ab31241 foundSequence(commonSuffixLen, aLen, bLen);
b69ab31242
b69ab31243 return blocks;
b69ab31244}
b69ab31245
b69ab31246/**
b69ab31247 * Post process `blocks` from `diffBlocks` to collapse unchanged lines.
b69ab31248 * `contextLineCount` lines before or after a `!` (changed) block are
b69ab31249 * not collapsed.
b69ab31250 *
b69ab31251 * If `isALineExpanded(aLine, bLine)` returns `true`, then the _block_
b69ab31252 * is expanded.
b69ab31253 *
b69ab31254 * Split `=` blocks into `=` and `~` blocks. The `~` blocks are expected
b69ab31255 * to be collapsed.
b69ab31256 */
b69ab31257export function collapseContextBlocks(
b69ab31258 blocks: Array<Block>,
b69ab31259 isLineExpanded: (aLine: LineIdx, bLine: LineIdx) => boolean,
b69ab31260 contextLineCount = 3,
b69ab31261): Array<ContextBlock> {
b69ab31262 const collapsedBlocks: Array<ContextBlock> = [];
b69ab31263 blocks.forEach((block, i) => {
b69ab31264 const [sign, [a1, a2, b1, b2]] = block;
b69ab31265 if (sign === '!') {
b69ab31266 collapsedBlocks.push(block);
b69ab31267 } else if (sign === '=') {
b69ab31268 // a1 ... a1 + topContext ... a2 - bottomContext ... a2
b69ab31269 // ^^^ collapse this range (c1 to c2)
b69ab31270 // The topContext and bottomContext can be 0 lines if they are not adjacent
b69ab31271 // to a diff block.
b69ab31272 const topContext = i == 0 || blocks[i - 1][0] !== '!' ? 0 : contextLineCount;
b69ab31273 const bottomContext =
b69ab31274 i + 1 == blocks.length || blocks[i + 1][0] !== '!' ? 0 : contextLineCount;
b69ab31275 const c1 = Math.min(a1 + topContext, a2);
b69ab31276 const c2 = Math.max(c1, a2 - bottomContext);
b69ab31277 const aToB = b1 - a1;
b69ab31278 if (c1 >= c2 || isLineExpanded(c1, c1 + aToB) || isLineExpanded(c2 - 1, c2 - 1 + aToB)) {
b69ab31279 // Nothing to collapse.
b69ab31280 collapsedBlocks.push(block);
b69ab31281 } else {
b69ab31282 // Split. Collapse c1 .. c2 range.
b69ab31283 if (c1 > a1) {
b69ab31284 collapsedBlocks.push(['=', [a1, c1, b1, c1 + aToB]]);
b69ab31285 }
b69ab31286 collapsedBlocks.push(['~', [c1, c2, c1 + aToB, c2 + aToB]]);
b69ab31287 if (c2 < a2) {
b69ab31288 collapsedBlocks.push(['=', [c2, a2, c2 + aToB, b2]]);
b69ab31289 }
b69ab31290 }
b69ab31291 }
b69ab31292 });
b69ab31293 return collapsedBlocks;
b69ab31294}
b69ab31295
b69ab31296/**
b69ab31297 * Merge diffBlocks(a, b) and diffBlocks(c, b).
b69ab31298 * Any difference (between a and b, or c and b) generates a `!` block.
b69ab31299 * The (a1, a2) line numbers in the output blocks are changed to (b1, b2).
b69ab31300 * Preserve empty (a1 == a2, b1 == b2) '!' blocks for context line calculation.
b69ab31301 */
b69ab31302export function mergeBlocks(abBlocks: Array<Block>, cbBlocks: Array<Block>): Array<Block> {
b69ab31303 let i = 0; // Index of abBlocks.
b69ab31304 let j = 0; // Index of cbBlocks.
b69ab31305 let start = 0; // "Current" line index of b.
b69ab31306 const result: Array<Block> = [];
b69ab31307
b69ab31308 const push = (sign: Sign, end: number) => {
b69ab31309 const last = result.at(-1);
b69ab31310 if (last?.[0] === sign) {
b69ab31311 last[1][1] = end;
b69ab31312 last[1][3] = end;
b69ab31313 } else {
b69ab31314 result.push([sign, [start, end, start, end]]);
b69ab31315 }
b69ab31316 start = end;
b69ab31317 };
b69ab31318
b69ab31319 while (i < abBlocks.length && j < cbBlocks.length) {
b69ab31320 const [sign1, [, , b11, b12]] = abBlocks[i];
b69ab31321 if (b11 === b12 && b12 === start && sign1 === '!') {
b69ab31322 push(sign1, start);
b69ab31323 }
b69ab31324 if (b12 <= start) {
b69ab31325 ++i;
b69ab31326 continue;
b69ab31327 }
b69ab31328 const [sign2, [, , b21, b22]] = cbBlocks[j];
b69ab31329 if (b21 === b22 && b21 === start && sign2 === '!') {
b69ab31330 push(sign2, start);
b69ab31331 }
b69ab31332 if (b22 <= start) {
b69ab31333 ++j;
b69ab31334 continue;
b69ab31335 }
b69ab31336
b69ab31337 // Minimal "end" so there cannot be 2 different signs in the start-end range
b69ab31338 // on either side. Note 2 sides might have different signs.
b69ab31339 const end = Math.min(...[b11, b12, b21, b22].filter(i => i > start));
b69ab31340
b69ab31341 // Figure out the sign of the start-end range.
b69ab31342 let sign: Sign = '=';
b69ab31343 if (
b69ab31344 (start >= b11 && end <= b12 && sign1 === '!') ||
b69ab31345 (start >= b21 && end <= b22 && sign2 === '!')
b69ab31346 ) {
b69ab31347 sign = '!';
b69ab31348 }
b69ab31349 push(sign, end);
b69ab31350 }
b69ab31351
b69ab31352 return result;
b69ab31353}
b69ab31354
b69ab31355/** Indicates whether a block is same or different on both sides. */
b69ab31356export type Sign = '=' | '!';
b69ab31357
b69ab31358/** Return type of `diffBlocks`. */
b69ab31359export type Block = [Sign, [LineIdx, LineIdx, LineIdx, LineIdx]];
b69ab31360
b69ab31361/** Return type of `collapseContextLines`. */
b69ab31362export type ContextBlock = [Sign | '~', [LineIdx, LineIdx, LineIdx, LineIdx]];
b69ab31363
b69ab31364/**
b69ab31365 * Split lines by `\n`. Preserve the end of lines.
b69ab31366 */
b69ab31367export function splitLines(s: string): string[] {
b69ab31368 let pos = 0;
b69ab31369 let nextPos = 0;
b69ab31370 const result = [];
b69ab31371 while (pos < s.length) {
b69ab31372 nextPos = s.indexOf('\n', pos);
b69ab31373 if (nextPos === -1) {
b69ab31374 nextPos = s.length - 1;
b69ab31375 }
b69ab31376 result.push(s.slice(pos, nextPos + 1));
b69ab31377 pos = nextPos + 1;
b69ab31378 }
b69ab31379 return result;
b69ab31380}
b69ab31381
b69ab31382/**
b69ab31383 * Make strings with the same content use the same integer
b69ab31384 * for fast comparison.
b69ab31385 */
b69ab31386function stringsToInts(linesArray: string[][]): number[][] {
b69ab31387 // This is similar to diff-match-patch's diff_linesToChars_ but is not
b69ab31388 // limited to 65536 unique lines.
b69ab31389 const lineMap = new Map<string, number>();
b69ab31390 return linesArray.map(lines =>
b69ab31391 lines.map(line => {
b69ab31392 const existingId = lineMap.get(line);
b69ab31393 if (existingId != null) {
b69ab31394 return existingId;
b69ab31395 } else {
b69ab31396 const id = lineMap.size;
b69ab31397 lineMap.set(line, id);
b69ab31398 return id;
b69ab31399 }
b69ab31400 }),
b69ab31401 );
b69ab31402}