addons/shared/createTokenizedIntralineDiff.tsxblame
View source
b69ab311/**
b69ab312 * Copyright (c) Meta Platforms, Inc. and affiliates.
b69ab313 *
b69ab314 * This source code is licensed under the MIT license found in the
b69ab315 * LICENSE file in the root directory of this source tree.
b69ab316 */
b69ab317
b69ab318import type {HighlightedToken} from './textmate-lib/tokenize';
b69ab319
b69ab3110import {diffWordsWithSpace} from 'diff';
b69ab3111import {CSS_CLASS_PREFIX} from './textmate-lib/textmateStyles';
b69ab3112
b69ab3113/** Type of Chunk: Added, Removed, Unmodified. */
b69ab3114type ChunkType = 'A' | 'R' | 'U';
b69ab3115
b69ab3116/**
b69ab3117 * The Myers difference algorithm used by the `diff` Node module is O(ND) where
b69ab3118 * N is the sum of the lengths of the two inputs and D is size of the minimum
b69ab3119 * edit script for the two inputs. As such, large values of N can result in
b69ab3120 * extremely long running times while likely providing little value for the
b69ab3121 * user. For example, a large blob of JSON on a single line compared with the
b69ab3122 * first line of the pretty-printed version (containing only `{`) could be an
b69ab3123 * expensive diff to compute while telling the user nothing of interest.
b69ab3124 *
b69ab3125 * To defend against such pathological cases, we should not bother to compute
b69ab3126 * the intraline diff in certain cases. As an initial heuristic, we impose a
b69ab3127 * threshold for the "maximum input" (i.e., the sum of the lengths of the
b69ab3128 * strings being compared) for which an intraline diff should be computed.
b69ab3129 *
b69ab3130 * Incidentally, tokenization for syntax highlighting has similar issues. At
b69ab3131 * least as of Oct 2022, rather than impose a "max line length," VS Code imposes
b69ab3132 * a "time spent" threshold:
b69ab3133 *
b69ab3134 * https://github.com/microsoft/vscode/blob/504c5a768a001b2099dd2b44e9dc39e10ccdfb56/src/vs/workbench/services/textMate/common/TMTokenization.ts#L39
b69ab3135 *
b69ab3136 * It might be worth considering something similar for intraline diffs.
b69ab3137 */
b69ab3138export const MAX_INPUT_LENGTH_FOR_INTRALINE_DIFF = 300;
b69ab3139
b69ab3140/**
b69ab3141 * Normalized version of a `diff.Change` returned by `diffWords()` that is
b69ab3142 * easier to work with when interleaving with syntax highlight information.
b69ab3143 */
b69ab3144type Chunk = {
b69ab3145 type: ChunkType;
b69ab3146 start: number;
b69ab3147 end: number;
b69ab3148};
b69ab3149
b69ab3150/**
b69ab3151 * Takes a modified line in the form of `beforeLine` and `afterLine` along with
b69ab3152 * syntax highlighting information that covers each respective line and
b69ab3153 * returns a ReactFragment to display the before/after versions of a line in a
b69ab3154 * side-by-side diff.
b69ab3155 */
b69ab3156export function createTokenizedIntralineDiff(
b69ab3157 beforeLine: string,
b69ab3158 beforeTokens: HighlightedToken[],
b69ab3159 afterLine: string,
b69ab3160 afterTokens: HighlightedToken[],
b69ab3161): [React.ReactFragment | null, React.ReactFragment | null] {
b69ab3162 if (beforeLine.length + afterLine.length > MAX_INPUT_LENGTH_FOR_INTRALINE_DIFF) {
b69ab3163 return [
b69ab3164 applyTokenizationToLine(beforeLine, beforeTokens),
b69ab3165 applyTokenizationToLine(afterLine, afterTokens),
b69ab3166 ];
b69ab3167 }
b69ab3168
b69ab3169 const changes = diffWordsWithSpace(beforeLine, afterLine);
b69ab3170 const beforeChunks: Chunk[] = [];
b69ab3171 const afterChunks: Chunk[] = [];
b69ab3172 let beforeLength = 0;
b69ab3173 let afterLength = 0;
b69ab3174
b69ab3175 changes.forEach(change => {
b69ab3176 const {added, removed, value} = change;
b69ab3177 const len = value.length;
b69ab3178 if (added) {
b69ab3179 const end = afterLength + len;
b69ab3180 addOrExtend(afterChunks, 'A', afterLength, end);
b69ab3181 afterLength = end;
b69ab3182 } else if (removed) {
b69ab3183 const end = beforeLength + len;
b69ab3184 addOrExtend(beforeChunks, 'R', beforeLength, end);
b69ab3185 beforeLength = end;
b69ab3186 } else {
b69ab3187 const beforeEnd = beforeLength + len;
b69ab3188 addOrExtend(beforeChunks, 'U', beforeLength, beforeEnd);
b69ab3189 beforeLength = beforeEnd;
b69ab3190
b69ab3191 const afterEnd = afterLength + len;
b69ab3192 addOrExtend(afterChunks, 'U', afterLength, afterEnd);
b69ab3193 afterLength = afterEnd;
b69ab3194 }
b69ab3195 });
b69ab3196
b69ab3197 // Note that the logic in mergeChunksAndTokens() could be done as part of this
b69ab3198 // function to avoid an additional pass over the chunks, but the bookkeeping
b69ab3199 // might get messy.
b69ab31100 return [
b69ab31101 mergeChunksAndTokens(beforeLine, beforeChunks, beforeTokens),
b69ab31102 mergeChunksAndTokens(afterLine, afterChunks, afterTokens),
b69ab31103 ];
b69ab31104}
b69ab31105
b69ab31106function addOrExtend(chunks: Chunk[], type: ChunkType, start: number, end: number) {
b69ab31107 const lastEntry = chunks[chunks.length - 1];
b69ab31108 if (lastEntry?.type === type && lastEntry?.end === start) {
b69ab31109 lastEntry.end = end;
b69ab31110 } else {
b69ab31111 chunks.push({type, start, end});
b69ab31112 }
b69ab31113}
b69ab31114
b69ab31115/** TODO: Create proper machinery to strip assertions from production builds. */
b69ab31116const ENABLE_ASSERTIONS = false;
b69ab31117
b69ab31118type ChunkSpanProps = {
b69ab31119 key: number;
b69ab31120 className: string;
b69ab31121 content: string;
b69ab31122 isChunkStart: boolean;
b69ab31123 isChunkEnd: boolean;
b69ab31124};
b69ab31125
b69ab31126function ChunkSpan({
b69ab31127 key,
b69ab31128 className,
b69ab31129 content,
b69ab31130 isChunkStart,
b69ab31131 isChunkEnd,
b69ab31132}: ChunkSpanProps): React.ReactNode {
b69ab31133 let fullClassName = className;
b69ab31134 if (isChunkStart) {
b69ab31135 fullClassName += ' patch-word-begin';
b69ab31136 }
b69ab31137 if (isChunkEnd) {
b69ab31138 fullClassName += ' patch-word-end';
b69ab31139 }
b69ab31140 return (
b69ab31141 <span key={key} className={fullClassName}>
b69ab31142 {content}
b69ab31143 </span>
b69ab31144 );
b69ab31145}
b69ab31146
b69ab31147/**
b69ab31148 * Interleave chunks and tokens to produce a properly styled intraline diff.
b69ab31149 */
b69ab31150function mergeChunksAndTokens(
b69ab31151 line: string,
b69ab31152 chunks: Chunk[],
b69ab31153 tokens: HighlightedToken[],
b69ab31154): React.ReactFragment | null {
b69ab31155 if (tokens.length == 0) {
b69ab31156 return null;
b69ab31157 }
b69ab31158
b69ab31159 if (ENABLE_ASSERTIONS) {
b69ab31160 // We expect the following invariants to hold, by construction.
b69ab31161 // eslint-disable-next-line no-console
b69ab31162 console.assert(
b69ab31163 chunks.length !== 0,
b69ab31164 'chunks is never empty, even if the line is the empty string',
b69ab31165 );
b69ab31166 // eslint-disable-next-line no-console
b69ab31167 console.assert(
b69ab31168 chunks[chunks.length - 1].end === tokens[tokens.length - 1].end,
b69ab31169 'the final chunk and token must have the same end index to ensure the loop breaks properly',
b69ab31170 );
b69ab31171 }
b69ab31172
b69ab31173 const spans: ChunkSpanProps[] = [];
b69ab31174 let chunkIndex = 0;
b69ab31175 let tokenIndex = 0;
b69ab31176 let lastEndIndex = 0;
b69ab31177 let lastChunkType: ChunkType = 'U';
b69ab31178 let isChunkStart = false;
b69ab31179 const maxChunkIndex = chunks.length;
b69ab31180 const maxTokenIndex = tokens.length;
b69ab31181 while (chunkIndex < maxChunkIndex && tokenIndex < maxTokenIndex) {
b69ab31182 const chunk = chunks[chunkIndex];
b69ab31183 const token = tokens[tokenIndex];
b69ab31184 const start = lastEndIndex;
b69ab31185 if (chunk.end === token.end) {
b69ab31186 lastEndIndex = chunk.end;
b69ab31187 ++chunkIndex;
b69ab31188 ++tokenIndex;
b69ab31189 } else if (chunk.end < token.end) {
b69ab31190 lastEndIndex = chunk.end;
b69ab31191 ++chunkIndex;
b69ab31192 } else {
b69ab31193 lastEndIndex = token.end;
b69ab31194 ++tokenIndex;
b69ab31195 }
b69ab31196
b69ab31197 const chunkType = chunk.type;
b69ab31198 if (lastChunkType !== 'U' && chunkType !== lastChunkType) {
b69ab31199 spans[spans.length - 1].isChunkEnd = true;
b69ab31200 }
b69ab31201 isChunkStart = chunkType !== 'U' && (chunkType !== lastChunkType || spans.length === 0);
b69ab31202 spans.push(createSpan(line, start, lastEndIndex, token.color, chunkType, isChunkStart));
b69ab31203 lastChunkType = chunkType;
b69ab31204 }
b69ab31205
b69ab31206 // Check if last span needs to have isChunkEnd set.
b69ab31207 if (lastChunkType !== 'U') {
b69ab31208 spans[spans.length - 1].isChunkEnd = true;
b69ab31209 }
b69ab31210
b69ab31211 return spans.map(ChunkSpan);
b69ab31212}
b69ab31213
b69ab31214/**
b69ab31215 * Creates the <span> with the appropriate CSS classes to display a portion of
b69ab31216 * an intraline diff with syntax highlighting.
b69ab31217 */
b69ab31218function createSpan(
b69ab31219 line: string,
b69ab31220 start: number,
b69ab31221 end: number,
b69ab31222 color: number,
b69ab31223 type: ChunkType,
b69ab31224 isChunkStart: boolean,
b69ab31225): ChunkSpanProps {
b69ab31226 let patchClass;
b69ab31227 switch (type) {
b69ab31228 case 'U':
b69ab31229 patchClass = '';
b69ab31230 break;
b69ab31231 case 'A':
b69ab31232 patchClass = ' patch-add-word';
b69ab31233 break;
b69ab31234 case 'R':
b69ab31235 patchClass = ' patch-remove-word';
b69ab31236 break;
b69ab31237 }
b69ab31238
b69ab31239 const className = `${CSS_CLASS_PREFIX}${color}${patchClass}`;
b69ab31240 return {
b69ab31241 key: start,
b69ab31242 className,
b69ab31243 content: line.slice(start, end),
b69ab31244 isChunkStart,
b69ab31245 isChunkEnd: false,
b69ab31246 };
b69ab31247}
b69ab31248
b69ab31249export function applyTokenizationToLine(
b69ab31250 line: string,
b69ab31251 tokenization: readonly HighlightedToken[],
b69ab31252): React.ReactFragment {
b69ab31253 return tokenization.map(({start, end, color}) => {
b69ab31254 return (
b69ab31255 <span key={start} className={`${CSS_CLASS_PREFIX}${color}`}>
b69ab31256 {line.slice(start, end)}
b69ab31257 </span>
b69ab31258 );
b69ab31259 });
b69ab31260}