Blame · createTokenizedIntralineDiff.tsx

addons/shared/createTokenizedIntralineDiff.tsxblame

b69ab31	1	/**
b69ab31	2	* Copyright (c) Meta Platforms, Inc. and affiliates.
b69ab31	3	*
b69ab31	4	* This source code is licensed under the MIT license found in the
b69ab31	5	* LICENSE file in the root directory of this source tree.
b69ab31	6	*/
b69ab31	7
b69ab31	8	import type {HighlightedToken} from './textmate-lib/tokenize';
b69ab31	9
b69ab31	10	import {diffWordsWithSpace} from 'diff';
b69ab31	11	import {CSS_CLASS_PREFIX} from './textmate-lib/textmateStyles';
b69ab31	12
b69ab31	13	/** Type of Chunk: Added, Removed, Unmodified. */
b69ab31	14	type ChunkType = 'A' \| 'R' \| 'U';
b69ab31	15
b69ab31	16	/**
b69ab31	17	* The Myers difference algorithm used by the `diff` Node module is O(ND) where
b69ab31	18	* N is the sum of the lengths of the two inputs and D is size of the minimum
b69ab31	19	* edit script for the two inputs. As such, large values of N can result in
b69ab31	20	* extremely long running times while likely providing little value for the
b69ab31	21	* user. For example, a large blob of JSON on a single line compared with the
b69ab31	22	* first line of the pretty-printed version (containing only `{`) could be an
b69ab31	23	* expensive diff to compute while telling the user nothing of interest.
b69ab31	24	*
b69ab31	25	* To defend against such pathological cases, we should not bother to compute
b69ab31	26	* the intraline diff in certain cases. As an initial heuristic, we impose a
b69ab31	27	* threshold for the "maximum input" (i.e., the sum of the lengths of the
b69ab31	28	* strings being compared) for which an intraline diff should be computed.
b69ab31	29	*
b69ab31	30	* Incidentally, tokenization for syntax highlighting has similar issues. At
b69ab31	31	* least as of Oct 2022, rather than impose a "max line length," VS Code imposes
b69ab31	32	* a "time spent" threshold:
b69ab31	33	*
b69ab31	34	* https://github.com/microsoft/vscode/blob/504c5a768a001b2099dd2b44e9dc39e10ccdfb56/src/vs/workbench/services/textMate/common/TMTokenization.ts#L39
b69ab31	35	*
b69ab31	36	* It might be worth considering something similar for intraline diffs.
b69ab31	37	*/
b69ab31	38	export const MAX_INPUT_LENGTH_FOR_INTRALINE_DIFF = 300;
b69ab31	39
b69ab31	40	/**
b69ab31	41	* Normalized version of a `diff.Change` returned by `diffWords()` that is
b69ab31	42	* easier to work with when interleaving with syntax highlight information.
b69ab31	43	*/
b69ab31	44	type Chunk = {
b69ab31	45	type: ChunkType;
b69ab31	46	start: number;
b69ab31	47	end: number;
b69ab31	48	};
b69ab31	49
b69ab31	50	/**
b69ab31	51	* Takes a modified line in the form of `beforeLine` and `afterLine` along with
b69ab31	52	* syntax highlighting information that covers each respective line and
b69ab31	53	* returns a ReactFragment to display the before/after versions of a line in a
b69ab31	54	* side-by-side diff.
b69ab31	55	*/
b69ab31	56	export function createTokenizedIntralineDiff(
b69ab31	57	beforeLine: string,
b69ab31	58	beforeTokens: HighlightedToken[],
b69ab31	59	afterLine: string,
b69ab31	60	afterTokens: HighlightedToken[],
b69ab31	61	): [React.ReactFragment \| null, React.ReactFragment \| null] {
b69ab31	62	if (beforeLine.length + afterLine.length > MAX_INPUT_LENGTH_FOR_INTRALINE_DIFF) {
b69ab31	63	return [
b69ab31	64	applyTokenizationToLine(beforeLine, beforeTokens),
b69ab31	65	applyTokenizationToLine(afterLine, afterTokens),
b69ab31	66	];
b69ab31	67	}
b69ab31	68
b69ab31	69	const changes = diffWordsWithSpace(beforeLine, afterLine);
b69ab31	70	const beforeChunks: Chunk[] = [];
b69ab31	71	const afterChunks: Chunk[] = [];
b69ab31	72	let beforeLength = 0;
b69ab31	73	let afterLength = 0;
b69ab31	74
b69ab31	75	changes.forEach(change => {
b69ab31	76	const {added, removed, value} = change;
b69ab31	77	const len = value.length;
b69ab31	78	if (added) {
b69ab31	79	const end = afterLength + len;
b69ab31	80	addOrExtend(afterChunks, 'A', afterLength, end);
b69ab31	81	afterLength = end;
b69ab31	82	} else if (removed) {
b69ab31	83	const end = beforeLength + len;
b69ab31	84	addOrExtend(beforeChunks, 'R', beforeLength, end);
b69ab31	85	beforeLength = end;
b69ab31	86	} else {
b69ab31	87	const beforeEnd = beforeLength + len;
b69ab31	88	addOrExtend(beforeChunks, 'U', beforeLength, beforeEnd);
b69ab31	89	beforeLength = beforeEnd;
b69ab31	90
b69ab31	91	const afterEnd = afterLength + len;
b69ab31	92	addOrExtend(afterChunks, 'U', afterLength, afterEnd);
b69ab31	93	afterLength = afterEnd;
b69ab31	94	}
b69ab31	95	});
b69ab31	96
b69ab31	97	// Note that the logic in mergeChunksAndTokens() could be done as part of this
b69ab31	98	// function to avoid an additional pass over the chunks, but the bookkeeping
b69ab31	99	// might get messy.
b69ab31	100	return [
b69ab31	101	mergeChunksAndTokens(beforeLine, beforeChunks, beforeTokens),
b69ab31	102	mergeChunksAndTokens(afterLine, afterChunks, afterTokens),
b69ab31	103	];
b69ab31	104	}
b69ab31	105
b69ab31	106	function addOrExtend(chunks: Chunk[], type: ChunkType, start: number, end: number) {
b69ab31	107	const lastEntry = chunks[chunks.length - 1];
b69ab31	108	if (lastEntry?.type === type && lastEntry?.end === start) {
b69ab31	109	lastEntry.end = end;
b69ab31	110	} else {
b69ab31	111	chunks.push({type, start, end});
b69ab31	112	}
b69ab31	113	}
b69ab31	114
b69ab31	115	/** TODO: Create proper machinery to strip assertions from production builds. */
b69ab31	116	const ENABLE_ASSERTIONS = false;
b69ab31	117
b69ab31	118	type ChunkSpanProps = {
b69ab31	119	key: number;
b69ab31	120	className: string;
b69ab31	121	content: string;
b69ab31	122	isChunkStart: boolean;
b69ab31	123	isChunkEnd: boolean;
b69ab31	124	};
b69ab31	125
b69ab31	126	function ChunkSpan({
b69ab31	127	key,
b69ab31	128	className,
b69ab31	129	content,
b69ab31	130	isChunkStart,
b69ab31	131	isChunkEnd,
b69ab31	132	}: ChunkSpanProps): React.ReactNode {
b69ab31	133	let fullClassName = className;
b69ab31	134	if (isChunkStart) {
b69ab31	135	fullClassName += ' patch-word-begin';
b69ab31	136	}
b69ab31	137	if (isChunkEnd) {
b69ab31	138	fullClassName += ' patch-word-end';
b69ab31	139	}
b69ab31	140	return (
b69ab31	141	<span key={key} className={fullClassName}>
b69ab31	142	{content}
b69ab31	143	</span>
b69ab31	144	);
b69ab31	145	}
b69ab31	146
b69ab31	147	/**
b69ab31	148	* Interleave chunks and tokens to produce a properly styled intraline diff.
b69ab31	149	*/
b69ab31	150	function mergeChunksAndTokens(
b69ab31	151	line: string,
b69ab31	152	chunks: Chunk[],
b69ab31	153	tokens: HighlightedToken[],
b69ab31	154	): React.ReactFragment \| null {
b69ab31	155	if (tokens.length == 0) {
b69ab31	156	return null;
b69ab31	157	}
b69ab31	158
b69ab31	159	if (ENABLE_ASSERTIONS) {
b69ab31	160	// We expect the following invariants to hold, by construction.
b69ab31	161	// eslint-disable-next-line no-console
b69ab31	162	console.assert(
b69ab31	163	chunks.length !== 0,
b69ab31	164	'chunks is never empty, even if the line is the empty string',
b69ab31	165	);
b69ab31	166	// eslint-disable-next-line no-console
b69ab31	167	console.assert(
b69ab31	168	chunks[chunks.length - 1].end === tokens[tokens.length - 1].end,
b69ab31	169	'the final chunk and token must have the same end index to ensure the loop breaks properly',
b69ab31	170	);
b69ab31	171	}
b69ab31	172
b69ab31	173	const spans: ChunkSpanProps[] = [];
b69ab31	174	let chunkIndex = 0;
b69ab31	175	let tokenIndex = 0;
b69ab31	176	let lastEndIndex = 0;
b69ab31	177	let lastChunkType: ChunkType = 'U';
b69ab31	178	let isChunkStart = false;
b69ab31	179	const maxChunkIndex = chunks.length;
b69ab31	180	const maxTokenIndex = tokens.length;
b69ab31	181	while (chunkIndex < maxChunkIndex && tokenIndex < maxTokenIndex) {
b69ab31	182	const chunk = chunks[chunkIndex];
b69ab31	183	const token = tokens[tokenIndex];
b69ab31	184	const start = lastEndIndex;
b69ab31	185	if (chunk.end === token.end) {
b69ab31	186	lastEndIndex = chunk.end;
b69ab31	187	++chunkIndex;
b69ab31	188	++tokenIndex;
b69ab31	189	} else if (chunk.end < token.end) {
b69ab31	190	lastEndIndex = chunk.end;
b69ab31	191	++chunkIndex;
b69ab31	192	} else {
b69ab31	193	lastEndIndex = token.end;
b69ab31	194	++tokenIndex;
b69ab31	195	}
b69ab31	196
b69ab31	197	const chunkType = chunk.type;
b69ab31	198	if (lastChunkType !== 'U' && chunkType !== lastChunkType) {
b69ab31	199	spans[spans.length - 1].isChunkEnd = true;
b69ab31	200	}
b69ab31	201	isChunkStart = chunkType !== 'U' && (chunkType !== lastChunkType \|\| spans.length === 0);
b69ab31	202	spans.push(createSpan(line, start, lastEndIndex, token.color, chunkType, isChunkStart));
b69ab31	203	lastChunkType = chunkType;
b69ab31	204	}
b69ab31	205
b69ab31	206	// Check if last span needs to have isChunkEnd set.
b69ab31	207	if (lastChunkType !== 'U') {
b69ab31	208	spans[spans.length - 1].isChunkEnd = true;
b69ab31	209	}
b69ab31	210
b69ab31	211	return spans.map(ChunkSpan);
b69ab31	212	}
b69ab31	213
b69ab31	214	/**
b69ab31	215	* Creates the <span> with the appropriate CSS classes to display a portion of
b69ab31	216	* an intraline diff with syntax highlighting.
b69ab31	217	*/
b69ab31	218	function createSpan(
b69ab31	219	line: string,
b69ab31	220	start: number,
b69ab31	221	end: number,
b69ab31	222	color: number,
b69ab31	223	type: ChunkType,
b69ab31	224	isChunkStart: boolean,
b69ab31	225	): ChunkSpanProps {
b69ab31	226	let patchClass;
b69ab31	227	switch (type) {
b69ab31	228	case 'U':
b69ab31	229	patchClass = '';
b69ab31	230	break;
b69ab31	231	case 'A':
b69ab31	232	patchClass = ' patch-add-word';
b69ab31	233	break;
b69ab31	234	case 'R':
b69ab31	235	patchClass = ' patch-remove-word';
b69ab31	236	break;
b69ab31	237	}
b69ab31	238
b69ab31	239	const className = `${CSS_CLASS_PREFIX}${color}${patchClass}`;
b69ab31	240	return {
b69ab31	241	key: start,
b69ab31	242	className,
b69ab31	243	content: line.slice(start, end),
b69ab31	244	isChunkStart,
b69ab31	245	isChunkEnd: false,
b69ab31	246	};
b69ab31	247	}
b69ab31	248
b69ab31	249	export function applyTokenizationToLine(
b69ab31	250	line: string,
b69ab31	251	tokenization: readonly HighlightedToken[],
b69ab31	252	): React.ReactFragment {
b69ab31	253	return tokenization.map(({start, end, color}) => {
b69ab31	254	return (
b69ab31	255	<span key={start} className={`${CSS_CLASS_PREFIX}${color}`}>
b69ab31	256	{line.slice(start, end)}
b69ab31	257	</span>
b69ab31	258	);
b69ab31	259	});
b69ab31	260	}