7.7 KB261 lines
Blame
1/**
2 * Copyright (c) Meta Platforms, Inc. and affiliates.
3 *
4 * This source code is licensed under the MIT license found in the
5 * LICENSE file in the root directory of this source tree.
6 */
7
8import type {HighlightedToken} from './textmate-lib/tokenize';
9
10import {diffWordsWithSpace} from 'diff';
11import {CSS_CLASS_PREFIX} from './textmate-lib/textmateStyles';
12
13/** Type of Chunk: Added, Removed, Unmodified. */
14type ChunkType = 'A' | 'R' | 'U';
15
16/**
17 * The Myers difference algorithm used by the `diff` Node module is O(ND) where
18 * N is the sum of the lengths of the two inputs and D is size of the minimum
19 * edit script for the two inputs. As such, large values of N can result in
20 * extremely long running times while likely providing little value for the
21 * user. For example, a large blob of JSON on a single line compared with the
22 * first line of the pretty-printed version (containing only `{`) could be an
23 * expensive diff to compute while telling the user nothing of interest.
24 *
25 * To defend against such pathological cases, we should not bother to compute
26 * the intraline diff in certain cases. As an initial heuristic, we impose a
27 * threshold for the "maximum input" (i.e., the sum of the lengths of the
28 * strings being compared) for which an intraline diff should be computed.
29 *
30 * Incidentally, tokenization for syntax highlighting has similar issues. At
31 * least as of Oct 2022, rather than impose a "max line length," VS Code imposes
32 * a "time spent" threshold:
33 *
34 * https://github.com/microsoft/vscode/blob/504c5a768a001b2099dd2b44e9dc39e10ccdfb56/src/vs/workbench/services/textMate/common/TMTokenization.ts#L39
35 *
36 * It might be worth considering something similar for intraline diffs.
37 */
38export const MAX_INPUT_LENGTH_FOR_INTRALINE_DIFF = 300;
39
40/**
41 * Normalized version of a `diff.Change` returned by `diffWords()` that is
42 * easier to work with when interleaving with syntax highlight information.
43 */
44type Chunk = {
45 type: ChunkType;
46 start: number;
47 end: number;
48};
49
50/**
51 * Takes a modified line in the form of `beforeLine` and `afterLine` along with
52 * syntax highlighting information that covers each respective line and
53 * returns a ReactFragment to display the before/after versions of a line in a
54 * side-by-side diff.
55 */
56export function createTokenizedIntralineDiff(
57 beforeLine: string,
58 beforeTokens: HighlightedToken[],
59 afterLine: string,
60 afterTokens: HighlightedToken[],
61): [React.ReactFragment | null, React.ReactFragment | null] {
62 if (beforeLine.length + afterLine.length > MAX_INPUT_LENGTH_FOR_INTRALINE_DIFF) {
63 return [
64 applyTokenizationToLine(beforeLine, beforeTokens),
65 applyTokenizationToLine(afterLine, afterTokens),
66 ];
67 }
68
69 const changes = diffWordsWithSpace(beforeLine, afterLine);
70 const beforeChunks: Chunk[] = [];
71 const afterChunks: Chunk[] = [];
72 let beforeLength = 0;
73 let afterLength = 0;
74
75 changes.forEach(change => {
76 const {added, removed, value} = change;
77 const len = value.length;
78 if (added) {
79 const end = afterLength + len;
80 addOrExtend(afterChunks, 'A', afterLength, end);
81 afterLength = end;
82 } else if (removed) {
83 const end = beforeLength + len;
84 addOrExtend(beforeChunks, 'R', beforeLength, end);
85 beforeLength = end;
86 } else {
87 const beforeEnd = beforeLength + len;
88 addOrExtend(beforeChunks, 'U', beforeLength, beforeEnd);
89 beforeLength = beforeEnd;
90
91 const afterEnd = afterLength + len;
92 addOrExtend(afterChunks, 'U', afterLength, afterEnd);
93 afterLength = afterEnd;
94 }
95 });
96
97 // Note that the logic in mergeChunksAndTokens() could be done as part of this
98 // function to avoid an additional pass over the chunks, but the bookkeeping
99 // might get messy.
100 return [
101 mergeChunksAndTokens(beforeLine, beforeChunks, beforeTokens),
102 mergeChunksAndTokens(afterLine, afterChunks, afterTokens),
103 ];
104}
105
106function addOrExtend(chunks: Chunk[], type: ChunkType, start: number, end: number) {
107 const lastEntry = chunks[chunks.length - 1];
108 if (lastEntry?.type === type && lastEntry?.end === start) {
109 lastEntry.end = end;
110 } else {
111 chunks.push({type, start, end});
112 }
113}
114
115/** TODO: Create proper machinery to strip assertions from production builds. */
116const ENABLE_ASSERTIONS = false;
117
118type ChunkSpanProps = {
119 key: number;
120 className: string;
121 content: string;
122 isChunkStart: boolean;
123 isChunkEnd: boolean;
124};
125
126function ChunkSpan({
127 key,
128 className,
129 content,
130 isChunkStart,
131 isChunkEnd,
132}: ChunkSpanProps): React.ReactNode {
133 let fullClassName = className;
134 if (isChunkStart) {
135 fullClassName += ' patch-word-begin';
136 }
137 if (isChunkEnd) {
138 fullClassName += ' patch-word-end';
139 }
140 return (
141 <span key={key} className={fullClassName}>
142 {content}
143 </span>
144 );
145}
146
147/**
148 * Interleave chunks and tokens to produce a properly styled intraline diff.
149 */
150function mergeChunksAndTokens(
151 line: string,
152 chunks: Chunk[],
153 tokens: HighlightedToken[],
154): React.ReactFragment | null {
155 if (tokens.length == 0) {
156 return null;
157 }
158
159 if (ENABLE_ASSERTIONS) {
160 // We expect the following invariants to hold, by construction.
161 // eslint-disable-next-line no-console
162 console.assert(
163 chunks.length !== 0,
164 'chunks is never empty, even if the line is the empty string',
165 );
166 // eslint-disable-next-line no-console
167 console.assert(
168 chunks[chunks.length - 1].end === tokens[tokens.length - 1].end,
169 'the final chunk and token must have the same end index to ensure the loop breaks properly',
170 );
171 }
172
173 const spans: ChunkSpanProps[] = [];
174 let chunkIndex = 0;
175 let tokenIndex = 0;
176 let lastEndIndex = 0;
177 let lastChunkType: ChunkType = 'U';
178 let isChunkStart = false;
179 const maxChunkIndex = chunks.length;
180 const maxTokenIndex = tokens.length;
181 while (chunkIndex < maxChunkIndex && tokenIndex < maxTokenIndex) {
182 const chunk = chunks[chunkIndex];
183 const token = tokens[tokenIndex];
184 const start = lastEndIndex;
185 if (chunk.end === token.end) {
186 lastEndIndex = chunk.end;
187 ++chunkIndex;
188 ++tokenIndex;
189 } else if (chunk.end < token.end) {
190 lastEndIndex = chunk.end;
191 ++chunkIndex;
192 } else {
193 lastEndIndex = token.end;
194 ++tokenIndex;
195 }
196
197 const chunkType = chunk.type;
198 if (lastChunkType !== 'U' && chunkType !== lastChunkType) {
199 spans[spans.length - 1].isChunkEnd = true;
200 }
201 isChunkStart = chunkType !== 'U' && (chunkType !== lastChunkType || spans.length === 0);
202 spans.push(createSpan(line, start, lastEndIndex, token.color, chunkType, isChunkStart));
203 lastChunkType = chunkType;
204 }
205
206 // Check if last span needs to have isChunkEnd set.
207 if (lastChunkType !== 'U') {
208 spans[spans.length - 1].isChunkEnd = true;
209 }
210
211 return spans.map(ChunkSpan);
212}
213
214/**
215 * Creates the <span> with the appropriate CSS classes to display a portion of
216 * an intraline diff with syntax highlighting.
217 */
218function createSpan(
219 line: string,
220 start: number,
221 end: number,
222 color: number,
223 type: ChunkType,
224 isChunkStart: boolean,
225): ChunkSpanProps {
226 let patchClass;
227 switch (type) {
228 case 'U':
229 patchClass = '';
230 break;
231 case 'A':
232 patchClass = ' patch-add-word';
233 break;
234 case 'R':
235 patchClass = ' patch-remove-word';
236 break;
237 }
238
239 const className = `${CSS_CLASS_PREFIX}${color}${patchClass}`;
240 return {
241 key: start,
242 className,
243 content: line.slice(start, end),
244 isChunkStart,
245 isChunkEnd: false,
246 };
247}
248
249export function applyTokenizationToLine(
250 line: string,
251 tokenization: readonly HighlightedToken[],
252): React.ReactFragment {
253 return tokenization.map(({start, end, color}) => {
254 return (
255 <span key={start} className={`${CSS_CLASS_PREFIX}${color}`}>
256 {line.slice(start, end)}
257 </span>
258 );
259 });
260}
261