| b69ab31 | | | 1 | /** |
| b69ab31 | | | 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. |
| b69ab31 | | | 3 | * |
| b69ab31 | | | 4 | * This source code is licensed under the MIT license found in the |
| b69ab31 | | | 5 | * LICENSE file in the root directory of this source tree. |
| b69ab31 | | | 6 | */ |
| b69ab31 | | | 7 | |
| b69ab31 | | | 8 | import type {HighlightedToken} from './textmate-lib/tokenize'; |
| b69ab31 | | | 9 | |
| b69ab31 | | | 10 | import {diffWordsWithSpace} from 'diff'; |
| b69ab31 | | | 11 | import {CSS_CLASS_PREFIX} from './textmate-lib/textmateStyles'; |
| b69ab31 | | | 12 | |
| b69ab31 | | | 13 | /** Type of Chunk: Added, Removed, Unmodified. */ |
| b69ab31 | | | 14 | type ChunkType = 'A' | 'R' | 'U'; |
| b69ab31 | | | 15 | |
| b69ab31 | | | 16 | /** |
| b69ab31 | | | 17 | * The Myers difference algorithm used by the `diff` Node module is O(ND) where |
| b69ab31 | | | 18 | * N is the sum of the lengths of the two inputs and D is size of the minimum |
| b69ab31 | | | 19 | * edit script for the two inputs. As such, large values of N can result in |
| b69ab31 | | | 20 | * extremely long running times while likely providing little value for the |
| b69ab31 | | | 21 | * user. For example, a large blob of JSON on a single line compared with the |
| b69ab31 | | | 22 | * first line of the pretty-printed version (containing only `{`) could be an |
| b69ab31 | | | 23 | * expensive diff to compute while telling the user nothing of interest. |
| b69ab31 | | | 24 | * |
| b69ab31 | | | 25 | * To defend against such pathological cases, we should not bother to compute |
| b69ab31 | | | 26 | * the intraline diff in certain cases. As an initial heuristic, we impose a |
| b69ab31 | | | 27 | * threshold for the "maximum input" (i.e., the sum of the lengths of the |
| b69ab31 | | | 28 | * strings being compared) for which an intraline diff should be computed. |
| b69ab31 | | | 29 | * |
| b69ab31 | | | 30 | * Incidentally, tokenization for syntax highlighting has similar issues. At |
| b69ab31 | | | 31 | * least as of Oct 2022, rather than impose a "max line length," VS Code imposes |
| b69ab31 | | | 32 | * a "time spent" threshold: |
| b69ab31 | | | 33 | * |
| b69ab31 | | | 34 | * https://github.com/microsoft/vscode/blob/504c5a768a001b2099dd2b44e9dc39e10ccdfb56/src/vs/workbench/services/textMate/common/TMTokenization.ts#L39 |
| b69ab31 | | | 35 | * |
| b69ab31 | | | 36 | * It might be worth considering something similar for intraline diffs. |
| b69ab31 | | | 37 | */ |
| b69ab31 | | | 38 | export const MAX_INPUT_LENGTH_FOR_INTRALINE_DIFF = 300; |
| b69ab31 | | | 39 | |
| b69ab31 | | | 40 | /** |
| b69ab31 | | | 41 | * Normalized version of a `diff.Change` returned by `diffWords()` that is |
| b69ab31 | | | 42 | * easier to work with when interleaving with syntax highlight information. |
| b69ab31 | | | 43 | */ |
| b69ab31 | | | 44 | type Chunk = { |
| b69ab31 | | | 45 | type: ChunkType; |
| b69ab31 | | | 46 | start: number; |
| b69ab31 | | | 47 | end: number; |
| b69ab31 | | | 48 | }; |
| b69ab31 | | | 49 | |
| b69ab31 | | | 50 | /** |
| b69ab31 | | | 51 | * Takes a modified line in the form of `beforeLine` and `afterLine` along with |
| b69ab31 | | | 52 | * syntax highlighting information that covers each respective line and |
| b69ab31 | | | 53 | * returns a ReactFragment to display the before/after versions of a line in a |
| b69ab31 | | | 54 | * side-by-side diff. |
| b69ab31 | | | 55 | */ |
| b69ab31 | | | 56 | export function createTokenizedIntralineDiff( |
| b69ab31 | | | 57 | beforeLine: string, |
| b69ab31 | | | 58 | beforeTokens: HighlightedToken[], |
| b69ab31 | | | 59 | afterLine: string, |
| b69ab31 | | | 60 | afterTokens: HighlightedToken[], |
| b69ab31 | | | 61 | ): [React.ReactFragment | null, React.ReactFragment | null] { |
| b69ab31 | | | 62 | if (beforeLine.length + afterLine.length > MAX_INPUT_LENGTH_FOR_INTRALINE_DIFF) { |
| b69ab31 | | | 63 | return [ |
| b69ab31 | | | 64 | applyTokenizationToLine(beforeLine, beforeTokens), |
| b69ab31 | | | 65 | applyTokenizationToLine(afterLine, afterTokens), |
| b69ab31 | | | 66 | ]; |
| b69ab31 | | | 67 | } |
| b69ab31 | | | 68 | |
| b69ab31 | | | 69 | const changes = diffWordsWithSpace(beforeLine, afterLine); |
| b69ab31 | | | 70 | const beforeChunks: Chunk[] = []; |
| b69ab31 | | | 71 | const afterChunks: Chunk[] = []; |
| b69ab31 | | | 72 | let beforeLength = 0; |
| b69ab31 | | | 73 | let afterLength = 0; |
| b69ab31 | | | 74 | |
| b69ab31 | | | 75 | changes.forEach(change => { |
| b69ab31 | | | 76 | const {added, removed, value} = change; |
| b69ab31 | | | 77 | const len = value.length; |
| b69ab31 | | | 78 | if (added) { |
| b69ab31 | | | 79 | const end = afterLength + len; |
| b69ab31 | | | 80 | addOrExtend(afterChunks, 'A', afterLength, end); |
| b69ab31 | | | 81 | afterLength = end; |
| b69ab31 | | | 82 | } else if (removed) { |
| b69ab31 | | | 83 | const end = beforeLength + len; |
| b69ab31 | | | 84 | addOrExtend(beforeChunks, 'R', beforeLength, end); |
| b69ab31 | | | 85 | beforeLength = end; |
| b69ab31 | | | 86 | } else { |
| b69ab31 | | | 87 | const beforeEnd = beforeLength + len; |
| b69ab31 | | | 88 | addOrExtend(beforeChunks, 'U', beforeLength, beforeEnd); |
| b69ab31 | | | 89 | beforeLength = beforeEnd; |
| b69ab31 | | | 90 | |
| b69ab31 | | | 91 | const afterEnd = afterLength + len; |
| b69ab31 | | | 92 | addOrExtend(afterChunks, 'U', afterLength, afterEnd); |
| b69ab31 | | | 93 | afterLength = afterEnd; |
| b69ab31 | | | 94 | } |
| b69ab31 | | | 95 | }); |
| b69ab31 | | | 96 | |
| b69ab31 | | | 97 | // Note that the logic in mergeChunksAndTokens() could be done as part of this |
| b69ab31 | | | 98 | // function to avoid an additional pass over the chunks, but the bookkeeping |
| b69ab31 | | | 99 | // might get messy. |
| b69ab31 | | | 100 | return [ |
| b69ab31 | | | 101 | mergeChunksAndTokens(beforeLine, beforeChunks, beforeTokens), |
| b69ab31 | | | 102 | mergeChunksAndTokens(afterLine, afterChunks, afterTokens), |
| b69ab31 | | | 103 | ]; |
| b69ab31 | | | 104 | } |
| b69ab31 | | | 105 | |
| b69ab31 | | | 106 | function addOrExtend(chunks: Chunk[], type: ChunkType, start: number, end: number) { |
| b69ab31 | | | 107 | const lastEntry = chunks[chunks.length - 1]; |
| b69ab31 | | | 108 | if (lastEntry?.type === type && lastEntry?.end === start) { |
| b69ab31 | | | 109 | lastEntry.end = end; |
| b69ab31 | | | 110 | } else { |
| b69ab31 | | | 111 | chunks.push({type, start, end}); |
| b69ab31 | | | 112 | } |
| b69ab31 | | | 113 | } |
| b69ab31 | | | 114 | |
| b69ab31 | | | 115 | /** TODO: Create proper machinery to strip assertions from production builds. */ |
| b69ab31 | | | 116 | const ENABLE_ASSERTIONS = false; |
| b69ab31 | | | 117 | |
| b69ab31 | | | 118 | type ChunkSpanProps = { |
| b69ab31 | | | 119 | key: number; |
| b69ab31 | | | 120 | className: string; |
| b69ab31 | | | 121 | content: string; |
| b69ab31 | | | 122 | isChunkStart: boolean; |
| b69ab31 | | | 123 | isChunkEnd: boolean; |
| b69ab31 | | | 124 | }; |
| b69ab31 | | | 125 | |
| b69ab31 | | | 126 | function ChunkSpan({ |
| b69ab31 | | | 127 | key, |
| b69ab31 | | | 128 | className, |
| b69ab31 | | | 129 | content, |
| b69ab31 | | | 130 | isChunkStart, |
| b69ab31 | | | 131 | isChunkEnd, |
| b69ab31 | | | 132 | }: ChunkSpanProps): React.ReactNode { |
| b69ab31 | | | 133 | let fullClassName = className; |
| b69ab31 | | | 134 | if (isChunkStart) { |
| b69ab31 | | | 135 | fullClassName += ' patch-word-begin'; |
| b69ab31 | | | 136 | } |
| b69ab31 | | | 137 | if (isChunkEnd) { |
| b69ab31 | | | 138 | fullClassName += ' patch-word-end'; |
| b69ab31 | | | 139 | } |
| b69ab31 | | | 140 | return ( |
| b69ab31 | | | 141 | <span key={key} className={fullClassName}> |
| b69ab31 | | | 142 | {content} |
| b69ab31 | | | 143 | </span> |
| b69ab31 | | | 144 | ); |
| b69ab31 | | | 145 | } |
| b69ab31 | | | 146 | |
| b69ab31 | | | 147 | /** |
| b69ab31 | | | 148 | * Interleave chunks and tokens to produce a properly styled intraline diff. |
| b69ab31 | | | 149 | */ |
| b69ab31 | | | 150 | function mergeChunksAndTokens( |
| b69ab31 | | | 151 | line: string, |
| b69ab31 | | | 152 | chunks: Chunk[], |
| b69ab31 | | | 153 | tokens: HighlightedToken[], |
| b69ab31 | | | 154 | ): React.ReactFragment | null { |
| b69ab31 | | | 155 | if (tokens.length == 0) { |
| b69ab31 | | | 156 | return null; |
| b69ab31 | | | 157 | } |
| b69ab31 | | | 158 | |
| b69ab31 | | | 159 | if (ENABLE_ASSERTIONS) { |
| b69ab31 | | | 160 | // We expect the following invariants to hold, by construction. |
| b69ab31 | | | 161 | // eslint-disable-next-line no-console |
| b69ab31 | | | 162 | console.assert( |
| b69ab31 | | | 163 | chunks.length !== 0, |
| b69ab31 | | | 164 | 'chunks is never empty, even if the line is the empty string', |
| b69ab31 | | | 165 | ); |
| b69ab31 | | | 166 | // eslint-disable-next-line no-console |
| b69ab31 | | | 167 | console.assert( |
| b69ab31 | | | 168 | chunks[chunks.length - 1].end === tokens[tokens.length - 1].end, |
| b69ab31 | | | 169 | 'the final chunk and token must have the same end index to ensure the loop breaks properly', |
| b69ab31 | | | 170 | ); |
| b69ab31 | | | 171 | } |
| b69ab31 | | | 172 | |
| b69ab31 | | | 173 | const spans: ChunkSpanProps[] = []; |
| b69ab31 | | | 174 | let chunkIndex = 0; |
| b69ab31 | | | 175 | let tokenIndex = 0; |
| b69ab31 | | | 176 | let lastEndIndex = 0; |
| b69ab31 | | | 177 | let lastChunkType: ChunkType = 'U'; |
| b69ab31 | | | 178 | let isChunkStart = false; |
| b69ab31 | | | 179 | const maxChunkIndex = chunks.length; |
| b69ab31 | | | 180 | const maxTokenIndex = tokens.length; |
| b69ab31 | | | 181 | while (chunkIndex < maxChunkIndex && tokenIndex < maxTokenIndex) { |
| b69ab31 | | | 182 | const chunk = chunks[chunkIndex]; |
| b69ab31 | | | 183 | const token = tokens[tokenIndex]; |
| b69ab31 | | | 184 | const start = lastEndIndex; |
| b69ab31 | | | 185 | if (chunk.end === token.end) { |
| b69ab31 | | | 186 | lastEndIndex = chunk.end; |
| b69ab31 | | | 187 | ++chunkIndex; |
| b69ab31 | | | 188 | ++tokenIndex; |
| b69ab31 | | | 189 | } else if (chunk.end < token.end) { |
| b69ab31 | | | 190 | lastEndIndex = chunk.end; |
| b69ab31 | | | 191 | ++chunkIndex; |
| b69ab31 | | | 192 | } else { |
| b69ab31 | | | 193 | lastEndIndex = token.end; |
| b69ab31 | | | 194 | ++tokenIndex; |
| b69ab31 | | | 195 | } |
| b69ab31 | | | 196 | |
| b69ab31 | | | 197 | const chunkType = chunk.type; |
| b69ab31 | | | 198 | if (lastChunkType !== 'U' && chunkType !== lastChunkType) { |
| b69ab31 | | | 199 | spans[spans.length - 1].isChunkEnd = true; |
| b69ab31 | | | 200 | } |
| b69ab31 | | | 201 | isChunkStart = chunkType !== 'U' && (chunkType !== lastChunkType || spans.length === 0); |
| b69ab31 | | | 202 | spans.push(createSpan(line, start, lastEndIndex, token.color, chunkType, isChunkStart)); |
| b69ab31 | | | 203 | lastChunkType = chunkType; |
| b69ab31 | | | 204 | } |
| b69ab31 | | | 205 | |
| b69ab31 | | | 206 | // Check if last span needs to have isChunkEnd set. |
| b69ab31 | | | 207 | if (lastChunkType !== 'U') { |
| b69ab31 | | | 208 | spans[spans.length - 1].isChunkEnd = true; |
| b69ab31 | | | 209 | } |
| b69ab31 | | | 210 | |
| b69ab31 | | | 211 | return spans.map(ChunkSpan); |
| b69ab31 | | | 212 | } |
| b69ab31 | | | 213 | |
| b69ab31 | | | 214 | /** |
| b69ab31 | | | 215 | * Creates the <span> with the appropriate CSS classes to display a portion of |
| b69ab31 | | | 216 | * an intraline diff with syntax highlighting. |
| b69ab31 | | | 217 | */ |
| b69ab31 | | | 218 | function createSpan( |
| b69ab31 | | | 219 | line: string, |
| b69ab31 | | | 220 | start: number, |
| b69ab31 | | | 221 | end: number, |
| b69ab31 | | | 222 | color: number, |
| b69ab31 | | | 223 | type: ChunkType, |
| b69ab31 | | | 224 | isChunkStart: boolean, |
| b69ab31 | | | 225 | ): ChunkSpanProps { |
| b69ab31 | | | 226 | let patchClass; |
| b69ab31 | | | 227 | switch (type) { |
| b69ab31 | | | 228 | case 'U': |
| b69ab31 | | | 229 | patchClass = ''; |
| b69ab31 | | | 230 | break; |
| b69ab31 | | | 231 | case 'A': |
| b69ab31 | | | 232 | patchClass = ' patch-add-word'; |
| b69ab31 | | | 233 | break; |
| b69ab31 | | | 234 | case 'R': |
| b69ab31 | | | 235 | patchClass = ' patch-remove-word'; |
| b69ab31 | | | 236 | break; |
| b69ab31 | | | 237 | } |
| b69ab31 | | | 238 | |
| b69ab31 | | | 239 | const className = `${CSS_CLASS_PREFIX}${color}${patchClass}`; |
| b69ab31 | | | 240 | return { |
| b69ab31 | | | 241 | key: start, |
| b69ab31 | | | 242 | className, |
| b69ab31 | | | 243 | content: line.slice(start, end), |
| b69ab31 | | | 244 | isChunkStart, |
| b69ab31 | | | 245 | isChunkEnd: false, |
| b69ab31 | | | 246 | }; |
| b69ab31 | | | 247 | } |
| b69ab31 | | | 248 | |
| b69ab31 | | | 249 | export function applyTokenizationToLine( |
| b69ab31 | | | 250 | line: string, |
| b69ab31 | | | 251 | tokenization: readonly HighlightedToken[], |
| b69ab31 | | | 252 | ): React.ReactFragment { |
| b69ab31 | | | 253 | return tokenization.map(({start, end, color}) => { |
| b69ab31 | | | 254 | return ( |
| b69ab31 | | | 255 | <span key={start} className={`${CSS_CLASS_PREFIX}${color}`}> |
| b69ab31 | | | 256 | {line.slice(start, end)} |
| b69ab31 | | | 257 | </span> |
| b69ab31 | | | 258 | ); |
| b69ab31 | | | 259 | }); |
| b69ab31 | | | 260 | } |