// Source: addons/shared/textmate-lib/tokenize.ts (captured from a blame view)
/**
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */

import type {IGrammar} from 'vscode-textmate';

import {INITIAL} from 'vscode-textmate';

// The following values come from the MetadataConsts enum in vscode-textmate.
// Although they are declared in the main.d.ts file, our TypeScript/Webpack
// setup does not appear to be able to inline them properly.

/**
 * Bit mask (0x7FC000) applied to a token's metadata word to isolate the bits
 * that hold the foreground color index.
 */
const FOREGROUND_MASK = 8372224;

/**
 * Right-shift applied after masking with FOREGROUND_MASK to turn the isolated
 * bits into the foreground color index.
 */
const FOREGROUND_OFFSET = 14;

/**
 * Specify a timeout when tokenizing a line to prevent a long line from locking
 * up the main thread. Note this is used in VS Code:
 * https://github.com/microsoft/vscode/blob/504c5a768a001b2099dd2b44e9dc39e10ccdfb56/src/vs/workbench/services/textMate/common/TMTokenization.ts#L39
 *
 * Used as the default `timeLimit` (in milliseconds) for the tokenize functions
 * in this module.
 */
const DEFAULT_TOKENIZE_TIMEOUT_MS = 500;

/**
 * A colored span within a single line of text: the half-open character range
 * `[start, end)` together with the color to paint it.
 */
export type HighlightedToken = {
  /** Start index within a line, inclusive. */
  start: number;

  /** End index within a line, exclusive. */
  end: number;

  /** Index into a color map. */
  color: number;
};

/**
 * Splits `fileContents` on `'\n'` and tokenizes the resulting lines.
 *
 * @param fileContents Full text to tokenize. Lines are separated on `'\n'`
 *   only; no other line terminator is treated specially.
 * @param grammar Grammar used to tokenize each line.
 * @param timeLimit Per-line time limit in milliseconds passed through to the
 *   tokenizer. Defaults to DEFAULT_TOKENIZE_TIMEOUT_MS.
 * @returns One array of tokens per line, in line order.
 */
export function tokenizeFileContents(
  fileContents: string,
  grammar: IGrammar,
  timeLimit: number = DEFAULT_TOKENIZE_TIMEOUT_MS,
): Array<Array<HighlightedToken>> {
  // As fileContents could be quite large, we are assuming that, even though
  // split() generates a potentially large array, because it is one native
  // call, it is likely to be more efficient than us doing our own bookkeeping
  // to slice off one substring at a time (though that would avoid the array
  // allocation).
  return tokenizeLines(fileContents.split('\n'), grammar, timeLimit);
}

/**
 * Tokenizes `lines` in order, carrying the grammar's rule stack from each line
 * into the next, and converts the tokenizer's packed output into
 * HighlightedToken ranges.
 *
 * Zero-length tokens are dropped. The last token on a line always extends to
 * the end of that line.
 *
 * @param lines Lines to tokenize, in document order.
 * @param grammar Grammar used to tokenize each line.
 * @param timeLimit Per-line time limit in milliseconds handed to
 *   `grammar.tokenizeLine2`. Defaults to DEFAULT_TOKENIZE_TIMEOUT_MS.
 * @returns One array of tokens per input line, in the same order as `lines`.
 */
export function tokenizeLines(
  lines: ReadonlyArray<string>,
  grammar: IGrammar,
  timeLimit: number = DEFAULT_TOKENIZE_TIMEOUT_MS,
): Array<Array<HighlightedToken>> {
  let state = INITIAL;
  return lines.map((line: string) => {
    // Line-processing logic taken from:
    // https://github.com/microsoft/vscode-textmate/blob/cc8ae321cfb47940470bd82c87a8ac61366fbd80/src/tests/themedTokenizer.ts#L20-L41
    const result = grammar.tokenizeLine2(line, state, timeLimit);

    // Note that even if `result.stoppedEarly` is true, we still use the list of
    // tokens that were returned to tokenize as much of the line as possible.
    //
    // `result.tokens` is read as a flat sequence of pairs: the start index at
    // position 2*j and the metadata word at 2*j + 1, hence half the length.
    // eslint-disable-next-line no-bitwise
    const tokensLength = result.tokens.length >> 1;
    const singleLine: Array<HighlightedToken> = [];
    for (let j = 0; j < tokensLength; j++) {
      const startIndex = result.tokens[2 * j];
      // A token ends where the next one starts; the final token runs to the
      // end of the line.
      const nextStartIndex = j + 1 < tokensLength ? result.tokens[2 * j + 2] : line.length;
      const tokenText = line.substring(startIndex, nextStartIndex);
      if (tokenText === '') {
        continue;
      }

      const metaData = result.tokens[2 * j + 1];

      // Get foreground index from metaData so that we can index into TokensCSS
      // (a map from className to styles). Note this code comes from:
      // https://github.com/microsoft/vscode-textmate/blob/cc8ae321cfb47940470bd82c87a8ac61366fbd80/src/grammar.ts#L1032-L1034
      // We have to inline it here because StackElementMetadata does not appear
      // to be exported as part of the vscode-textmate npm module.
      // eslint-disable-next-line no-bitwise
      const foregroundIdx = (metaData & FOREGROUND_MASK) >>> FOREGROUND_OFFSET;

      singleLine.push({
        start: startIndex,
        end: nextStartIndex,
        color: foregroundIdx,
      });
    }

    // If we get result.stoppedEarly, continue tokenizing using the state used
    // to tokenize this line as a "best guess" of what state tokenizing this
    // line would have left us in had it completed.
    state = result.stoppedEarly ? state : result.ruleStack;
    return singleLine;
  });
}