Blame · tokenize.ts

addons/shared/textmate-lib/tokenize.ts · blame

b69ab31	1	/**
b69ab31	2	* Copyright (c) Meta Platforms, Inc. and affiliates.
b69ab31	3	*
b69ab31	4	* This source code is licensed under the MIT license found in the
b69ab31	5	* LICENSE file in the root directory of this source tree.
b69ab31	6	*/
b69ab31	7
b69ab31	8	import type {IGrammar} from 'vscode-textmate';
b69ab31	9
b69ab31	10	import {INITIAL} from 'vscode-textmate';
b69ab31	11
// These mirror entries of the MetadataConsts enum in vscode-textmate. They are
// declared in that package's main.d.ts, but our TypeScript/Webpack setup does
// not appear to inline them properly, so the values are repeated here.
//
// The mask is the 9-bit field 0x1ff shifted up to the foreground offset,
// i.e. 0x7fc000 (8372224).
const FOREGROUND_OFFSET = 14;
// eslint-disable-next-line no-bitwise
const FOREGROUND_MASK = 0x1ff << FOREGROUND_OFFSET;

/**
 * Default per-line time limit, in milliseconds, handed to the tokenizer so
 * that one very long line cannot lock up the main thread. VS Code applies a
 * timeout in the same way:
 * https://github.com/microsoft/vscode/blob/504c5a768a001b2099dd2b44e9dc39e10ccdfb56/src/vs/workbench/services/textMate/common/TMTokenization.ts#L39
 */
const DEFAULT_TOKENIZE_TIMEOUT_MS = 500;
b69ab31	24
/**
 * One colored span of a single line: the half-open character range
 * `[start, end)` together with the color to paint it in.
 */
export type HighlightedToken = {
  /** Index within the line of the first character of the span (inclusive). */
  start: number;

  /** Index within the line just past the last character of the span (exclusive). */
  end: number;

  /** Position of the span's color in a color map. */
  color: number;
};
b69ab31	35
/**
 * Splits `fileContents` on `'\n'` and tokenizes the resulting lines with
 * `tokenizeLines()`.
 *
 * @param fileContents Full text to highlight.
 * @param grammar Grammar forwarded to `tokenizeLines()`.
 * @param timeLimit Per-line time limit forwarded to `tokenizeLines()`.
 * @returns One array of tokens per line of `fileContents`.
 */
export function tokenizeFileContents(
  fileContents: string,
  grammar: IGrammar,
  timeLimit: number = DEFAULT_TOKENIZE_TIMEOUT_MS,
): Array<Array<HighlightedToken>> {
  // fileContents may be large. We assume a single native split() call, despite
  // allocating a potentially big array, is likely cheaper than doing our own
  // bookkeeping to carve off one substring at a time (even though the latter
  // would avoid the array allocation).
  const lines = fileContents.split('\n');
  return tokenizeLines(lines, grammar, timeLimit);
}
b69ab31	48
/**
 * Tokenizes `lines` in order, carrying the grammar state from each line into
 * the next, and converts the tokenizer output into colored spans.
 *
 * @param lines Lines to tokenize, in document order.
 * @param grammar Grammar whose `tokenizeLine2()` is invoked once per line.
 * @param timeLimit Time limit passed to `tokenizeLine2()` for every line.
 * @returns For each input line, its spans in order. Spans that cover no text
 *   are omitted, so an empty line yields an empty array.
 */
export function tokenizeLines(
  lines: ReadonlyArray<string>,
  grammar: IGrammar,
  timeLimit: number = DEFAULT_TOKENIZE_TIMEOUT_MS,
): Array<Array<HighlightedToken>> {
  let ruleStack = INITIAL;

  return lines.map(line => {
    // Line-processing logic taken from:
    // https://github.com/microsoft/vscode-textmate/blob/cc8ae321cfb47940470bd82c87a8ac61366fbd80/src/tests/themedTokenizer.ts#L20-L41
    const result = grammar.tokenizeLine2(line, ruleStack, timeLimit);

    // `tokens` is read as a flat sequence of (startIndex, metadata) pairs. Even
    // when `result.stoppedEarly` is set we still consume whatever pairs came
    // back, so as much of the line as possible gets highlighted.
    const {tokens} = result;
    const spans: Array<HighlightedToken> = [];

    // `i + 1 < tokens.length` only visits complete pairs; a dangling trailing
    // element (odd length) is ignored.
    for (let i = 0; i + 1 < tokens.length; i += 2) {
      const start = tokens[i];
      // A span runs up to the start of the next complete pair, or to the end
      // of the line when it is the last one.
      const end = i + 3 < tokens.length ? tokens[i + 2] : line.length;

      if (line.substring(start, end) !== '') {
        // Extract the foreground index from the metadata so it can be used to
        // index into TokensCSS (a map from className to styles). This mirrors:
        // https://github.com/microsoft/vscode-textmate/blob/cc8ae321cfb47940470bd82c87a8ac61366fbd80/src/grammar.ts#L1032-L1034
        // It is inlined because StackElementMetadata does not appear to be
        // exported as part of the vscode-textmate npm module.
        // eslint-disable-next-line no-bitwise
        const color = (tokens[i + 1] & FOREGROUND_MASK) >>> FOREGROUND_OFFSET;
        spans.push({start, end, color});
      }
    }

    // When tokenization stopped early, keep the state this line started with
    // as a "best guess" of the state it would have ended in had it completed;
    // otherwise advance to the state the tokenizer reported.
    if (!result.stoppedEarly) {
      ruleStack = result.ruleStack;
    }
    return spans;
  });
}