/**
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */

import type {IGrammar} from 'vscode-textmate';

import {INITIAL} from 'vscode-textmate';

// The following values come from the MetadataConsts enum in vscode-textmate.
// Although they are declared in the main.d.ts file, our TypeScript/Webpack
// setup does not appear to be able to inline them properly.
//
// FOREGROUND_MASK (decimal 8372224) selects bits 14 through 22 of a token's
// packed metadata; shifting the masked value right by FOREGROUND_OFFSET yields
// the foreground color index.
//
// NOTE(review): these are hand-copied, so they must match the metadata layout
// of the vscode-textmate version that is actually installed -- confirm after
// any upgrade of that dependency.
const FOREGROUND_MASK = 0x7fc000;
const FOREGROUND_OFFSET = 14;

/**
 * Default time limit, in milliseconds, for tokenizing a single line. The limit
 * prevents one long line from locking up the main thread. Note that a time
 * limit is also used in VS Code:
 * https://github.com/microsoft/vscode/blob/504c5a768a001b2099dd2b44e9dc39e10ccdfb56/src/vs/workbench/services/textMate/common/TMTokenization.ts#L39
 */
const DEFAULT_TOKENIZE_TIMEOUT_MS = 500;

/**
 * A contiguous span of a single line together with the color it should be
 * rendered in.
 */
export type HighlightedToken = {
  /** Start index within a line, inclusive. */
  start: number;

  /** End index within a line, exclusive. */
  end: number;

  /** Index into a color map. */
  color: number;
};

/**
 * Tokenizes an entire file, returning one list of highlighted tokens per line.
 *
 * The contents are split on `'\n'` only, so for input with CRLF line endings
 * each line passed to the tokenizer still ends in `'\r'`.
 *
 * @param fileContents Full text of the file.
 * @param grammar Grammar used to tokenize each line.
 * @param timeLimit Time limit forwarded to the per-line tokenizer; defaults to
 *   DEFAULT_TOKENIZE_TIMEOUT_MS.
 * @returns An array with one entry per line of `fileContents`, in order.
 */
export function tokenizeFileContents(
  fileContents: string,
  grammar: IGrammar,
  timeLimit: number = DEFAULT_TOKENIZE_TIMEOUT_MS,
): Array<Array<HighlightedToken>> {
  // As fileContents could be quite large, we are assuming that, even though
  // split() generates a potentially large array, because it is one native
  // call, it is likely to be more efficient than us doing our own bookkeeping
  // to slice off one substring at a time (though that would avoid the array
  // allocation).
  const lines = fileContents.split('\n');
  return tokenizeLines(lines, grammar, timeLimit);
}

/**
 * Decodes the flat token array returned by `tokenizeLine2()` for one line into
 * a list of highlighted spans.
 *
 * `tokens` holds two numbers per token: the token's start index at position
 * `2 * i` and its packed metadata at position `2 * i + 1`. A token ends where
 * the next token starts, or at the end of the line for the last token.
 *
 * Line-processing logic taken from:
 * https://github.com/microsoft/vscode-textmate/blob/cc8ae321cfb47940470bd82c87a8ac61366fbd80/src/tests/themedTokenizer.ts#L20-L41
 */
function decodeLineTokens(
  line: string,
  tokens: ArrayLike<number>,
): Array<HighlightedToken> {
  // eslint-disable-next-line no-bitwise
  const tokenCount = tokens.length >> 1;
  const highlighted: Array<HighlightedToken> = [];

  for (let i = 0; i < tokenCount; i++) {
    const start = tokens[2 * i];
    const end = i + 1 < tokenCount ? tokens[2 * i + 2] : line.length;

    // Skip tokens that cover no text of the line: those that start at or past
    // the end of the line, and those that end where they start. Comparing the
    // indices avoids allocating a substring per token just to test whether it
    // is empty.
    if (start >= end || start >= line.length) {
      continue;
    }

    const metaData = tokens[2 * i + 1];

    // Get foreground index from metaData so that we can index into TokensCSS
    // (a map from className to styles). Note this code comes from:
    // https://github.com/microsoft/vscode-textmate/blob/cc8ae321cfb47940470bd82c87a8ac61366fbd80/src/grammar.ts#L1032-L1034
    // We have to inline it here because StackElementMetadata does not appear
    // to be exported as part of the vscode-textmate npm module.
    // eslint-disable-next-line no-bitwise
    const color = (metaData & FOREGROUND_MASK) >>> FOREGROUND_OFFSET;

    highlighted.push({start, end, color});
  }

  return highlighted;
}

/**
 * Tokenizes a sequence of lines, carrying the tokenizer state from each line
 * into the next, and returns one list of highlighted tokens per line.
 *
 * @param lines Lines to tokenize, in document order.
 * @param grammar Grammar used to tokenize each line.
 * @param timeLimit Time limit passed to `grammar.tokenizeLine2()` for each
 *   line; defaults to DEFAULT_TOKENIZE_TIMEOUT_MS.
 * @returns An array parallel to `lines`. Tokens that cover no text are
 *   omitted, so an entry may be empty.
 */
export function tokenizeLines(
  lines: ReadonlyArray<string>,
  grammar: IGrammar,
  timeLimit: number = DEFAULT_TOKENIZE_TIMEOUT_MS,
): Array<Array<HighlightedToken>> {
  let state = INITIAL;
  return lines.map((line: string) => {
    const result = grammar.tokenizeLine2(line, state, timeLimit);

    // If we get result.stoppedEarly, continue tokenizing using the state used
    // to tokenize this line as a "best guess" of what state tokenizing this
    // line would have left us in had it completed.
    if (!result.stoppedEarly) {
      state = result.ruleStack;
    }

    // Note that even if `result.stoppedEarly` is true, we still use the list
    // of tokens that were returned to tokenize as much of the line as
    // possible.
    return decodeLineTokens(line, result.tokens);
  });
}
