| 1 | /** |
| 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. |
| 3 | * |
| 4 | * This source code is licensed under the MIT license found in the |
| 5 | * LICENSE file in the root directory of this source tree. |
| 6 | */ |
| 7 | |
| 8 | import type {IGrammar} from 'vscode-textmate'; |
| 9 | |
| 10 | import {INITIAL} from 'vscode-textmate'; |
| 11 | |
// The following values come from the MetadataConsts enum in vscode-textmate.
// Although they are declared in the main.d.ts file, our TypeScript/Webpack
// setup does not appear to be able to inline them properly.
// FOREGROUND_MASK is 0b00000000011111111100000000000000: the 9 bits of a
// token's metadata word that encode the foreground color index, which starts
// at bit FOREGROUND_OFFSET.
const FOREGROUND_MASK = 8372224;
const FOREGROUND_OFFSET = 14;

/**
 * Specify a timeout when tokenizing a line to prevent a long line from locking
 * up the main thread. Note this is used in VS Code:
 * https://github.com/microsoft/vscode/blob/504c5a768a001b2099dd2b44e9dc39e10ccdfb56/src/vs/workbench/services/textMate/common/TMTokenization.ts#L39
 */
const DEFAULT_TOKENIZE_TIMEOUT_MS = 500;
| 24 | |
/**
 * A single syntax-highlighted span within one line of text. The span covers
 * the half-open index range [start, end) of the line and maps to an entry in
 * a theme's color map via `color`.
 */
export type HighlightedToken = {
  /** Start index within a line, inclusive. */
  start: number;

  /** End index within a line, exclusive. */
  end: number;

  /** Index into a color map. */
  color: number;
};
| 35 | |
| 36 | export function tokenizeFileContents( |
| 37 | fileContents: string, |
| 38 | grammar: IGrammar, |
| 39 | timeLimit: number = DEFAULT_TOKENIZE_TIMEOUT_MS, |
| 40 | ): Array<Array<HighlightedToken>> { |
| 41 | // As fileContents could be quite large, we are assuming that, even though |
| 42 | // split() generates a potentially large array, because it is one native |
| 43 | // call, it is likely to be more efficient than us doing our own bookkeeping |
| 44 | // to slice off one substring at a time (though that would avoid the array |
| 45 | // allocation). |
| 46 | return tokenizeLines(fileContents.split('\n'), grammar, timeLimit); |
| 47 | } |
| 48 | |
| 49 | export function tokenizeLines( |
| 50 | lines: ReadonlyArray<string>, |
| 51 | grammar: IGrammar, |
| 52 | timeLimit: number = DEFAULT_TOKENIZE_TIMEOUT_MS, |
| 53 | ): Array<Array<HighlightedToken>> { |
| 54 | let state = INITIAL; |
| 55 | return lines.map((line: string) => { |
| 56 | // Line-processing logic taken from: |
| 57 | // https://github.com/microsoft/vscode-textmate/blob/cc8ae321cfb47940470bd82c87a8ac61366fbd80/src/tests/themedTokenizer.ts#L20-L41 |
| 58 | const result = grammar.tokenizeLine2(line, state, timeLimit); |
| 59 | |
| 60 | // Note that even if `result.stoppedEarly` is true, we still use the list of |
| 61 | // tokens that were returned to tokenize as much of the line as possible. |
| 62 | // eslint-disable-next-line no-bitwise |
| 63 | const tokensLength = result.tokens.length >> 1; |
| 64 | const singleLine = []; |
| 65 | for (let j = 0; j < tokensLength; j++) { |
| 66 | const startIndex = result.tokens[2 * j]; |
| 67 | const nextStartIndex = j + 1 < tokensLength ? result.tokens[2 * j + 2] : line.length; |
| 68 | const tokenText = line.substring(startIndex, nextStartIndex); |
| 69 | if (tokenText === '') { |
| 70 | continue; |
| 71 | } |
| 72 | |
| 73 | const metaData = result.tokens[2 * j + 1]; |
| 74 | |
| 75 | // Get foreground index from metaData so that we can index into TokensCSS |
| 76 | // (a map from className to styles). Note this code comes from: |
| 77 | // https://github.com/microsoft/vscode-textmate/blob/cc8ae321cfb47940470bd82c87a8ac61366fbd80/src/grammar.ts#L1032-L1034 |
| 78 | // We have to inline it here because StackElementMetadata does not appear |
| 79 | // to be exported as part of the vscode-textmate npm module. |
| 80 | // eslint-disable-next-line no-bitwise |
| 81 | const foregroundIdx = (metaData & FOREGROUND_MASK) >>> FOREGROUND_OFFSET; |
| 82 | |
| 83 | singleLine.push({ |
| 84 | start: startIndex, |
| 85 | end: nextStartIndex, |
| 86 | color: foregroundIdx, |
| 87 | }); |
| 88 | } |
| 89 | |
| 90 | // If we get result.stoppedEarly, continue tokenizing using the state used |
| 91 | // to tokenize this line as a "best guess" of what state tokenizing this |
| 92 | // line would have left us in had it completed. |
| 93 | state = result.stoppedEarly ? state : result.ruleStack; |
| 94 | return singleLine; |
| 95 | }); |
| 96 | } |
| 97 | |