| b69ab31 | | | 1 | /** |
| b69ab31 | | | 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. |
| b69ab31 | | | 3 | * |
| b69ab31 | | | 4 | * This source code is licensed under the MIT license found in the |
| b69ab31 | | | 5 | * LICENSE file in the root directory of this source tree. |
| b69ab31 | | | 6 | */ |
| b69ab31 | | | 7 | |
| b69ab31 | | | 8 | import type {IGrammar} from 'vscode-textmate'; |
| b69ab31 | | | 9 | |
| b69ab31 | | | 10 | import {INITIAL} from 'vscode-textmate'; |
| b69ab31 | | | 11 | |
// Mirrors of two members of vscode-textmate's MetadataConsts enum. They are
// declared in the package's main.d.ts, but our TypeScript/Webpack setup does
// not appear to inline them properly, so the numeric values are repeated here.
// The mask is 8372224 in decimal: nine consecutive bits starting at bit 14,
// which is why the matching shift amount below is 14.
const FOREGROUND_OFFSET = 14;
const FOREGROUND_MASK = 0x7fc000;
| b69ab31 | | | 17 | |
/**
 * Default time budget, in milliseconds, handed to the tokenizer for a single
 * line. Bounding it keeps one very long line from locking up the main thread.
 * VS Code applies a per-line limit in the same way:
 * https://github.com/microsoft/vscode/blob/504c5a768a001b2099dd2b44e9dc39e10ccdfb56/src/vs/workbench/services/textMate/common/TMTokenization.ts#L39
 */
const DEFAULT_TOKENIZE_TIMEOUT_MS = 500;
| b69ab31 | | | 24 | |
/**
 * A colored span of a single line: the half-open range [start, end) of
 * character indices plus the color to render it with.
 */
export type HighlightedToken = {
  /** Index of the first character of the span within its line (inclusive). */
  start: number;

  /** Index just past the last character of the span (exclusive). */
  end: number;

  /** Position of this span's color in a color map. */
  color: number;
};
| b69ab31 | | | 35 | |
/**
 * Tokenizes the full text of a file by splitting it on '\n' and delegating to
 * tokenizeLines().
 *
 * @param fileContents Entire text of the file.
 * @param grammar Grammar used to tokenize each line.
 * @param timeLimit Forwarded unchanged to tokenizeLines(); defaults to
 *   DEFAULT_TOKENIZE_TIMEOUT_MS.
 * @returns One array of highlighted tokens per line of the input.
 */
export function tokenizeFileContents(
  fileContents: string,
  grammar: IGrammar,
  timeLimit: number = DEFAULT_TOKENIZE_TIMEOUT_MS,
): Array<Array<HighlightedToken>> {
  // fileContents may be large, and split() allocates an array with one entry
  // per line. We accept that cost on the assumption that a single native call
  // is likely cheaper than doing our own bookkeeping to slice off one
  // substring at a time, even though the latter would avoid the allocation.
  const lines = fileContents.split('\n');
  return tokenizeLines(lines, grammar, timeLimit);
}
| b69ab31 | | | 48 | |
/**
 * Turns the flat token array produced for one line into HighlightedTokens.
 *
 * `tokens` is read as consecutive (startIndex, metadata) pairs. A token ends
 * where the next pair starts; the final token ends at the end of the line.
 * Pair-decoding logic taken from:
 * https://github.com/microsoft/vscode-textmate/blob/cc8ae321cfb47940470bd82c87a8ac61366fbd80/src/tests/themedTokenizer.ts#L20-L41
 */
function toHighlightedTokens(line: string, tokens: ArrayLike<number>): Array<HighlightedToken> {
  const highlighted: Array<HighlightedToken> = [];
  // eslint-disable-next-line no-bitwise
  const pairCount = tokens.length >> 1;

  for (let pair = 0; pair < pairCount; pair++) {
    const offset = 2 * pair;
    const start = tokens[offset];
    const isLastPair = pair + 1 >= pairCount;
    const end = isLastPair ? line.length : tokens[offset + 2];

    // Tokens that cover no text are left out of the result.
    if (line.substring(start, end) === '') {
      continue;
    }

    // Pull the foreground index out of the metadata so that it can be used to
    // index into TokensCSS (a map from className to styles). This mirrors:
    // https://github.com/microsoft/vscode-textmate/blob/cc8ae321cfb47940470bd82c87a8ac61366fbd80/src/grammar.ts#L1032-L1034
    // It is inlined because StackElementMetadata does not appear to be
    // exported as part of the vscode-textmate npm module.
    // eslint-disable-next-line no-bitwise
    const color = (tokens[offset + 1] & FOREGROUND_MASK) >>> FOREGROUND_OFFSET;

    highlighted.push({start, end, color});
  }

  return highlighted;
}

/**
 * Tokenizes `lines` in order, carrying the grammar state from each line to the
 * next, and returns one array of highlighted tokens per input line.
 *
 * @param lines Lines to tokenize, in document order.
 * @param grammar Grammar whose tokenizeLine2() is invoked once per line.
 * @param timeLimit Passed to tokenizeLine2() for every line; defaults to
 *   DEFAULT_TOKENIZE_TIMEOUT_MS.
 */
export function tokenizeLines(
  lines: ReadonlyArray<string>,
  grammar: IGrammar,
  timeLimit: number = DEFAULT_TOKENIZE_TIMEOUT_MS,
): Array<Array<HighlightedToken>> {
  let state = INITIAL;
  return lines.map((line: string) => {
    const result = grammar.tokenizeLine2(line, state, timeLimit);

    // Even when `result.stoppedEarly` is true, the tokens that did come back
    // are still used so that as much of the line as possible is highlighted.
    const lineTokens = toHighlightedTokens(line, result.tokens);

    // When tokenizing stopped early, keep the state this line started with as
    // a "best guess" of where a complete pass would have left us; otherwise
    // advance to the state the tokenizer reported.
    if (!result.stoppedEarly) {
      state = result.ruleStack;
    }
    return lineTokens;
  });
}