| b69ab31 | | | 1 | /** |
| b69ab31 | | | 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. |
| b69ab31 | | | 3 | * |
| b69ab31 | | | 4 | * This source code is licensed under the MIT license found in the |
| b69ab31 | | | 5 | * LICENSE file in the root directory of this source tree. |
| b69ab31 | | | 6 | */ |
| b69ab31 | | | 7 | |
| b69ab31 | | | 8 | import type {Grammar, LanguageConfiguration} from './types'; |
| b69ab31 | | | 9 | |
| b69ab31 | | | 10 | import splitPath from './splitPath'; |
| b69ab31 | | | 11 | |
/**
 * Lookup tables used to resolve a language id from the different kinds of
 * keys a caller may have at hand.
 */
interface LanguageIndex {
  /** Maps an exact file name to the id of its language. */
  filenames: Map<string, string>;

  /** Maps a file extension to the id of its language. */
  extensions: Map<string, string>;

  /** Maps a language alias to the id of its language. */
  aliases: Map<string, string>;

  /** The set of every supported language id. */
  supportedLanguages: Set<string>;
}
| b69ab31 | | | 22 | |
/**
 * Resolves file paths, language ids, and language aliases to TextMate scope
 * names and human-readable language names, based on the grammar and language
 * configuration tables supplied at construction time.
 */
export default class FilepathClassifier {
  private index: LanguageIndex;

  /**
   * @param grammars Grammars keyed by TextMate scope name.
   * @param languages Language configurations keyed by language id.
   */
  constructor(
    private grammars: {[scopeName: string]: Grammar},
    private languages: {[language: string]: LanguageConfiguration},
  ) {
    this.index = createIndex(languages);
  }

  /**
   * Returns the scope name of the grammar that should highlight the file at
   * `path`, or `null` if no language or grammar matches.
   */
  findScopeNameForPath(path: string): string | null {
    // Only the second element returned by splitPath() is used; presumably it
    // is the final path component (the file name) -- confirm against splitPath.
    const [, filename] = splitPath(path);
    const language = this.findTextMateLanguage(filename);
    return language != null ? this.findScopeNameForLanguage(language) : null;
  }

  /**
   * Returns the scope name of the first grammar whose `language` equals the
   * given language id, or `null` if there is none.
   */
  findScopeNameForLanguage(language: string): string | null {
    for (const [scopeName, grammar] of Object.entries(this.grammars)) {
      if (grammar.language === language) {
        return scopeName;
      }
    }
    return null;
  }

  /**
   * This function is useful for mapping the tag used in a fenced code block
   * to a scope name. For example, while the language id for JavaScript is
   * `javascript` according to the LSP spec, users frequently use the alias
   * `js` when creating fenced code blocks, so we would like to be able to map
   * both to the scope name `source.js`.
   *
   * Note that the TextMate grammar for Markdown hardcodes these aliases,
   * which is useful when displaying Markdown source in an editor:
   *
   * https://github.com/microsoft/vscode/blob/ea0e3e0d1fab/extensions/markdown-basics/syntaxes/markdown.tmLanguage.json#L960
   *
   * But when rendering Markdown as HTML, clients often have to provide their
   * own syntax highlighting logic, which has to do its own mapping of the tag
   * for the fenced code block. For example, here is where highlight.js declares
   * its aliases for Javascript [sic]:
   *
   * https://github.com/highlightjs/highlight.js/blob/91e1898df92a/src/languages/javascript.js#L454
   */
  findScopeNameForAlias(alias: string): string | null {
    // An unknown alias is treated as though it were already a language id.
    const language = this.index.aliases.get(alias) ?? alias;
    return this.findScopeNameForLanguage(language);
  }

  /**
   * Given a filename like `index.js` or `BUCK`, returns the language id of the
   * TextMate grammar that should be used to highlight it. This function does
   * *not* depend on Monaco, so it can be used in other contexts.
   *
   * An exact file name match takes precedence. Otherwise the language of the
   * longest registered extension that the file name ends with is returned, so
   * that a specific extension such as `.d.ts` is not shadowed by `.ts`.
   * Returns `null` when nothing matches.
   */
  findTextMateLanguage(filename: string): string | null {
    const exactMatch = this.index.filenames.get(filename);
    if (exactMatch != null) {
      return exactMatch;
    }

    let bestLanguage: string | null = null;
    // Start below zero so that even an empty-string extension can match.
    let bestLength = -1;
    for (const [extension, language] of this.index.extensions) {
      if (extension.length > bestLength && filename.endsWith(extension)) {
        bestLanguage = language;
        bestLength = extension.length;
      }
    }

    return bestLanguage;
  }

  /**
   * Makes a best-effort to map the specified language id (like `fsharp`) to a
   * name that is more familiar to the user (like `F#`). Also supports aliases
   * so that both `py` and `python` are mapped to `Python`. Falls back to
   * returning the input unchanged when no display name can be determined.
   */
  getDisplayNameForLanguageId(languageIdOrAlias: string): string {
    const scopeName =
      this.findScopeNameForLanguage(languageIdOrAlias) ??
      this.findScopeNameForAlias(languageIdOrAlias);
    if (scopeName == null) {
      return languageIdOrAlias;
    }

    return this.findDisplayNameForScopeName(scopeName) ?? languageIdOrAlias;
  }

  /**
   * Try to return a human-readable name for the specified scope name.
   * Unfortunately, VS Code does not currently expose the language name
   * directly: https://github.com/microsoft/vscode/issues/109919. As a
   * workaround, we make our best guess from the available aliases associated
   * with the scope name.
   *
   * Returns `null` when the scope name is unknown, when its grammar has no
   * language, when that language has no configuration, or when the language
   * has no non-empty alias.
   */
  findDisplayNameForScopeName(scopeName: string): string | null {
    const language = this.grammars[scopeName]?.language;
    if (language == null) {
      return null;
    }

    // A grammar may name a language that is absent from the language table;
    // treat that the same as a language without aliases instead of throwing.
    const aliases = (this.languages[language]?.aliases ?? []).filter(
      (alias: string) => alias.length > 0,
    );

    // As a simple heuristic, we pick the first alias whose first character is
    // unchanged by upper-casing (i.e. it does not start with a lowercase
    // letter).
    for (const alias of aliases) {
      const firstChar = alias.charAt(0);
      if (firstChar.toUpperCase() === firstChar) {
        return alias;
      }
    }

    // If none of the aliases qualify, pick the first.
    return aliases[0] ?? null;
  }
}
| b69ab31 | | | 134 | |
/**
 * Builds the lookup tables for the given language configurations.
 *
 * For every language id, its file names and extensions are mapped back to the
 * id. Each alias is also mapped to the id and added to `supportedLanguages`,
 * unless the lowercased alias is identical to the language id itself. When two
 * languages declare the same key, the one visited later overwrites the earlier
 * entry.
 *
 * @param languages Language configurations keyed by language id.
 * @returns The populated index.
 */
function createIndex(languages: {[language: string]: LanguageConfiguration}): LanguageIndex {
  // Explicit type arguments: a bare `new Map()` would infer `Map<any, any>`.
  const filenames = new Map<string, string>();
  const extensions = new Map<string, string>();
  const aliases = new Map<string, string>();
  const supportedLanguages = new Set<string>();

  for (const [language, configuration] of Object.entries(languages)) {
    supportedLanguages.add(language);

    for (const alias of configuration.aliases ?? []) {
      // An alias that only differs from the id by letter case adds nothing.
      if (alias.toLowerCase() !== language) {
        supportedLanguages.add(alias);
        aliases.set(alias, language);
      }
    }

    for (const filename of configuration.filenames ?? []) {
      filenames.set(filename, language);
    }

    for (const extension of configuration.extensions ?? []) {
      extensions.set(extension, language);
    }
  }

  return {filenames, extensions, supportedLanguages, aliases};
}