| 1 | /** |
| 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. |
| 3 | * |
| 4 | * This source code is licensed under the MIT license found in the |
| 5 | * LICENSE file in the root directory of this source tree. |
| 6 | */ |
| 7 | |
| 8 | import type {Grammar, LanguageConfiguration} from './types'; |
| 9 | |
| 10 | import splitPath from './splitPath'; |
| 11 | |
/**
 * Lookup tables built once from the language configurations so that file
 * names, extensions, and aliases can be resolved to a language id.
 */
interface LanguageIndex {
  /** Exact file name (e.g. `BUCK`) mapped to its language id. */
  filenames: Map<string, string>;
  /** File extension (e.g. `.js`) mapped to its language id. */
  extensions: Map<string, string>;
  /** Language alias (e.g. `js`) mapped to its language id. */
  aliases: Map<string, string>;
  /**
   * Every name that can be resolved: each language id, plus each alias that
   * `createIndex` registers.
   */
  supportedLanguages: Set<string>;
}
| 22 | |
/**
 * Resolves file paths, language ids, and language aliases to TextMate scope
 * names and human-readable language names, based on the grammar and language
 * configuration tables passed to the constructor.
 */
export default class FilepathClassifier {
  private index: LanguageIndex;

  /**
   * @param grammars Grammars keyed by TextMate scope name.
   * @param languages Language configurations keyed by language id.
   */
  constructor(
    private grammars: {[scopeName: string]: Grammar},
    private languages: {[language: string]: LanguageConfiguration},
  ) {
    this.index = createIndex(languages);
  }

  /**
   * Returns the scope name of the grammar that should highlight the file at
   * `path`, or `null` if the file name maps to no known language or the
   * language has no grammar.
   */
  findScopeNameForPath(path: string): string | null {
    const [, filename] = splitPath(path);
    const language = this.findTextMateLanguage(filename);
    return language != null ? this.findScopeNameForLanguage(language) : null;
  }

  /**
   * Returns the scope name of the first grammar whose `language` equals the
   * given language id, or `null` if there is none.
   */
  findScopeNameForLanguage(language: string): string | null {
    for (const [scopeName, grammar] of Object.entries(this.grammars)) {
      if (grammar.language === language) {
        return scopeName;
      }
    }
    return null;
  }

  /**
   * This function is useful for mapping the tag used in a fenced code block
   * to a scope name. For example, while the language id for JavaScript is
   * `javascript` according to the LSP spec, users frequently use the alias
   * `js` when creating fenced code blocks, so we would like to be able to map
   * both to the scope name `source.js`.
   *
   * Note that the TextMate grammar for Markdown hardcodes these aliases,
   * which is useful when displaying Markdown source in an editor:
   *
   * https://github.com/microsoft/vscode/blob/ea0e3e0d1fab/extensions/markdown-basics/syntaxes/markdown.tmLanguage.json#L960
   *
   * But when rendering Markdown as HTML, clients often have to provide their
   * own syntax highlighting logic, which has to do its own mapping of the tag
   * for the fenced code block. For example, here is where highlight.js declares
   * its aliases for Javascript [sic]:
   *
   * https://github.com/highlightjs/highlight.js/blob/91e1898df92a/src/languages/javascript.js#L454
   *
   * If `alias` is not a registered alias it is treated as a language id.
   */
  findScopeNameForAlias(alias: string): string | null {
    const language = this.index.aliases.get(alias) ?? alias;
    return this.findScopeNameForLanguage(language);
  }

  /**
   * Given a filename like `index.js` or `BUCK`, returns the language id of the
   * TextMate grammar that should be used to highlight it. This function does
   * *not* depend on Monaco, so it can be used in other contexts.
   *
   * An exact file name match takes precedence. Otherwise the longest
   * registered extension that the file name ends with wins, so a specific
   * extension such as `.theme.json` is not shadowed by a shorter one such as
   * `.json` merely because of registration order.
   *
   * @returns The language id, or `null` if nothing matches.
   */
  findTextMateLanguage(filename: string): string | null {
    const exactMatch = this.index.filenames.get(filename);
    if (exactMatch != null) {
      return exactMatch;
    }

    // Starting from the empty string means an (invalid) empty extension can
    // never match, since a candidate must be strictly longer than the best so
    // far.
    let bestExtension = '';
    let bestLanguage: string | null = null;
    for (const [extension, language] of this.index.extensions) {
      if (extension.length > bestExtension.length && filename.endsWith(extension)) {
        bestExtension = extension;
        bestLanguage = language;
      }
    }

    return bestLanguage;
  }

  /**
   * Makes a best-effort to map the specified language id (like `fsharp`) to a
   * name that is more familiar to the user (like `F#`). Also supports aliases
   * so that both `py` and `python` are mapped to `Python`.
   *
   * @returns The display name, or the input unchanged if none can be found.
   */
  getDisplayNameForLanguageId(languageIdOrAlias: string): string {
    const scopeName =
      this.findScopeNameForLanguage(languageIdOrAlias) ??
      this.findScopeNameForAlias(languageIdOrAlias);
    if (scopeName == null) {
      return languageIdOrAlias;
    }

    return this.findDisplayNameForScopeName(scopeName) ?? languageIdOrAlias;
  }

  /**
   * Try to return a human-readable name for the specified scope name.
   * Unfortunately, VS Code does not currently expose the language name
   * directly: https://github.com/microsoft/vscode/issues/109919. As a
   * workaround, we make our best guess from the available aliases associated
   * with the scope name.
   *
   * @returns The chosen alias, or `null` when the scope name is unknown, its
   * grammar declares no language, or that language has no configuration or
   * aliases.
   */
  findDisplayNameForScopeName(scopeName: string): string | null {
    const language = this.grammars[scopeName]?.language;
    if (language == null) {
      return null;
    }

    // A grammar may name a language that has no configuration entry; treat
    // that the same as a language without aliases rather than throwing.
    const aliases = this.languages[language]?.aliases ?? [];

    // As a simple heuristic, pick the first alias that starts with a capital
    // letter. Comparing against both case conversions rules out digits,
    // punctuation, and the empty string, which are unchanged by toUpperCase().
    for (const alias of aliases) {
      const firstChar = alias.charAt(0);
      const isCapitalLetter =
        firstChar === firstChar.toUpperCase() && firstChar !== firstChar.toLowerCase();
      if (isCapitalLetter) {
        return alias;
      }
    }

    // If none of the aliases start with a capital letter, pick the first.
    return aliases[0] ?? null;
  }
}
| 134 | |
/**
 * Builds the lookup tables for a set of language configurations.
 *
 * For every language id this registers the id itself as supported, then
 * records its aliases, file names, and extensions. An alias whose lower-cased
 * form equals the language id is skipped. When two languages declare the same
 * alias, file name, or extension, the one visited later overwrites the
 * earlier entry.
 *
 * @param languages Language configurations keyed by language id.
 * @returns The populated index.
 */
function createIndex(languages: {[language: string]: LanguageConfiguration}): LanguageIndex {
  const filenameToLanguage = new Map<string, string>();
  const extensionToLanguage = new Map<string, string>();
  const aliasToLanguage = new Map<string, string>();
  const knownNames = new Set<string>();

  for (const [languageId, config] of Object.entries(languages)) {
    knownNames.add(languageId);

    for (const alias of config.aliases ?? []) {
      // An alias that is just a re-cased spelling of the id adds nothing.
      if (alias.toLowerCase() === languageId) {
        continue;
      }
      knownNames.add(alias);
      aliasToLanguage.set(alias, languageId);
    }

    for (const filename of config.filenames ?? []) {
      filenameToLanguage.set(filename, languageId);
    }

    for (const extension of config.extensions ?? []) {
      extensionToLanguage.set(extension, languageId);
    }
  }

  return {
    filenames: filenameToLanguage,
    extensions: extensionToLanguage,
    supportedLanguages: knownNames,
    aliases: aliasToLanguage,
  };
}
| 158 | |