addons/shared/textmate-lib/FilepathClassifier.tsblame
View source
/**
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */
b69ab317
b69ab318import type {Grammar, LanguageConfiguration} from './types';
b69ab319
b69ab3110import splitPath from './splitPath';
b69ab3111
/**
 * Lookup tables derived from the language configurations, used to resolve a
 * file name, file extension, or alias to a language id.
 */
type LanguageIndex = {
  /** File name to language id. */
  filenames: Map<string, string>;
  /** File extension to language id. */
  extensions: Map<string, string>;
  /** Language alias to language id. */
  aliases: Map<string, string>;
  /** All supported language ids. */
  supportedLanguages: Set<string>;
};
b69ab3122
/**
 * Resolves file paths, language ids, and language aliases to TextMate scope
 * names and display names, using the grammars and language configurations
 * supplied to the constructor.
 */
export default class FilepathClassifier {
  private index: LanguageIndex;

  /**
   * @param grammars Grammars keyed by TextMate scope name.
   * @param languages Language configurations keyed by language id.
   */
  constructor(
    private grammars: {[scopeName: string]: Grammar},
    private languages: {[language: string]: LanguageConfiguration},
  ) {
    this.index = createIndex(languages);
  }

  /**
   * Returns the scope name of the grammar that should highlight the file at
   * `path`, or `null` if no language or grammar matches. The second element
   * returned by `splitPath()` is treated as the file name.
   */
  findScopeNameForPath(path: string): string | null {
    const [, filename] = splitPath(path);
    const language = this.findTextMateLanguage(filename);
    return language != null ? this.findScopeNameForLanguage(language) : null;
  }

  /**
   * Returns the scope name of the first grammar whose `language` equals the
   * given language id, or `null` if there is none.
   */
  findScopeNameForLanguage(language: string): string | null {
    for (const [scopeName, grammar] of Object.entries(this.grammars)) {
      if (grammar.language === language) {
        return scopeName;
      }
    }
    return null;
  }

  /**
   * This function is useful for mapping the tag used in a fenced code block
   * to a scope name. For example, while the language id for JavaScript is
   * `javascript` according to the LSP spec, users frequently use the alias
   * `js` when creating fenced code blocks, so we would like to be able to map
   * both to the scope name `source.js`.
   *
   * Note that the TextMate grammar for Markdown hardcodes these aliases,
   * which is useful when displaying Markdown source in an editor:
   *
   * https://github.com/microsoft/vscode/blob/ea0e3e0d1fab/extensions/markdown-basics/syntaxes/markdown.tmLanguage.json#L960
   *
   * But when rendering Markdown as HTML, clients often have to provide their
   * own syntax highlighting logic, which has to do its own mapping of the tag
   * for the fenced code block. For example, here is where highlight.js declares
   * its aliases for Javascript [sic]:
   *
   * https://github.com/highlightjs/highlight.js/blob/91e1898df92a/src/languages/javascript.js#L454
   */
  findScopeNameForAlias(alias: string): string | null {
    // An unknown alias is treated as if it were already a language id.
    const language = this.index.aliases.get(alias) ?? alias;
    return this.findScopeNameForLanguage(language);
  }

  /**
   * Given a filename like `index.js` or `BUCK`, returns the language id of the
   * TextMate grammar that should be used to highlight it. This function does
   * *not* depend on Monaco, so it can be used in other contexts.
   *
   * An exact file name match wins. Otherwise the longest registered extension
   * that the file name ends with is used, so that a specific extension such
   * as `.d.ts` is not shadowed by a shorter one such as `.ts`. Returns `null`
   * when nothing matches.
   */
  findTextMateLanguage(filename: string): string | null {
    const exactMatch = this.index.filenames.get(filename);
    if (exactMatch != null) {
      return exactMatch;
    }

    let bestExtension: string | null = null;
    let bestLanguage: string | null = null;
    for (const [extension, language] of this.index.extensions.entries()) {
      if (!filename.endsWith(extension)) {
        continue;
      }
      // Two distinct suffixes of one file name cannot have the same length,
      // so a strict comparison is enough to make the result deterministic.
      if (bestExtension == null || extension.length > bestExtension.length) {
        bestExtension = extension;
        bestLanguage = language;
      }
    }

    return bestLanguage;
  }

  /**
   * Makes a best-effort to map the specified language id (like `fsharp`) to a
   * name that is more familiar to the user (like `F#`). Also supports aliases
   * so that both `py` and `python` are mapped to `Python`. Falls back to
   * returning the input unchanged when no display name can be found.
   */
  getDisplayNameForLanguageId(languageIdOrAlias: string): string {
    const scopeName =
      this.findScopeNameForLanguage(languageIdOrAlias) ??
      this.findScopeNameForAlias(languageIdOrAlias);
    if (scopeName == null) {
      return languageIdOrAlias;
    }

    return this.findDisplayNameForScopeName(scopeName) ?? languageIdOrAlias;
  }

  /**
   * Try to return a human-readable name for the specified scope name.
   * Unfortunately, VS Code does not currently expose the language name
   * directly: https://github.com/microsoft/vscode/issues/109919. As a
   * workaround, we make our best guess from the available aliases associated
   * with the scope name.
   *
   * Returns `null` when the scope name is unknown, its grammar declares no
   * language, or the language has no aliases (including when the language
   * has no configuration entry at all).
   */
  findDisplayNameForScopeName(scopeName: string): string | null {
    const language = this.grammars[scopeName]?.language;
    if (language == null) {
      return null;
    }

    // A grammar may name a language that has no configuration entry; treat
    // that the same as a language without aliases instead of throwing.
    const aliases = this.languages[language]?.aliases ?? [];

    // As a simple heuristic, we pick the first alias that starts with a
    // capital letter. Requiring the character to differ from its lowercase
    // form rules out digits, punctuation, and empty aliases, which are
    // trivially equal to their own uppercase form.
    for (const alias of aliases) {
      const firstChar = alias.charAt(0);
      if (firstChar === firstChar.toUpperCase() && firstChar !== firstChar.toLowerCase()) {
        return alias;
      }
    }

    // If none of the aliases start with a capital letter, pick the first.
    return aliases[0] ?? null;
  }
}
b69ab31134
/**
 * Builds the lookup tables used by `FilepathClassifier` from the language
 * configurations.
 *
 * For every language id, its file names and extensions are mapped back to the
 * id. Aliases are mapped back as well and added to the set of supported
 * languages, except for aliases that equal the language id once lowercased.
 * When two languages declare the same file name, extension, or alias, the one
 * that appears later in `languages` wins.
 *
 * @param languages Language configurations keyed by language id.
 * @returns The populated index.
 */
function createIndex(languages: {[language: string]: LanguageConfiguration}): LanguageIndex {
  // Explicit type arguments keep these from being inferred as Map<any, any>.
  const filenames = new Map<string, string>();
  const extensions = new Map<string, string>();
  const aliases = new Map<string, string>();
  const supportedLanguages = new Set<string>();

  for (const [language, configuration] of Object.entries(languages)) {
    supportedLanguages.add(language);

    for (const alias of configuration.aliases ?? []) {
      // An alias that is just the language id in another case adds nothing.
      if (alias.toLowerCase() !== language) {
        supportedLanguages.add(alias);
        aliases.set(alias, language);
      }
    }

    for (const filename of configuration.filenames ?? []) {
      filenames.set(filename, language);
    }

    for (const extension of configuration.extensions ?? []) {
      extensions.set(extension, language);
    }
  }

  return {filenames, extensions, supportedLanguages, aliases};
}