/**
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */
7
8import type {Grammar, LanguageConfiguration} from './types';
9
10import splitPath from './splitPath';
11
/**
 * Lookup tables derived from the language configurations. The index is built
 * once and only read afterwards, so every member is exposed as read-only to
 * keep consumers from mutating the shared tables by accident.
 */
type LanguageIndex = {
  /** File name to language id. */
  readonly filenames: ReadonlyMap<string, string>;
  /** File extension to language id. */
  readonly extensions: ReadonlyMap<string, string>;
  /** Language alias to language id. */
  readonly aliases: ReadonlyMap<string, string>;
  /** All supported language ids. */
  readonly supportedLanguages: ReadonlySet<string>;
};
22
/**
 * Resolves file paths, language ids and language aliases to TextMate scope
 * names and human-readable language names, using the supplied grammars and
 * language configurations.
 */
export default class FilepathClassifier {
  private index: LanguageIndex;

  /**
   * @param grammars Grammars keyed by scope name; each grammar's `language`
   *   property is matched against language ids.
   * @param languages Language configurations keyed by language id. They are
   *   indexed once, at construction time.
   */
  constructor(
    private grammars: {[scopeName: string]: Grammar},
    private languages: {[language: string]: LanguageConfiguration},
  ) {
    this.index = createIndex(languages);
  }

  /**
   * Returns the scope name for the file at `path`, or `null` if either the
   * language of the file or a grammar for that language cannot be determined.
   * Only the filename component of the path (the second element returned by
   * `splitPath`) is considered.
   */
  findScopeNameForPath(path: string): string | null {
    const [, filename] = splitPath(path);
    const language = this.findTextMateLanguage(filename);
    return language != null ? this.findScopeNameForLanguage(language) : null;
  }

  /**
   * Returns the scope name of the first grammar whose `language` equals the
   * given language id, or `null` if there is none.
   */
  findScopeNameForLanguage(language: string): string | null {
    for (const [scopeName, grammar] of Object.entries(this.grammars)) {
      if (grammar.language === language) {
        return scopeName;
      }
    }
    return null;
  }

  /**
   * This function is useful for mapping the tag used in a fenced code block
   * to a scope name. For example, while the language id for JavaScript is
   * `javascript` according to the LSP spec, users frequently use the alias
   * `js` when creating fenced code blocks, so we would like to be able to map
   * both to the scope name `source.js`.
   *
   * Note that the TextMate grammar for Markdown hardcodes these aliases,
   * which is useful when displaying Markdown source in an editor:
   *
   * https://github.com/microsoft/vscode/blob/ea0e3e0d1fab/extensions/markdown-basics/syntaxes/markdown.tmLanguage.json#L960
   *
   * But when rendering Markdown as HTML, clients often have to provide their
   * own syntax highlighting logic, which has to do its own mapping of the tag
   * for the fenced code block. For example, here is where highlight.js declares
   * its aliases for Javascript [sic]:
   *
   * https://github.com/highlightjs/highlight.js/blob/91e1898df92a/src/languages/javascript.js#L454
   *
   * A value that is not a known alias is treated as a language id.
   */
  findScopeNameForAlias(alias: string): string | null {
    const language = this.index.aliases.get(alias) ?? alias;
    return this.findScopeNameForLanguage(language);
  }

  /**
   * Given a filename like `index.js` or `BUCK`, returns the language id of the
   * TextMate grammar that should be used to highlight it. This function does
   * *not* depend on Monaco, so it can be used in other contexts.
   *
   * An exact filename match takes precedence. Otherwise the longest registered
   * extension that the filename ends with wins, so that a specific extension
   * such as `.blade.php` is not shadowed by a shorter one such as `.php`
   * merely because the shorter one was registered first.
   *
   * @returns The language id, or `null` if nothing matches.
   */
  findTextMateLanguage(filename: string): string | null {
    const exactMatch = this.index.filenames.get(filename);
    if (exactMatch != null) {
      return exactMatch;
    }

    let bestLanguage: string | null = null;
    // Starting at zero also rejects an empty-string extension, which would
    // otherwise match every filename.
    let bestLength = 0;
    for (const [extension, language] of this.index.extensions) {
      if (extension.length > bestLength && filename.endsWith(extension)) {
        bestLanguage = language;
        bestLength = extension.length;
      }
    }

    return bestLanguage;
  }

  /**
   * Makes a best-effort to map the specified language id (like `fsharp`) to a
   * name that is more familiar to the user (like `F#`). Also supports aliases
   * so that both `py` and `python` are mapped to `Python`.
   *
   * @returns The display name, or the input unchanged when no better name is
   *   available.
   */
  getDisplayNameForLanguageId(languageIdOrAlias: string): string {
    // A direct language id match is tried before alias resolution.
    const scopeName =
      this.findScopeNameForLanguage(languageIdOrAlias) ??
      this.findScopeNameForAlias(languageIdOrAlias);
    if (scopeName == null) {
      return languageIdOrAlias;
    }

    return this.findDisplayNameForScopeName(scopeName) ?? languageIdOrAlias;
  }

  /**
   * Try to return a human-readable name for the specified scope name.
   * Unfortunately, VS Code does not currently expose the language name
   * directly: https://github.com/microsoft/vscode/issues/109919. As a
   * workaround, we make our best guess from the available aliases associated
   * with the scope name.
   *
   * @returns The chosen alias, or `null` when the scope name is unknown, its
   *   grammar has no language, or the language has no configuration or no
   *   aliases.
   */
  findDisplayNameForScopeName(scopeName: string): string | null {
    const language = this.grammars[scopeName]?.language;
    if (language == null) {
      return null;
    }

    // A grammar may name a language that has no configuration entry; treat
    // that the same as a language without aliases instead of throwing.
    const aliases: string[] = this.languages[language]?.aliases ?? [];

    // As a simple heuristic, we pick the first alias that starts with a
    // capital letter. The first character must actually be cased, so digits,
    // punctuation and empty aliases do not count as "capitalized".
    const capitalized = aliases.find((alias: string) => {
      const firstChar = alias.charAt(0);
      return firstChar === firstChar.toUpperCase() && firstChar !== firstChar.toLowerCase();
    });

    // If none of the aliases start with a capital letter, pick the first.
    return capitalized ?? aliases[0] ?? null;
  }
}
134
/**
 * Builds the lookup tables used to classify filenames and resolve aliases.
 *
 * Languages are processed in `Object.entries` order; when two languages claim
 * the same filename, extension or alias, the one processed later wins.
 *
 * @param languages Language configurations keyed by language id. The optional
 *   `aliases`, `filenames` and `extensions` lists of each entry are indexed.
 * @returns The populated index.
 */
function createIndex(languages: {[language: string]: LanguageConfiguration}): LanguageIndex {
  const filenames = new Map<string, string>();
  const extensions = new Map<string, string>();
  const aliases = new Map<string, string>();
  const supportedLanguages = new Set<string>();

  for (const [language, configuration] of Object.entries(languages)) {
    supportedLanguages.add(language);

    for (const alias of configuration.aliases ?? []) {
      // Only the identity mapping is redundant in the alias table. A case
      // variant of the id (e.g. `JavaScript` for `javascript`) must still
      // resolve, because alias lookups are case-sensitive.
      if (alias !== language) {
        aliases.set(alias, language);
      }
      // Case variants of the language id are not listed as separate
      // supported languages.
      if (alias.toLowerCase() !== language) {
        supportedLanguages.add(alias);
      }
    }

    for (const filename of configuration.filenames ?? []) {
      filenames.set(filename, language);
    }

    for (const extension of configuration.extensions ?? []) {
      extensions.set(extension, language);
    }
  }

  return {filenames, extensions, supportedLanguages, aliases};
}
158