syntaxHighlightingWorker.ts

6.6 KB230 lines

1	/**
2	* Copyright (c) Meta Platforms, Inc. and affiliates.
3	*
4	* This source code is licensed under the MIT license found in the
5	* LICENSE file in the root directory of this source tree.
6	*/
7
8	import type {IGrammar} from 'vscode-textmate';
9	import type {ThemeColor} from '../../theme';
10	import type {
11	SyntaxWorkerRequest,
12	SyntaxWorkerResponse,
13	TokenizedDiffHunks,
14	TokenizedHunk,
15	} from './syntaxHighlightingTypes';
16
17	import {CancellationToken} from 'shared/CancellationToken';
18	import FilepathClassifier from 'shared/textmate-lib/FilepathClassifier';
19	import {tokenizeLines} from 'shared/textmate-lib/tokenize';
20	import {loadWASM} from 'vscode-oniguruma';
21	import {grammars, languages} from '../../generated/textmate/TextMateGrammarManifest';
22	import {getGrammar, getGrammarStore} from './grammar';
23
/** Relative location of the Oniguruma WASM binary; it is resolved against a base URI when fetched. */
const URL_TO_ONIG_WASM = './generated/textmate/onig.wasm';
25
26	/* This file is intended to be executed in a WebWorker, without access to the DOM. */
27
/**
 * Base URI that fetch requests made inside the web worker are resolved against.
 * The worker is executed via a blob: URL, so the base cannot be assumed to be inherited;
 * it is assigned when a 'setBaseUri' message is handled.
 */
let globalBaseUri: string;
33
/**
 * Resolve the grammar to use for highlighting the file at `path`.
 *
 * Waits for Oniguruma to be loaded, maps the path to a scope name, and then
 * looks the grammar up in the grammar store for `theme`.
 *
 * @param theme theme whose grammar store is used
 * @param path file path used to pick a scope name
 * @param postMessage receives a 'cssColorMap' message whenever the store reports a color map
 * @returns the grammar, or `undefined` when no scope name matches `path` or the lookup yields nothing
 */
async function loadGrammar(
  theme: ThemeColor,
  path: string,
  postMessage: (msg: SyntaxWorkerResponse) => void,
): Promise<IGrammar | undefined> {
  await ensureOnigurumaIsLoaded(globalBaseUri);

  const classifier = getFilepathClassifier();
  const scopeName = classifier.findScopeNameForPath(path);
  if (!scopeName) {
    return undefined;
  }

  const grammar = await getGrammar(
    getGrammarStore(theme, globalBaseUri, colorMap => {
      // forward the newest colorMap to the client
      postMessage({type: 'cssColorMap', colorMap} as SyntaxWorkerResponse);
    }),
    scopeName,
  );
  // Normalize a nullish lookup result to `undefined`.
  return grammar == null ? undefined : grammar;
}
54
/** Cancellation tokens keyed by request id, looked up when a 'cancel' message arrives. */
const cancellationTokenForId: Map<number, CancellationToken> = new Map();
56
/**
 * Runs asynchronous tasks one at a time, in the order they were pushed.
 *
 * Before each task the queue yields to the event loop, so messages that arrived
 * in the meantime (such as cancellations) are handled before the task starts.
 * A failing task is logged and does not prevent later tasks from running.
 */
class WorkQueue {
  private readonly queue: Array<() => Promise<void>> = [];
  private isProcessing = false;

  /** Enqueue `work` and start draining the queue if it is currently idle. */
  public push(work: () => Promise<void>): void {
    this.queue.push(work);

    if (!this.isProcessing) {
      // Fire-and-forget: `drain` handles task errors itself.
      void this.drain();
    }
  }

  /** Run queued tasks sequentially until the queue is empty. */
  private async drain(): Promise<void> {
    this.isProcessing = true;
    try {
      let work = this.queue.shift();
      while (work !== undefined) {
        // Allow the task queue to be emptied before continuing,
        // so we can process cancel messages
        await new Promise<void>(resolve => setTimeout(resolve, 0));
        try {
          // The call is inside the `try` so that a task which throws synchronously
          // is handled the same way as one returning a rejected promise.
          await work();
        } catch (err) {
          // eslint-disable-next-line no-console
          console.error(err);
        }
        work = this.queue.shift();
      }
    } finally {
      // Always release the flag; otherwise a later `push` would never restart the queue.
      this.isProcessing = false;
    }
  }
}

const workQueue = new WorkQueue();
88
/**
 * Handle one request sent to the syntax highlighting worker.
 *
 * - 'setBaseUri' stores the base URI used for later fetches.
 * - 'tokenizeContents' / 'tokenizeHunks' queue a tokenization job and post the
 *   result back with the request's type and id.
 * - 'cancel' cancels the token registered for `idToCancel`, if there is one.
 *
 * @param postMessage used to send responses (and color map updates) to the client
 * @param event message event whose `data` is the request
 */
export function handleMessage(
  postMessage: (msg: SyntaxWorkerResponse & {id?: number}) => unknown,
  event: MessageEvent,
): void {
  const data = event.data as SyntaxWorkerRequest & {id: number};

  /**
   * Register a cancellation token for this request and queue `work`.
   * The token is only created for requests that can actually be cancelled, and it is
   * always unregistered once the work settles, even if the work throws.
   */
  const enqueueCancellable = (work: (token: CancellationToken) => Promise<void>): void => {
    const token = new CancellationToken();
    if (data.id != null) {
      cancellationTokenForId.set(data.id, token);
    }
    workQueue.push(async () => {
      try {
        await work(token);
      } finally {
        // Only remove our own token, in case the id has been registered again since.
        if (cancellationTokenForId.get(data.id) === token) {
          cancellationTokenForId.delete(data.id);
        }
      }
    });
  };

  switch (data.type) {
    case 'setBaseUri': {
      globalBaseUri = data.base;
      break;
    }
    case 'tokenizeContents': {
      enqueueCancellable(async token => {
        const grammar = await loadGrammar(data.theme, data.path, postMessage);
        const result = tokenizeContent(grammar, data.content, token);
        postMessage({type: data.type, id: data.id, result});
      });
      break;
    }
    case 'tokenizeHunks': {
      enqueueCancellable(async token => {
        const grammar = await loadGrammar(data.theme, data.path, postMessage);
        const result = tokenizeHunks(grammar, data.hunks, token);
        postMessage({type: data.type, id: data.id, result});
      });
      break;
    }
    case 'cancel': {
      const tokenToCancel = cancellationTokenForId.get(data.idToCancel);
      tokenToCancel?.cancel();
      break;
    }
  }
}
128
// Inside a WebWorker there is no `document`: wire the global onmessage/postMessage pair
// to the handler. Outside of a WebWorker, the exported `handleMessage` function should
// be used instead.
if (typeof self.document === 'undefined') {
  const postToClient = postMessage;
  onmessage = (event: MessageEvent) => handleMessage(postToClient, event);
}
134
/**
 * Tokenize the before and after sides of each diff hunk.
 *
 * @param grammar grammar to tokenize with; `undefined` produces no result
 * @param hunks hunks whose `lines` are patch lines
 * @param cancellationToken checked once, before any tokenizing starts
 * @returns one `[before, after]` pair of tokenized lines per hunk, or `undefined`
 *   when there is no grammar or the request was already cancelled
 */
function tokenizeHunks(
  grammar: IGrammar | undefined,
  hunks: Array<{lines: Array<string>}>,
  cancellationToken: CancellationToken,
): TokenizedDiffHunks | undefined {
  // Bail out before doing any expensive highlighting.
  if (grammar == null || cancellationToken.isCancelled) {
    return undefined;
  }

  const tokenizedPatches: TokenizedDiffHunks = hunks.map(({lines}) => {
    const [before, after] = recoverFileContentsFromPatchLines(lines);
    return [tokenizeLines(before, grammar), tokenizeLines(after, grammar)];
  });
  return tokenizedPatches;
}
155
/**
 * Tokenize plain file content.
 *
 * @param grammar grammar to tokenize with; `undefined` produces no result
 * @param content lines to tokenize
 * @param cancellationToken checked once, before tokenizing starts
 * @returns the tokenized lines, or `undefined` when there is no grammar or the
 *   request was already cancelled
 */
function tokenizeContent(
  grammar: IGrammar | undefined,
  content: Array<string>,
  cancellationToken: CancellationToken,
): TokenizedHunk | undefined {
  // Only do the expensive highlighting when a grammar exists and nobody cancelled.
  if (grammar != null && !cancellationToken.isCancelled) {
    return tokenizeLines(content, grammar);
  }
  return undefined;
}
172
/**
 * Rebuild the "before" and "after" file contents from patch lines.
 *
 * Each patch line starts with a marker character: ' ' lines belong to both sides,
 * '-' lines only to the before side, and '+' lines only to the after side.
 * The marker is stripped; lines with any other first character (or empty lines) are skipped.
 * The recovered lines are what the syntax highlighting actually runs on.
 */
function recoverFileContentsFromPatchLines(
  lines: Array<string>,
): [before: Array<string>, after: Array<string>] {
  const before: Array<string> = [];
  const after: Array<string> = [];

  for (const patchLine of lines) {
    const text = patchLine.slice(1);
    switch (patchLine.charAt(0)) {
      case ' ':
        before.push(text);
        after.push(text);
        break;
      case '-':
        before.push(text);
        break;
      case '+':
        after.push(text);
        break;
      default:
        // Not a content line; contributes to neither side.
        break;
    }
  }

  return [before, after];
}
195
/** In-flight or completed Oniguruma load; `null` when no load is cached. */
let onigurumaLoadingJob: Promise<void> | null = null;

/**
 * Start loading Oniguruma on first use and share that single load between callers.
 *
 * If the load fails, the cached job is cleared so that a later call can try again
 * instead of receiving the same rejection forever.
 *
 * @param base base URI the WASM URL is resolved against; only used when a load is started
 * @returns a promise that settles with the shared load
 */
function ensureOnigurumaIsLoaded(base: string): Promise<void> {
  if (onigurumaLoadingJob === null) {
    const job: Promise<void> = loadOniguruma(base).catch((err: unknown) => {
      // Forget the failed attempt, unless a newer one has already replaced it.
      if (onigurumaLoadingJob === job) {
        onigurumaLoadingJob = null;
      }
      throw err;
    });
    onigurumaLoadingJob = job;
  }
  return onigurumaLoadingJob;
}
203
/**
 * Fetch the Oniguruma WASM binary and hand it to `loadWASM`.
 *
 * @param base base URI that `URL_TO_ONIG_WASM` is resolved against
 * @throws Error when the server answers with a non-success HTTP status
 */
async function loadOniguruma(base: string): Promise<void> {
  const url = new URL(URL_TO_ONIG_WASM, base);
  const response = await fetch(url);

  // fetch() resolves for HTTP error statuses as well; fail here with a clear message
  // rather than passing an error page to the WASM loader.
  if (!response.ok) {
    throw new Error(
      `Failed to fetch Oniguruma WASM from ${url.href}: ${response.status} ${response.statusText}`,
    );
  }

  // Pass the response itself only when it is served as application/wasm;
  // otherwise read the whole body and pass the bytes.
  const contentType = response.headers.get('content-type');
  const useStreamingParser = contentType === 'application/wasm';

  if (useStreamingParser) {
    await loadWASM(response);
  } else {
    const dataOrOptions = {
      data: await response.arrayBuffer(),
    };
    await loadWASM(dataOrOptions);
  }
}
221
/** Lazily created classifier shared by all requests; `null` until first use. */
let _classifier: FilepathClassifier | null = null;

/** Return the shared `FilepathClassifier`, building it from the grammar manifest on first call. */
function getFilepathClassifier(): FilepathClassifier {
  if (_classifier != null) {
    return _classifier;
  }
  const created = new FilepathClassifier(grammars, languages);
  _classifier = created;
  return created;
}
230