linelog.ts · grove

38.7 KB1165 lines

1	/**
2	* Portions Copyright (c) Meta Platforms, Inc. and affiliates.
3	*
4	* This source code is licensed under the MIT license found in the
5	* LICENSE file in the root directory of this source tree.
6	*/
7
8	/*
9
10	Copyright (c) 2020 Jun Wu
11
12	Permission is hereby granted, free of charge, to any person obtaining a copy
13	of this software and associated documentation files (the "Software"), to deal
14	in the Software without restriction, including without limitation the rights
15	to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
16	copies of the Software, and to permit persons to whom the Software is
17	furnished to do so, subject to the following conditions:
18
19	The above copyright notice and this permission notice shall be included in all
20	copies or substantial portions of the Software.
21
22	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
23	IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
24	FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
25	AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
26	LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
27	OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
28	SOFTWARE.
29
30	*/
31
32	import type {RecordOf, ValueObject} from 'immutable';
33	import type {LRUWithStats} from 'shared/LRU';
34
35	import {hash, Set as ImSet, List, Record} from 'immutable';
36	import {cached, cachedMethod, LRU} from 'shared/LRU';
37	import {diffLines, splitLines} from 'shared/diff';
38	import {SelfUpdate} from 'shared/immutableExt';
39	import {nullthrows} from 'shared/utils';
40	import {assert} from './utils';
41
42	/** Operation code. */
43	enum Op {
44	/** Unconditional jump. */
45	J = 0,
46	/** Jump if the current rev >= operand. */
47	JGE = 1,
48	/** Jump if the current rev < operand. */
49	JL = 2,
50	/** Append a line. */
51	LINE = 3,
52	/** End execution. */
53	END = 4,
54	}
55
56	/** J instruction. */
57	const J = Record(
58	{
59	/** Opcode: J */
60	op: Op.J,
61	/** Program counter (offset to jump). */
62	pc: 0,
63	},
64	'J',
65	);
66	type J = RecordOf<{
67	op: Op.J;
68	pc: number;
69	}>;
70
71	/** JGE instruction. */
72	const JGE = Record(
73	{
74	/** Opcode: JGE */
75	op: Op.JGE,
76	/** `rev` to test. */
77	rev: 0,
78	/** Program counter (offset to jump). */
79	pc: 0,
80	},
81	'JGE',
82	);
83	type JGE = RecordOf<{
84	op: Op.JGE;
85	rev: Rev;
86	pc: number;
87	}>;
88
89	/** JL instruction. */
90	const JL = Record(
91	{
92	/** Opcode: JL */
93	op: Op.JL,
94	/** `rev` to test. */
95	rev: 0,
96	/** Program counter (offset to jump). */
97	pc: 0,
98	},
99	'JL',
100	);
101	type JL = RecordOf<{
102	op: Op.JL;
103	rev: Rev;
104	pc: number;
105	}>;
106
107	/** LINE instruction. */
108	const LINE = Record(
109	{
110	/** Opcode: LINE */
111	op: Op.LINE,
112	/** `rev` to test. */
113	rev: 0,
114	/** Line content. Includes EOL. */
115	data: '',
116	},
117	'LINE',
118	);
119	type LINE = RecordOf<{
120	op: Op.LINE;
121	rev: Rev;
122	data: string;
123	}>;
124
125	/** END instruction. */
126	const END = Record(
127	{
128	/** Opcode: END */
129	op: Op.END,
130	},
131	'END',
132	);
133	type END = RecordOf<{
134	op: Op.END;
135	}>;
136
137	/** Program counter (offset to instructions). */
138	type Pc = number;
139
140	/** Revision number. Usually starts from 1. Larger number means newer versions.
141	*
142	* For advanced use-cases, this can be a floating number. For example, absorb
143	* might use rev 2.5 between rev2 and rev 3, indicating the chunk is currently
144	* absorbed into rev 2 (by checking out 2.999), while still preserving the rev
145	* 2 losslessly (by checking out 2, instead of 2.999). `absorb` might also use
146	* different fractional part to "tag" the chunk so they can be treated
147	* differently. For example, there are 2 chunk edits in the working copy a.txt,
148	* absorb might use `1 / 1024` to represent the first edit, and `2 / 1024` for
149	* the second chunk edit so those edits can be moved (remapped) separately.
150	* absorb might also split a chunk edit into multiple edits so the individual
151	* lines can be moved separately.
152	*/
153	type Rev = number;
154
155	/** Index of a line. Starts from 0. */
156	type LineIdx = number;
157
158	/** Instruction. */
159	type Inst = J \| END \| JGE \| JL \| LINE;
160
161	/** Information about a line. Internal (`lines`) result of `LineLog.checkOut`. */
162	interface LineInfo {
163	/** Line content. Includes EOL. */
164	data: string;
165	/** Added by the given rev. */
166	rev: Rev;
167	/** Produced by the instruction at the given offset. */
168	pc: Pc;
169	/**
170	* Whether the line is deleted.
171	* This is always `false` if `checkOut(rev, None)`.
172	* It might be `true` when checking out a range of revisions
173	* (aka. `start` passed to `checkOut` is not `null`).
174	*/
175	deleted: boolean;
176	}
177
178	/** A "flatten" line. Result of `LineLog.flatten()`. */
179	type FlattenLineProps = {
180	/** The line is present in the given revisions. */
181	revs: ImSet<Rev>;
182	/** Content of the line, including `\n`. */
183	data: string;
184	};
185	const FlattenLine = Record<FlattenLineProps>({revs: ImSet(), data: ''});
186	type FlattenLine = RecordOf<FlattenLineProps>;
187
188	/** Used by visitWithInsDelStacks */
189	type Frame = {rev: Rev; endPc: Pc};
190
191	/**
192	* List of instructions.
193	*
194	* This is a wrapper of `List<Inst>` for more efficient `hashCode` and `equals`
195	* calculations. The default `hashCode` from `immutable.js` scans the whole
196	* `List`. In this implementation we keep 2 internal values: hash and str. The
197	* `hash` is used for hashCode, and the `str` is an append-only string that
198	* tracks the `editChunk` and other operations to `List<Inst>` for testing
199	* equality.
200	*
201	* You might have noticed that the `str` equality might not match the
202	* `List<Inst>` equality. For example, if we remap 1 to 2, then remap 2 to 1,
203	* the `List<Inst>` is not changed, but the `str` is changed. It is okay to
204	* treat the linelogs as different in this case as we almost always immediately
205	* rebuild linelogs after a `remap`. It's important to make sure `recordText`
206	* with the same text list gets cache hit.
207	*/
208	class Code implements ValueObject {
209	constructor(
210	private instList: List<Inst> = List([END() as Inst]),
211	private __hash: Readonly<number> = 0,
212	private __valueOf: Readonly<string> = '',
213	) {}
214
215	getSize(): number {
216	return this.instList.size;
217	}
218
219	get(pc: Pc): Readonly<Inst> \| undefined {
220	return this.instList.get(pc);
221	}
222
223	valueOf(): string {
224	return this.__valueOf;
225	}
226
227	equals(other: Code): boolean {
228	return this.__valueOf === other.__valueOf;
229	}
230
231	hashCode(): number {
232	return this.__hash;
233	}
234
235	/**
236	* Dump instructions in a human readable format. Useful for debugging.
237	* Note: This exposes internal details which might change in the future.
238	*/
239	describeHumanReadableInstructions(): string[] {
240	return this.instList.map((inst, i) => `${i}: ${describeInst(inst)}`).toArray();
241	}
242
243	/**
244	* Dump lines with ASCII annotated insertions and deletions stacks.
245	*/
246	describeHumanReadableInsDelStacks(): string[] {
247	// 1st Pass: Figure out the max stack depth, line length for padding.
248	let maxInsStackDepth = 0;
249	let maxDelStackDepth = 0;
250	let maxLineLength = 'Insert (rev: 1000)'.length;
251	this.visitWithInsDelStacks((insStack, delStack) => {
252	return {
253	onStackPush: () => {
254	maxInsStackDepth = Math.max(maxInsStackDepth, insStack.length + 1);
255	maxDelStackDepth = Math.max(maxDelStackDepth, delStack.length + 2);
256	},
257	onLine: line => {
258	maxLineLength = Math.max(maxLineLength, line.data.length + 'Line: '.length);
259	},
260	};
261	});
262	// 2nd Pass: Render the instructions.
263	const result: string[] = [];
264	this.visitWithInsDelStacks((insStack, delStack) => {
265	const pushLine = (data: string, leftAdjust?: number, rightAdjust?: number) => {
266	const insDepth = insStack.length - 1 + (leftAdjust ?? 0);
267	const delDepth = delStack.length + (rightAdjust ?? 0);
268	const insPad = maxInsStackDepth - insDepth;
269	const delPad = maxDelStackDepth - delDepth;
270	const left =
271	'\|'.repeat(insDepth) +
272	(leftAdjust == null ? ' '.repeat(insPad + 1) : `+${'-'.repeat(insPad)}`);
273	const right =
274	(rightAdjust == null ? ' '.repeat(delPad + 1) : `${'-'.repeat(delPad)}+`) +
275	'\|'.repeat(delDepth);
276	const middle = data + ' '.repeat(maxLineLength - data.length);
277	result.push(left + middle + right);
278	};
279	return {
280	onStackPush: stack => {
281	const rev = stack.at(-1)?.rev ?? 0;
282	if (stack === insStack) {
283	// \| \| +------ Insert (rev x) <- this line
284	// \| \| \| Line: .... <- following lines
285	pushLine(`Insert (rev ${rev})`, -1);
286	} else {
287	pushLine(`Delete (rev ${rev})`, undefined, -1);
288	}
289	},
290	onStackPop: stack => {
291	if (stack === insStack) {
292	pushLine('', 0);
293	} else {
294	pushLine('', undefined, 0);
295	}
296	},
297	onLine: line => {
298	pushLine(`Line: ${line.data.trimEnd()}`);
299	},
300	};
301	});
302	return result;
303	}
304
305	editChunk(
306	aRev: Rev,
307	a1: LineIdx,
308	a2: LineIdx,
309	bRev: Rev,
310	bLines: string[],
311	[aLines, aLinesMutable]: [LineInfo[], true] \| [Readonly<LineInfo[]>, false],
312	): Code {
313	const start = this.instList.size;
314
315	assert(a1 <= a2, 'illegal chunk (a1 < a2)');
316	assert(a2 <= aLines.length, 'out of bound a2 (wrong aRev?)');
317
318	// See also https://sapling-scm.com/docs/internals/linelog/#editing-linelog
319	// # Before # After
320	// # (pc): Instruction # (pc): Instruction
321	// : ... : ...
322	// a1Pc: <a1Inst> a1Pc: J start
323	// a1Pc+1: ... a1Pc+1: ...
324	// : ... : ...
325	// a2Pc: ... a2Pc: ...
326	// : ... : ...
327	// len: N/A start: JL brev b2Pc [1]
328	// : LINE brev b1 [1]
329	// : LINE brev b1+1 [1]
330	// : ... [1]
331	// : LINE brev b2-1 [1]
332	// b2Pc: JGE brev a2Pc [2]
333	// : <a1Inst> (moved) [3]
334	// : J a1Pc+1 [4]
335	// [1]: Only present if `bLines` is not empty.
336	// [2]: Only present if `a1 < a2`.
337	// There are 2 choices for "a2Pc":
338	// - The a2 line exactly: aLines[a2].pc
339	// - The next instruction of the "a2 -1" line: aLines[a2 - 1].pc + 1
340	// We pick the latter to avoid overly aggressive deletion.
341	// The original C implementation might pick the former when editing
342	// the last rev for performance optimization.
343	// [3]: <a1 Inst> could be LINE or END.
344	// [4]: As an optimization, this is only present if <a1 Inst> is not END.
345	//
346	// Optimization [OPT1] to make reorder less restrictive, treat insertion
347	// (a1 == a2) at the beginning of another insertion (<a1 Inst> is after a
348	// <JL>) specially. Our goal is to avoid nested JLs. Instead of patching
349	// the a1Inst after the JL, we patch the JL (jlInst) so we can insert our
350	// new JL (for this edit) before the old JL (jlInst, being patched).
351	// Note this "JL followed by a1Inst" optimization needs to be applicable
352	// multiple times. To do that, we also move the a1Inst to right after the
353	// jlInst so the pattern "JL followed by a1Inst" can be recognized by the
354	// next editChunk to apply the same optimization.
355	//
356	// # Before # After
357	// # (pc): Instruction # (pc): Instruction
358	// : ... : ...
359	// : <jlInst> a1Pc-1: J start [*]
360	// a1Pc: <a1Inst> a1Pc: NOP (J a1Pc+1) [*]
361	// : ... : ...
362	// len: N/A start: JL brev b2Pc
363	// : (bLines)
364	// b2Pc: <jlInst> (moved) [*]
365	// : <a1Inst> (moved)
366	// : J a1Pc [*]
367	const newInstList = this.instList.withMutations(origCode => {
368	let code = origCode;
369	const a1Pc = aLines[a1].pc;
370	// If `jlInst` is set, optimization [OPT1] is in effect.
371	let jlInst = a1Pc > 0 && a1 === a2 ? code.get(a1Pc - 1) : undefined;
372	if (jlInst?.op !== Op.JL) {
373	jlInst = undefined;
374	}
375	if (bLines.length > 0) {
376	// [1]
377	const b2Pc = start + bLines.length + 1;
378	code = code.push(JL({rev: bRev, pc: b2Pc}) as Inst);
379	bLines.forEach(line => {
380	code = code.push(LINE({rev: bRev, data: line}) as Inst);
381	});
382	assert(b2Pc === code.size, 'bug: wrong pc');
383	}
384	if (a1 < a2) {
385	assert(jlInst === undefined, 'OPT1 requires no deletion');
386	// [2]
387	const a2Pc = aLines[a2 - 1].pc + 1;
388	code = code.push(JGE({rev: bRev, pc: a2Pc}) as Inst);
389	}
390	if (aLinesMutable) {
391	aLines[a1] = {...aLines[a1], pc: jlInst == null ? code.size : code.size + 1};
392	}
393	const a1Inst = nullthrows(code.get(a1Pc));
394	if (jlInst === undefined) {
395	// [3]
396	code = code.push(a1Inst);
397	if (a1Inst.op /* LINE or END */ !== Op.END) {
398	// [4]
399	code = code.push(J({pc: a1Pc + 1}) as Inst);
400	}
401	code = code.set(a1Pc, J({pc: start}) as Inst);
402	} else {
403	code = code
404	.push(jlInst)
405	.push(a1Inst)
406	.push(J({pc: a1Pc}) as Inst)
407	.set(a1Pc - 1, J({pc: start}) as Inst)
408	.set(a1Pc, J({pc: a1Pc + 1}) as J);
409	}
410	return code;
411	});
412
413	if (aLinesMutable) {
414	const newLines = bLines.map((s, i) => {
415	return {data: s, rev: bRev, pc: start + 1 + i, deleted: false};
416	});
417	aLines.splice(a1, a2 - a1, ...newLines);
418	}
419
420	const newValueOf = `E${aRev},${a1},${a2},${bRev},${bLines.join('')}`;
421	return this.newCode(newInstList, newValueOf);
422	}
423
424	/**
425	* Visit (execute) instructions with the insertion and deletion stacks
426	* converted from JGE and JL instructions maintained by this function.
427	*
428	* See the comment in this function about how to turn JGE and JL to
429	* the stacks.
430	*
431	* For stacks like this:
432	*
433	* +---- Insertion (rev 1)
434	* \| Line 1
435	* \| ----+ Deletion (rev 4)
436	* \| Line 2 \|
437	* \| +-- Insertion (rev 2) \|
438	* \| \| Line 3 \|
439	* \| \| --+ \| Deletion (rev 3)
440	* \| \| Line 4 \| \|
441	* \| +-- \| \|
442	* \| Line 5 \| \|
443	* \| --+ \|
444	* \| Line 6 \|
445	* \| ----+
446	* \| Line 7
447	* +----
448	*
449	* When visiting "Line 3", the callsite will get insertion stack =
450	* [rev 1, rev 2] and deletion stack = [rev 4].
451	*
452	* Internally, this is done by turning conditional jumps (JGE or JL)
453	* to stack pushes, pops at the JGE or JL destinations, and follow
454	* unconditional jumps (J) as usual. For more details, see the comment
455	* inside this function.
456	*
457	* This function will call `withContext` to provide the `insStack` and
458	* `delStack` context, and expect the callsite to provide handlers it
459	* is interested in.
460	*
461	* Typical use-cases include features that need to scan all (ever existed)
462	* lines like flatten() and calculateDepMap().
463	*/
464	visitWithInsDelStacks(
465	withContext: (
466	insStack: Readonly<Frame[]>,
467	delStack: Readonly<Frame[]>,
468	) => {
469	/** Before stack pop or push */
470	onPc?: (pc: number) => void;
471	/** After stack pop, before stack push */
472	onLine?: (inst: LINE) => void;
473	/** After stack pop, before stack push */
474	onConditionalJump?: (inst: JGE \| JL) => void;
475	/** After stack push */
476	onStackPush?: (stack: Readonly<Frame[]>) => void;
477	/** After stack pop */
478	onStackPop?: (stack: Readonly<Frame[]>) => void;
479	},
480	) {
481	// How does it work? First, insertions and deletions in linelog form
482	// tree structures. For example:
483	//
484	// +---- Insertion (rev 1)
485	// \| Line 1
486	// \| ----+ Deletion (rev 4)
487	// \| Line 2 \|
488	// \| +-- Insertion (rev 2) \|
489	// \| \| Line 3 \|
490	// \| \| --+ \| Deletion (rev 3)
491	// \| \| Line 4 \| \|
492	// \| +-- \| \|
493	// \| Line 5 \| \|
494	// \| --+ \|
495	// \| Line 6 \|
496	// \| ----+
497	// \| Line 7
498	// +----
499	//
500	// Note interleaved insertions do not happen. For example, this does not
501	// happen:
502	//
503	// +---- Insertion (rev 1)
504	// \| Line 1
505	// \| +-- Insertion (rev 2)
506	// \| \| Line 2
507	// +-\|--
508	// \| Line 3
509	// +--
510	//
511	// Similarly, interleaved deletions do not happen. However, insertions
512	// might interleave with deletions, as shown above.
513	//
514	// Let's look at how this is done at the instruction level. First, look at
515	// the instructions generated by editChunk:
516	//
517	// a2Pc: ...
518	// ...
519	// start: JL brev b2Pc
520	// ...
521	// b2Pc: JGE brev a2Pc
522	// : <a1 Inst>
523	// end: J a1Pc+1
524	//
525	// JL is used for insertion, JGE is used for deletion. We then use them to
526	// manipulate the insStack and delStack:
527	//
528	// insStack:
529	//
530	// - On "start: JL brev b2Pc":
531	// Do not follow the JL jump.
532	// Push {rev, b2Pc} to insStack.
533	// - When pc is b2Pc, pop insStack.
534	//
535	// delStack:
536	//
537	// - On "b2Pc: JGE brev a2Pc":
538	// Do not follow the JGE jump.
539	// Push {rev, a2Pc} to delStack.
540	// - When pc is a2Pc, pop delStack.
541	//
542	// You might have noticed that we don't use the revs in LINE instructions
543	// at all. This is because that LINE rev always matches its JL rev in this
544	// implementation. In other words, the "rev" in LINE instruction is
545	// redundant as it can be inferred from JL, with an insStack. Note in the
546	// original C implementation of LineLog the LINE rev can be different from
547	// the JL rev, to deal with merges while maintaining a linear history.
548	const insStack: Frame[] = [{rev: 0, endPc: -1}];
549	const delStack: Frame[] = [];
550	const {onPc, onLine, onConditionalJump, onStackPush, onStackPop} = withContext(
551	insStack,
552	delStack,
553	);
554	let pc = 0;
555	let patience = this.getSize() * 2;
556	while (patience > 0) {
557	onPc?.(pc);
558	if (insStack.at(-1)?.endPc === pc) {
559	insStack.pop();
560	onStackPop?.(insStack);
561	}
562	if (delStack.at(-1)?.endPc === pc) {
563	delStack.pop();
564	onStackPop?.(delStack);
565	}
566	const code = nullthrows(this.get(pc));
567	switch (code.op) {
568	case Op.LINE:
569	onLine?.(code);
570	pc += 1;
571	break;
572	case Op.END:
573	patience = -1;
574	break;
575	case Op.J:
576	pc = code.pc;
577	break;
578	case Op.JGE:
579	onConditionalJump?.(code);
580	delStack.push({rev: code.rev, endPc: code.pc});
581	onStackPush?.(delStack);
582	pc += 1;
583	break;
584	case Op.JL:
585	onConditionalJump?.(code);
586	insStack.push({rev: code.rev, endPc: code.pc});
587	onStackPush?.(insStack);
588	pc += 1;
589	break;
590	default:
591	assert(false, 'bug: unknown code');
592	}
593	patience -= 1;
594	}
595	if (patience === 0) {
596	assert(false, 'bug: code does not end in time');
597	}
598	}
599
600	remapRevs(revMapOrFunc: Map<Rev, Rev> \| ((rev: Rev) => Rev)): [Code, Rev] {
601	let revMap = new Map<Rev, Rev>();
602	if (typeof revMapOrFunc === 'function') {
603	this.instList.forEach(inst => {
604	const rev = (inst as RecordOf<{rev?: number}>).rev;
605	if (rev != null && !revMap.has(rev)) {
606	revMap.set(rev, revMapOrFunc(rev));
607	}
608	});
609	} else {
610	revMap = revMapOrFunc;
611	}
612
613	const valueOfString = [...revMap.entries()].toString();
614	return this.rewriteInsts((c, _pc) => {
615	if (c.op === Op.JGE \|\| c.op === Op.JL \|\| c.op === Op.LINE) {
616	const newRev = revMap.get(c.rev) ?? c.rev;
617	// TypeScript cannot prove `c` has `rev`. Ideally it can figure out it automatically.
618	return (c as RecordOf<{rev: number}>).set('rev', newRev) as Inst;
619	}
620	return c;
621	}, valueOfString);
622	}
623
624	/**
625	* Rewrite all instructions. Returns [newCode, newMaxRev].
626	* `valueOfString` sets a unique string that represents the change for equal tests.
627	*/
628	rewriteInsts(func: (inst: Inst, pc: Pc) => Inst, valueOfString?: string): [Code, Rev] {
629	let newMaxRev = 0;
630	const newInstList = this.instList
631	.map((inst, pc) => {
632	const newInst = func(inst, pc);
633	const newRev = (newInst as RecordOf<{rev: number}>).rev;
634	if (newRev != null && newRev > newMaxRev) {
635	newMaxRev = newRev;
636	}
637	return newInst;
638	})
639	.toList();
640	const newValueOf = `R${valueOfString ?? func}`;
641	const newCode = this.newCode(newInstList, newValueOf);
642	return [newCode, newMaxRev];
643	}
644
645	/**
646	* Drop edits for revs >= the given rev. Returns [newCode, newMaxRev].
647	*/
648	truncate(rev: Rev): [Code, Rev] {
649	const valueOfString = `TRUNC${rev}`;
650	return this.rewriteInsts((inst, pc) => {
651	if (inst.op === Op.JGE \|\| inst.op === Op.LINE) {
652	if (inst.rev >= rev) {
653	// NOP.
654	return J({pc: pc + 1}) as Inst;
655	}
656	} else if (inst.op === Op.JL) {
657	if (inst.rev >= rev) {
658	// Unconditional jump.
659	return J({pc: inst.pc}) as Inst;
660	}
661	}
662	return inst;
663	}, valueOfString);
664	}
665
666	private newCode(instList: List<Inst>, newValueOf: string): Code {
667	const newStr = this.__valueOf + '\0' + newValueOf;
668	// We want bitwise operations.
669	// eslint-disable-next-line no-bitwise
670	const newHash = (this.__hash * 23 + hash(newValueOf)) & 0x7fffffff;
671	return new Code(instList, newHash, newStr);
672	}
673	}
674
675	// Export for testing purpose.
676	export const executeCache: LRUWithStats = new LRU(1000);
677	const calculateDepCache: LRUWithStats = new LRU(1000);
678	const flattenCache: LRUWithStats = new LRU(1000);
679	const recordTextCache: LRUWithStats = new LRU(1000);
680
681	type LineLogProps = {
682	/** Core state: instructions. The array index type is `Pc`. */
683	code: Code;
684	/** Maximum rev tracked (inclusive). */
685	maxRev: Rev;
686	};
687
688	const LineLogRecord = Record<LineLogProps>({
689	code: new Code(),
690	maxRev: 0 as Rev,
691	});
692	type LineLogRecord = RecordOf<LineLogProps>;
693
694	/**
695	* `LineLog` is a data structure that tracks linear changes to a single text
696	* file. Conceptually similar to a list of texts like `string[]`, with extra
697	* features suitable for stack editing:
698	* - Calculate the "blame" of the text of a given version efficiently.
699	* - Edit lines or trunks in a past version, and affect future versions.
700	* - List all lines that ever existed with each line annotated, like
701	* a unified diff, but for all versions, not just 2 versions.
702	*
703	* Internally, `LineLog` is a byte-code interpreter that runs a program to
704	* emit lines. Changes are done by patching in new byte-codes. There are
705	* no traditional text patch involved. No operations would cause merge
706	* conflicts. See https://sapling-scm.com/docs/internals/linelog for more
707	* details.
708	*
709	* This implementation of `LineLog` uses immutable patterns.
710	* Write operations return new `LineLog`s.
711	*/
712	class LineLog extends SelfUpdate<LineLogRecord> {
713	constructor(props?: {code?: Code; maxRev?: Rev}) {
714	const record = LineLogRecord(props);
715	super(record);
716	}
717
718	get maxRev(): Rev {
719	return this.inner.maxRev;
720	}
721
722	get code(): Code {
723	return this.inner.code;
724	}
725
726	/**
727	* Edit chunk. Replace line `a1` (inclusive) to `a2` (exclusive) in rev
728	* `aRev` with `bLines`. `bLines` are considered introduced by `bRev`.
729	* If `bLines` is empty, the edit is a deletion. If `a1` equals to `a2`,
730	* the edit is an insertion. Otherwise, the edit is a modification.
731	*
732	* While this function does not cause conflicts or error out, not all
733	* editings make practical sense. The callsite might want to do some
734	* extra checks to ensure the edit is meaningful.
735	*
736	* `aLinesCache` is optional. If provided, then `editChunk` will skip a
737	* `checkOutLines` call and modify `aLinesCache` in place to reflect
738	* the edit. It is used by `recordText`.
739	*
740	* If `blockShift` is `true`, consider shifting the insertion lines
741	* to relax dependency for easier reordering. Check the comments
742	* in this function for details.
743	*/
744	editChunk(
745	aRev: Rev,
746	a1: LineIdx,
747	a2: LineIdx,
748	bRev: Rev,
749	bLines: string[],
750	aLinesCache?: LineInfo[],
751	blockShift = true,
752	): LineLog {
753	const aLinesMutable = aLinesCache != null;
754	const aLinesInfo: [LineInfo[], true] \| [Readonly<LineInfo[]>, false] = aLinesMutable
755	? [aLinesCache, true]
756	: [this.checkOutLines(aRev), false];
757
758	const bLen = bLines.length;
759	if (a1 === a2 && bLen > 0 && blockShift) {
760	// Attempt to shift the insertion chunk so the start of insertion aligns
761	// with another "start insertion". This might trigger the [OPT1]
762	// optimization in `code.editChunk`, avoid nested insertions and enable
763	// more flexible reordering.
764	//
765	// For example, we might get "Insert (rev 3)" below that forces a nested
766	// insertion block. However, if we shift the block and use the
767	// "Alternative Insert (rev 3)", we can use the [OPT1] optimization.
768	//
769	// +----Insert (rev 1)
770	// \| Line: function a () {
771	// \| Line: return 'a';
772	// \| Line: }
773	// +----
774	// +----Insert (rev 2)
775	// \| ----+ Alternative Insert (rev 3)
776	// \| Line: \|
777	// \|+---Insert (rev 3) \|
778	// \|\| Line: function b () { \|
779	// \|\| Line: return 'b'; \|
780	// \|\| Line: } \|
781	// \|\| ----+
782	// \|\| Line:
783	// \|+---
784	// \| Line: function c () {
785	// \| Line: return 'c';
786	// \| Line: }
787	// +----
788	//
789	// Block shifting works if the surrounding lines match, see:
790	//
791	// A A
792	// B +-------+
793	// +-------+ is equivalent to \| B \|
794	// \| block \| === shift up ==> \| block \|
795	// \| B \| <== shift down === +-------+
796	// +-------+ B
797	// C C
798
799	const aLines: Readonly<LineInfo[]> = aLinesInfo[0];
800	const canUseOpt1 = (a: LineIdx): boolean => {
801	const pc = aLines.at(a)?.pc;
802	// Check [OPT1] for how this works.
803	return pc != null && pc > 0 && this.code.get(pc - 1)?.op === Op.JL;
804	};
805	if (!canUseOpt1(a1)) {
806	const considerShift = (step: 'down' \| 'up'): LineLog \| undefined => {
807	let ai = a1;
808	let lines = [...bLines];
809	// Limit overhead.
810	const threshold = 10;
811	for (let i = 0; i < threshold; ++i) {
812	// Out of range?
813	if (step === 'up' ? ai === 0 : ai === aLines.length - 1) {
814	return undefined;
815	}
816	// Surrounding lines match?
817	const [aIdx, bIdx] = step === 'up' ? [ai - 1, -1] : [ai, 0];
818	const aData = aLines.at(aIdx)?.data;
819	const bData = lines.at(bIdx);
820	if (bData !== aData \|\| bData == null) {
821	return undefined;
822	}
823	// Shift.
824	lines =
825	step === 'up' ? [bData].concat(lines.slice(0, -1)) : lines.slice(1).concat([bData]);
826	ai += step === 'up' ? -1 : 1;
827	// Good enough?
828	if (canUseOpt1(ai)) {
829	return this.editChunk(aRev, ai, ai, bRev, lines, aLinesCache, false);
830	}
831	}
832	};
833	const maybeShifted = considerShift('up') ?? considerShift('down');
834	if (maybeShifted != null) {
835	return maybeShifted;
836	}
837	}
838	}
839	const newCode = this.code.editChunk(aRev, a1, a2, bRev, bLines, aLinesInfo);
840	const newMaxRev = Math.max(bRev, this.maxRev);
841	return new LineLog({code: newCode, maxRev: newMaxRev});
842	}
843
844	/**
845	* Rewrite `rev` to `mapping[rev] ?? rev`.
846	* This can be useful for reordering, folding, or insertion.
847	*
848	* Note: There are no checks about whether the reordering is
849	* meaningful or not. The callsite is responsible to perform
850	* a dependency check and avoid troublesome reorders like
851	* moving a change to before its dependency.
852	*/
853	remapRevs(revMap: Map<Rev, Rev> \| ((rev: Rev) => Rev)): LineLog {
854	const [newCode, newMaxRev] = this.code.remapRevs(revMap);
855	return new LineLog({code: newCode, maxRev: newMaxRev});
856	}
857
858	/**
859	* Truncate linelog. Drop rev (inclusive) and higher revs.
860	*/
861	truncate(rev: Rev): LineLog {
862	const [newCode, newMaxRev] = this.code.truncate(rev);
863	return new LineLog({code: newCode, maxRev: newMaxRev});
864	}
865
866	/**
867	* Calculate the dependencies of revisions.
868	* For example, `{5: [3, 1]}` means rev 5 depends on rev 3 and rev 1.
869	*
870	* Based on LineLog, which could be different from traditional textual
871	* context-line dependencies. LineLog dependency is to prevent
872	* "malformed cases" [1] when following the dependency to `remapRevs`.
873	* Practically, LineLog might allow reorder cases that would be
874	* disallowed by traditional context-line dependencies. See tests
875	* for examples.
876	*
877	* [1]: Malformed cases are when nested blocks (insertions or deletions)
878	* might be skipped incorrectly. The outer block says "skip" and the
879	* inner block does not want to "skip" but is still skipped since it
880	* is skipped altogether with the outer block. See also section 0.4
881	* and 0.5 in D3628440.
882	*/
883	public calculateDepMap = cachedMethod(this.calculateDepMapImpl, {cache: calculateDepCache});
884
885	private calculateDepMapImpl(): Readonly<Map<Rev, Set<Rev>>> {
886	// With the insertion and deletion stacks (see explanation in
887	// visitWithInsDelStacks), when we see a new insertion block, or deletion
888	// block, we add two dependencies:
889	// - The inner rev depends on the outer insertion rev.
890	// - The outer deletion rev (if present) depends on the inner rev.
891	//
892	// Let's look at how this is done at the instruction level.
893	// the instructions generated by editChunk:
894	//
895	// a2Pc: ...
896	// ...
897	// start: JL brev b2Pc
898	// ...
899	// b2Pc: JGE brev a2Pc
900	// : <a1 Inst>
901	// end: J a1Pc+1
902	//
903	// JL is used for insertion, JGE is used for deletion. We then use them to
904	// manipulate the insStack and delStack:
905	//
906	// insStack:
907	//
908	// - On "start: JL brev b2Pc":
909	// Do not follow the JL jump. (by visitWithInsDelStacks)
910	// Mark brev as dependent on the outer insertion.
911	// Mark the outer deletion as dependent on this brev.
912	// Push {rev, b2Pc} to insStack. (by visitWithInsDelStacks)
913	// - When pc is b2Pc, pop insStack. (by visitWithInsDelStacks)
914	//
915	// delStack:
916	//
917	// - On "b2Pc: JGE brev a2Pc":
918	// Do not follow the JGE jump. (by visitWithInsDelStacks)
919	// Mark brev as dependent on the outer insertion.
920	// Mark the outer deletion as dependent on this brev.
921	// Push {rev, a2Pc} to delStack. (by visitWithInsDelStacks)
922	// - When pc is a2Pc, pop delStack. (by visitWithInsDelStacks)
923	const depMap = new Map<Rev, Set<Rev>>();
924	const addDep = (child: Rev, parent: Rev) => {
925	if (child > parent) {
926	if (!depMap.has(child)) {
927	depMap.set(child, new Set());
928	}
929	depMap.get(child)?.add(parent);
930	}
931	};
932	this.code.visitWithInsDelStacks((insStack, delStack) => {
933	const markDep = (rev: Rev) => {
934	const ins = insStack.at(-1);
935	if (ins !== undefined) {
936	addDep(rev, ins.rev);
937	}
938	const del = delStack.at(-1);
939	if (del !== undefined) {
940	addDep(del.rev, rev);
941	}
942	};
943	return {
944	onConditionalJump: inst => markDep(inst.rev),
945	};
946	});
947	return depMap;
948	}
949
950	/**
951	* Interpret the bytecodes with the given revision range.
952	* Used by `checkOut`.
953	*/
954	public execute = cachedMethod(this.executeImpl, {cache: executeCache});
955
956	private executeImpl(
957	startRev: Rev,
958	endRev: Rev = startRev,
959	present?: {[pc: number]: boolean},
960	): Readonly<LineInfo[]> {
961	const rev = endRev;
962	const lines: LineInfo[] = [];
963	let pc = 0;
964	let patience = this.code.getSize() * 2;
965	const deleted = present == null ? () => false : (pc: Pc) => !present[pc];
966	while (patience > 0) {
967	const code = nullthrows(this.code.get(pc));
968	switch (code.op) {
969	case Op.END:
970	lines.push({data: '', rev: 0, pc, deleted: deleted(pc)});
971	patience = -1;
972	break;
973	case Op.LINE:
974	lines.push({data: code.data, rev: code.rev, pc, deleted: deleted(pc)});
975	pc += 1;
976	break;
977	case Op.J:
978	pc = code.pc;
979	break;
980	case Op.JGE:
981	if (startRev >= code.rev) {
982	pc = code.pc;
983	} else {
984	pc += 1;
985	}
986	break;
987	case Op.JL:
988	if (rev < code.rev) {
989	pc = code.pc;
990	} else {
991	pc += 1;
992	}
993	break;
994	default:
995	assert(false, 'bug: unknown code');
996	}
997	patience -= 1;
998	}
999	if (patience === 0) {
1000	assert(false, 'bug: code does not end in time');
1001	}
1002	return lines;
1003	}
1004
1005	/**
1006	* Flatten lines. Each returned line is associated with a set
1007	* of `Rev`s, meaning that line is present in those `Rev`s.
1008	*
1009	* The returned lines can be useful to figure out file contents
1010	* after reordering, folding commits. It can also provide a view
1011	* similar to `absorb -e FILE` to edit all versions of a file in
1012	* a single view.
1013	*/
1014	public flatten = cachedMethod(this.flattenImpl, {cache: flattenCache});
1015
1016	private flattenImpl(): List<FlattenLine> {
1017	const result: FlattenLine[] = [];
1018
1019	// See the comments in calculateDepMap for what the stacks mean.
1020	//
1021	// The flatten algorithm works as follows:
1022	// - For each line, we got an insRev (insStack.at(-1).rev), and a
1023	// delRev (delStack.at(-1)?.rev ?? maxRev + 1), meaning the rev
1024	// attached to the innermost insertion or deletion blocks,
1025	// respectively.
1026	// - That line is then present in insRev .. delRev (exclusive) revs.
1027	//
1028	// This works because:
1029	// - The blocks are nested in order:
1030	// - For nested insertions, the nested one must have a larger rev, and
1031	// lines inside the nested block are only present starting from the
1032	// larger rev.
1033	// - For nested deletions, the nested one must have a smaller rev, and
1034	// lines inside the nested block are considered as deleted by the
1035	// smaller rev.
1036	// - For interleaved insertion and deletions, insertion rev and deletion
1037	// rev are tracked separately so their calculations are independent
1038	// from each other.
1039	// - Linelog tracks linear history, so (insRev, delRev) can be converted to
1040	// a Set<Rev>.
1041	this.code.visitWithInsDelStacks((insStack, delStack) => {
1042	const maxDelRev = this.maxRev + 1;
1043	const getCurrentRevs = (): ImSet<Rev> => {
1044	const insRev = insStack.at(-1)?.rev ?? 0;
1045	const delRev = delStack.at(-1)?.rev ?? maxDelRev;
1046	return revRangeToSet(insRev, delRev);
1047	};
1048	let currentRevs = getCurrentRevs();
1049	return {
1050	onStackPush: () => {
1051	currentRevs = getCurrentRevs();
1052	},
1053	onStackPop: () => {
1054	currentRevs = getCurrentRevs();
1055	},
1056	onLine: ({data}) => {
1057	result.push(FlattenLine({data, revs: currentRevs}));
1058	},
1059	};
1060	});
1061	return List(result);
1062	}
1063
1064	/**
1065	* Checkout the lines of the given revision `rev`.
1066	*
1067	* If `start` is not `null`, checkout a revision range. For example,
1068	* if `start` is 0, and `rev` is `this.maxRev`, `this.lines` will
1069	* include all lines ever existed in all revisions.
1070	*
1071	* @returns Content of the specified revision.
1072	*/
1073	public checkOutLines(rev: Rev, start: Rev \| null = null): Readonly<LineInfo[]> {
1074	// eslint-disable-next-line no-param-reassign
1075	rev = Math.min(rev, this.maxRev);
1076	let lines = this.execute(rev);
1077	if (start !== null) {
1078	// Checkout a range, including deleted revs.
1079	const present: {[key: number]: boolean} = {};
1080	lines.forEach(l => {
1081	present[l.pc] = true;
1082	});
1083
1084	// Go through all lines again. But do not skip chunks.
1085	lines = this.execute(start, rev, present);
1086	}
1087	return lines;
1088	}
1089
1090	/** Checkout the content of the given rev. */
1091	public checkOut(rev: Rev): string {
1092	const lines = this.checkOutLines(rev);
1093	const content = lines.map(l => l.data).join('');
1094	return content;
1095	}
1096
1097	/**
1098	* Edit LineLog to match the content of `text`.
1099	* This might affect `rev`s that are >= `rev` in the stack.
1100	* Previous revisions won't be affected.
1101	*
1102	* @param text Content to match.
1103	* @param rev Revision to to edit (in-place). If not set, append a new revision.
1104	* @returns A new `LineLog` with the change.
1105	*/
1106	public recordText = cachedMethod(this.recordTextImpl, {cache: recordTextCache});
1107
1108	private recordTextImpl(text: string, rev: Rev \| null = null): LineLog {
1109	// rev to edit from, and rev to match 'text'.
1110	const [aRev, bRev] = rev != null ? [rev, rev] : [this.maxRev, this.maxRev + 1];
1111	const b = text;
1112
1113	const aLineInfos = [...this.checkOutLines(aRev)];
1114	const bLines = splitLines(b);
1115	const aLines = aLineInfos.map(l => l.data);
1116	aLines.pop(); // Drop the last END empty line.
1117	const blocks = diffLines(aLines, bLines);
1118	// eslint-disable-next-line @typescript-eslint/no-this-alias
1119	let log: LineLog = this;
1120
1121	blocks.reverse().forEach(([a1, a2, b1, b2]) => {
1122	log = log.editChunk(aRev, a1, a2, bRev, bLines.slice(b1, b2), aLineInfos);
1123	});
1124
1125	// This is needed in case editChunk is not called (no difference).
1126	const newMaxRev = Math.max(bRev, log.maxRev);
1127
1128	// Populate cache for checking out bRev.
1129	const newLog = new LineLog({code: log.code, maxRev: newMaxRev});
1130	executeCache.set(List([newLog, bRev]), aLineInfos);
1131
1132	return newLog;
1133	}
1134	}
1135
1136	/** Turn (3, 6) to Set([3, 4, 5]). */
1137	const revRangeToSet = cached(
1138	(startRev, endRev: Rev): ImSet<Rev> => {
1139	const result: Rev[] = [];
1140	for (let rev = startRev; rev < endRev; rev++) {
1141	result.push(rev);
1142	}
1143	return ImSet(result);
1144	},
1145	{cacheSize: 1000},
1146	);
1147
1148	function describeInst(inst: Inst): string {
1149	switch (inst.op) {
1150	case Op.J:
1151	return `J ${inst.pc}`;
1152	case Op.JGE:
1153	return `JGE ${inst.rev} ${inst.pc}`;
1154	case Op.JL:
1155	return `JL ${inst.rev} ${inst.pc}`;
1156	case Op.LINE:
1157	return `LINE ${inst.rev} ${JSON.stringify(inst.data.trimEnd())}`;
1158	case Op.END:
1159	return 'END';
1160	}
1161	}
1162
1163	export {FlattenLine, LineLog};
1164	export type {LineIdx, LineInfo, Rev};
1165