38.7 KB1165 lines
Blame
1/**
2 * Portions Copyright (c) Meta Platforms, Inc. and affiliates.
3 *
4 * This source code is licensed under the MIT license found in the
5 * LICENSE file in the root directory of this source tree.
6 */
7
8/*
9
10Copyright (c) 2020 Jun Wu
11
12Permission is hereby granted, free of charge, to any person obtaining a copy
13of this software and associated documentation files (the "Software"), to deal
14in the Software without restriction, including without limitation the rights
15to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
16copies of the Software, and to permit persons to whom the Software is
17furnished to do so, subject to the following conditions:
18
19The above copyright notice and this permission notice shall be included in all
20copies or substantial portions of the Software.
21
22THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
23IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
24FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
25AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
26LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
27OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
28SOFTWARE.
29
30*/
31
32import type {RecordOf, ValueObject} from 'immutable';
33import type {LRUWithStats} from 'shared/LRU';
34
35import {hash, Set as ImSet, List, Record} from 'immutable';
36import {cached, cachedMethod, LRU} from 'shared/LRU';
37import {diffLines, splitLines} from 'shared/diff';
38import {SelfUpdate} from 'shared/immutableExt';
39import {nullthrows} from 'shared/utils';
40import {assert} from './utils';
41
42/** Operation code. */
43enum Op {
44 /** Unconditional jump. */
45 J = 0,
46 /** Jump if the current rev >= operand. */
47 JGE = 1,
48 /** Jump if the current rev < operand. */
49 JL = 2,
50 /** Append a line. */
51 LINE = 3,
52 /** End execution. */
53 END = 4,
54}
55
56/** J instruction. */
57const J = Record(
58 {
59 /** Opcode: J */
60 op: Op.J,
61 /** Program counter (offset to jump). */
62 pc: 0,
63 },
64 'J',
65);
66type J = RecordOf<{
67 op: Op.J;
68 pc: number;
69}>;
70
71/** JGE instruction. */
72const JGE = Record(
73 {
74 /** Opcode: JGE */
75 op: Op.JGE,
76 /** `rev` to test. */
77 rev: 0,
78 /** Program counter (offset to jump). */
79 pc: 0,
80 },
81 'JGE',
82);
83type JGE = RecordOf<{
84 op: Op.JGE;
85 rev: Rev;
86 pc: number;
87}>;
88
89/** JL instruction. */
90const JL = Record(
91 {
92 /** Opcode: JL */
93 op: Op.JL,
94 /** `rev` to test. */
95 rev: 0,
96 /** Program counter (offset to jump). */
97 pc: 0,
98 },
99 'JL',
100);
101type JL = RecordOf<{
102 op: Op.JL;
103 rev: Rev;
104 pc: number;
105}>;
106
107/** LINE instruction. */
108const LINE = Record(
109 {
110 /** Opcode: LINE */
111 op: Op.LINE,
112 /** `rev` to test. */
113 rev: 0,
114 /** Line content. Includes EOL. */
115 data: '',
116 },
117 'LINE',
118);
119type LINE = RecordOf<{
120 op: Op.LINE;
121 rev: Rev;
122 data: string;
123}>;
124
125/** END instruction. */
126const END = Record(
127 {
128 /** Opcode: END */
129 op: Op.END,
130 },
131 'END',
132);
133type END = RecordOf<{
134 op: Op.END;
135}>;
136
137/** Program counter (offset to instructions). */
138type Pc = number;
139
140/** Revision number. Usually starts from 1. Larger number means newer versions.
141 *
142 * For advanced use-cases, this can be a floating number. For example, absorb
143 * might use rev 2.5 between rev2 and rev 3, indicating the chunk is currently
144 * absorbed into rev 2 (by checking out 2.999), while still preserving the rev
145 * 2 losslessly (by checking out 2, instead of 2.999). `absorb` might also use
146 * different fractional part to "tag" the chunk so they can be treated
147 * differently. For example, there are 2 chunk edits in the working copy a.txt,
148 * absorb might use `1 / 1024` to represent the first edit, and `2 / 1024` for
149 * the second chunk edit so those edits can be moved (remapped) separately.
150 * absorb might also split a chunk edit into multiple edits so the individual
151 * lines can be moved separately.
152 */
153type Rev = number;
154
155/** Index of a line. Starts from 0. */
156type LineIdx = number;
157
158/** Instruction. */
159type Inst = J | END | JGE | JL | LINE;
160
161/** Information about a line. Internal (`lines`) result of `LineLog.checkOut`. */
162interface LineInfo {
163 /** Line content. Includes EOL. */
164 data: string;
165 /** Added by the given rev. */
166 rev: Rev;
167 /** Produced by the instruction at the given offset. */
168 pc: Pc;
169 /**
170 * Whether the line is deleted.
171 * This is always `false` if `checkOut(rev, None)`.
172 * It might be `true` when checking out a range of revisions
173 * (aka. `start` passed to `checkOut` is not `null`).
174 */
175 deleted: boolean;
176}
177
178/** A "flatten" line. Result of `LineLog.flatten()`. */
179type FlattenLineProps = {
180 /** The line is present in the given revisions. */
181 revs: ImSet<Rev>;
182 /** Content of the line, including `\n`. */
183 data: string;
184};
185const FlattenLine = Record<FlattenLineProps>({revs: ImSet(), data: ''});
186type FlattenLine = RecordOf<FlattenLineProps>;
187
188/** Used by visitWithInsDelStacks */
189type Frame = {rev: Rev; endPc: Pc};
190
191/**
192 * List of instructions.
193 *
194 * This is a wrapper of `List<Inst>` for more efficient `hashCode` and `equals`
195 * calculations. The default `hashCode` from `immutable.js` scans the whole
196 * `List`. In this implementation we keep 2 internal values: hash and str. The
197 * `hash` is used for hashCode, and the `str` is an append-only string that
198 * tracks the `editChunk` and other operations to `List<Inst>` for testing
199 * equality.
200 *
201 * You might have noticed that the `str` equality might not match the
202 * `List<Inst>` equality. For example, if we remap 1 to 2, then remap 2 to 1,
203 * the `List<Inst>` is not changed, but the `str` is changed. It is okay to
204 * treat the linelogs as different in this case as we almost always immediately
205 * rebuild linelogs after a `remap`. It's important to make sure `recordText`
206 * with the same text list gets cache hit.
207 */
208class Code implements ValueObject {
209 constructor(
210 private instList: List<Inst> = List([END() as Inst]),
211 private __hash: Readonly<number> = 0,
212 private __valueOf: Readonly<string> = '',
213 ) {}
214
215 getSize(): number {
216 return this.instList.size;
217 }
218
219 get(pc: Pc): Readonly<Inst> | undefined {
220 return this.instList.get(pc);
221 }
222
223 valueOf(): string {
224 return this.__valueOf;
225 }
226
227 equals(other: Code): boolean {
228 return this.__valueOf === other.__valueOf;
229 }
230
231 hashCode(): number {
232 return this.__hash;
233 }
234
235 /**
236 * Dump instructions in a human readable format. Useful for debugging.
237 * Note: This exposes internal details which might change in the future.
238 */
239 describeHumanReadableInstructions(): string[] {
240 return this.instList.map((inst, i) => `${i}: ${describeInst(inst)}`).toArray();
241 }
242
243 /**
244 * Dump lines with ASCII annotated insertions and deletions stacks.
245 */
246 describeHumanReadableInsDelStacks(): string[] {
247 // 1st Pass: Figure out the max stack depth, line length for padding.
248 let maxInsStackDepth = 0;
249 let maxDelStackDepth = 0;
250 let maxLineLength = 'Insert (rev: 1000)'.length;
251 this.visitWithInsDelStacks((insStack, delStack) => {
252 return {
253 onStackPush: () => {
254 maxInsStackDepth = Math.max(maxInsStackDepth, insStack.length + 1);
255 maxDelStackDepth = Math.max(maxDelStackDepth, delStack.length + 2);
256 },
257 onLine: line => {
258 maxLineLength = Math.max(maxLineLength, line.data.length + 'Line: '.length);
259 },
260 };
261 });
262 // 2nd Pass: Render the instructions.
263 const result: string[] = [];
264 this.visitWithInsDelStacks((insStack, delStack) => {
265 const pushLine = (data: string, leftAdjust?: number, rightAdjust?: number) => {
266 const insDepth = insStack.length - 1 + (leftAdjust ?? 0);
267 const delDepth = delStack.length + (rightAdjust ?? 0);
268 const insPad = maxInsStackDepth - insDepth;
269 const delPad = maxDelStackDepth - delDepth;
270 const left =
271 '|'.repeat(insDepth) +
272 (leftAdjust == null ? ' '.repeat(insPad + 1) : `+${'-'.repeat(insPad)}`);
273 const right =
274 (rightAdjust == null ? ' '.repeat(delPad + 1) : `${'-'.repeat(delPad)}+`) +
275 '|'.repeat(delDepth);
276 const middle = data + ' '.repeat(maxLineLength - data.length);
277 result.push(left + middle + right);
278 };
279 return {
280 onStackPush: stack => {
281 const rev = stack.at(-1)?.rev ?? 0;
282 if (stack === insStack) {
283 // | | +------ Insert (rev x) <- this line
284 // | | | Line: .... <- following lines
285 pushLine(`Insert (rev ${rev})`, -1);
286 } else {
287 pushLine(`Delete (rev ${rev})`, undefined, -1);
288 }
289 },
290 onStackPop: stack => {
291 if (stack === insStack) {
292 pushLine('', 0);
293 } else {
294 pushLine('', undefined, 0);
295 }
296 },
297 onLine: line => {
298 pushLine(`Line: ${line.data.trimEnd()}`);
299 },
300 };
301 });
302 return result;
303 }
304
305 editChunk(
306 aRev: Rev,
307 a1: LineIdx,
308 a2: LineIdx,
309 bRev: Rev,
310 bLines: string[],
311 [aLines, aLinesMutable]: [LineInfo[], true] | [Readonly<LineInfo[]>, false],
312 ): Code {
313 const start = this.instList.size;
314
315 assert(a1 <= a2, 'illegal chunk (a1 < a2)');
316 assert(a2 <= aLines.length, 'out of bound a2 (wrong aRev?)');
317
318 // See also https://sapling-scm.com/docs/internals/linelog/#editing-linelog
319 // # Before # After
320 // # (pc): Instruction # (pc): Instruction
321 // : ... : ...
322 // a1Pc: <a1Inst> a1Pc: J start
323 // a1Pc+1: ... a1Pc+1: ...
324 // : ... : ...
325 // a2Pc: ... a2Pc: ...
326 // : ... : ...
327 // len: N/A start: JL brev b2Pc [1]
328 // : LINE brev b1 [1]
329 // : LINE brev b1+1 [1]
330 // : ... [1]
331 // : LINE brev b2-1 [1]
332 // b2Pc: JGE brev a2Pc [2]
333 // : <a1Inst> (moved) [3]
334 // : J a1Pc+1 [4]
335 // [1]: Only present if `bLines` is not empty.
336 // [2]: Only present if `a1 < a2`.
337 // There are 2 choices for "a2Pc":
338 // - The a2 line exactly: aLines[a2].pc
339 // - The next instruction of the "a2 -1" line: aLines[a2 - 1].pc + 1
340 // We pick the latter to avoid overly aggressive deletion.
341 // The original C implementation might pick the former when editing
342 // the last rev for performance optimization.
343 // [3]: <a1 Inst> could be LINE or END.
344 // [4]: As an optimization, this is only present if <a1 Inst> is not END.
345 //
346 // Optimization [OPT1] to make reorder less restrictive, treat insertion
347 // (a1 == a2) at the beginning of another insertion (<a1 Inst> is after a
348 // <JL>) specially. Our goal is to avoid nested JLs. Instead of patching
349 // the a1Inst after the JL, we patch the JL (jlInst) so we can insert our
350 // new JL (for this edit) before the old JL (jlInst, being patched).
351 // Note this "JL followed by a1Inst" optimization needs to be applicable
352 // multiple times. To do that, we also move the a1Inst to right after the
353 // jlInst so the pattern "JL followed by a1Inst" can be recognized by the
354 // next editChunk to apply the same optimization.
355 //
356 // # Before # After
357 // # (pc): Instruction # (pc): Instruction
358 // : ... : ...
359 // : <jlInst> a1Pc-1: J start [*]
360 // a1Pc: <a1Inst> a1Pc: NOP (J a1Pc+1) [*]
361 // : ... : ...
362 // len: N/A start: JL brev b2Pc
363 // : (bLines)
364 // b2Pc: <jlInst> (moved) [*]
365 // : <a1Inst> (moved)
366 // : J a1Pc [*]
367 const newInstList = this.instList.withMutations(origCode => {
368 let code = origCode;
369 const a1Pc = aLines[a1].pc;
370 // If `jlInst` is set, optimization [OPT1] is in effect.
371 let jlInst = a1Pc > 0 && a1 === a2 ? code.get(a1Pc - 1) : undefined;
372 if (jlInst?.op !== Op.JL) {
373 jlInst = undefined;
374 }
375 if (bLines.length > 0) {
376 // [1]
377 const b2Pc = start + bLines.length + 1;
378 code = code.push(JL({rev: bRev, pc: b2Pc}) as Inst);
379 bLines.forEach(line => {
380 code = code.push(LINE({rev: bRev, data: line}) as Inst);
381 });
382 assert(b2Pc === code.size, 'bug: wrong pc');
383 }
384 if (a1 < a2) {
385 assert(jlInst === undefined, 'OPT1 requires no deletion');
386 // [2]
387 const a2Pc = aLines[a2 - 1].pc + 1;
388 code = code.push(JGE({rev: bRev, pc: a2Pc}) as Inst);
389 }
390 if (aLinesMutable) {
391 aLines[a1] = {...aLines[a1], pc: jlInst == null ? code.size : code.size + 1};
392 }
393 const a1Inst = nullthrows(code.get(a1Pc));
394 if (jlInst === undefined) {
395 // [3]
396 code = code.push(a1Inst);
397 if (a1Inst.op /* LINE or END */ !== Op.END) {
398 // [4]
399 code = code.push(J({pc: a1Pc + 1}) as Inst);
400 }
401 code = code.set(a1Pc, J({pc: start}) as Inst);
402 } else {
403 code = code
404 .push(jlInst)
405 .push(a1Inst)
406 .push(J({pc: a1Pc}) as Inst)
407 .set(a1Pc - 1, J({pc: start}) as Inst)
408 .set(a1Pc, J({pc: a1Pc + 1}) as J);
409 }
410 return code;
411 });
412
413 if (aLinesMutable) {
414 const newLines = bLines.map((s, i) => {
415 return {data: s, rev: bRev, pc: start + 1 + i, deleted: false};
416 });
417 aLines.splice(a1, a2 - a1, ...newLines);
418 }
419
420 const newValueOf = `E${aRev},${a1},${a2},${bRev},${bLines.join('')}`;
421 return this.newCode(newInstList, newValueOf);
422 }
423
424 /**
425 * Visit (execute) instructions with the insertion and deletion stacks
426 * converted from JGE and JL instructions maintained by this function.
427 *
428 * See the comment in this function about how to turn JGE and JL to
429 * the stacks.
430 *
431 * For stacks like this:
432 *
433 * +---- Insertion (rev 1)
434 * | Line 1
435 * | ----+ Deletion (rev 4)
436 * | Line 2 |
437 * | +-- Insertion (rev 2) |
438 * | | Line 3 |
439 * | | --+ | Deletion (rev 3)
440 * | | Line 4 | |
441 * | +-- | |
442 * | Line 5 | |
443 * | --+ |
444 * | Line 6 |
445 * | ----+
446 * | Line 7
447 * +----
448 *
449 * When visiting "Line 3", the callsite will get insertion stack =
450 * [rev 1, rev 2] and deletion stack = [rev 4].
451 *
452 * Internally, this is done by turning conditional jumps (JGE or JL)
453 * to stack pushes, pops at the JGE or JL destinations, and follow
454 * unconditional jumps (J) as usual. For more details, see the comment
455 * inside this function.
456 *
457 * This function will call `withContext` to provide the `insStack` and
458 * `delStack` context, and expect the callsite to provide handlers it
459 * is interested in.
460 *
461 * Typical use-cases include features that need to scan all (ever existed)
462 * lines like flatten() and calculateDepMap().
463 */
464 visitWithInsDelStacks(
465 withContext: (
466 insStack: Readonly<Frame[]>,
467 delStack: Readonly<Frame[]>,
468 ) => {
469 /** Before stack pop or push */
470 onPc?: (pc: number) => void;
471 /** After stack pop, before stack push */
472 onLine?: (inst: LINE) => void;
473 /** After stack pop, before stack push */
474 onConditionalJump?: (inst: JGE | JL) => void;
475 /** After stack push */
476 onStackPush?: (stack: Readonly<Frame[]>) => void;
477 /** After stack pop */
478 onStackPop?: (stack: Readonly<Frame[]>) => void;
479 },
480 ) {
481 // How does it work? First, insertions and deletions in linelog form
482 // tree structures. For example:
483 //
484 // +---- Insertion (rev 1)
485 // | Line 1
486 // | ----+ Deletion (rev 4)
487 // | Line 2 |
488 // | +-- Insertion (rev 2) |
489 // | | Line 3 |
490 // | | --+ | Deletion (rev 3)
491 // | | Line 4 | |
492 // | +-- | |
493 // | Line 5 | |
494 // | --+ |
495 // | Line 6 |
496 // | ----+
497 // | Line 7
498 // +----
499 //
500 // Note interleaved insertions do not happen. For example, this does not
501 // happen:
502 //
503 // +---- Insertion (rev 1)
504 // | Line 1
505 // | +-- Insertion (rev 2)
506 // | | Line 2
507 // +-|--
508 // | Line 3
509 // +--
510 //
511 // Similarly, interleaved deletions do not happen. However, insertions
512 // might interleave with deletions, as shown above.
513 //
514 // Let's look at how this is done at the instruction level. First, look at
515 // the instructions generated by editChunk:
516 //
517 // a2Pc: ...
518 // ...
519 // start: JL brev b2Pc
520 // ...
521 // b2Pc: JGE brev a2Pc
522 // : <a1 Inst>
523 // end: J a1Pc+1
524 //
525 // JL is used for insertion, JGE is used for deletion. We then use them to
526 // manipulate the insStack and delStack:
527 //
528 // insStack:
529 //
530 // - On "start: JL brev b2Pc":
531 // Do not follow the JL jump.
532 // Push {rev, b2Pc} to insStack.
533 // - When pc is b2Pc, pop insStack.
534 //
535 // delStack:
536 //
537 // - On "b2Pc: JGE brev a2Pc":
538 // Do not follow the JGE jump.
539 // Push {rev, a2Pc} to delStack.
540 // - When pc is a2Pc, pop delStack.
541 //
542 // You might have noticed that we don't use the revs in LINE instructions
543 // at all. This is because that LINE rev always matches its JL rev in this
544 // implementation. In other words, the "rev" in LINE instruction is
545 // redundant as it can be inferred from JL, with an insStack. Note in the
546 // original C implementation of LineLog the LINE rev can be different from
547 // the JL rev, to deal with merges while maintaining a linear history.
548 const insStack: Frame[] = [{rev: 0, endPc: -1}];
549 const delStack: Frame[] = [];
550 const {onPc, onLine, onConditionalJump, onStackPush, onStackPop} = withContext(
551 insStack,
552 delStack,
553 );
554 let pc = 0;
555 let patience = this.getSize() * 2;
556 while (patience > 0) {
557 onPc?.(pc);
558 if (insStack.at(-1)?.endPc === pc) {
559 insStack.pop();
560 onStackPop?.(insStack);
561 }
562 if (delStack.at(-1)?.endPc === pc) {
563 delStack.pop();
564 onStackPop?.(delStack);
565 }
566 const code = nullthrows(this.get(pc));
567 switch (code.op) {
568 case Op.LINE:
569 onLine?.(code);
570 pc += 1;
571 break;
572 case Op.END:
573 patience = -1;
574 break;
575 case Op.J:
576 pc = code.pc;
577 break;
578 case Op.JGE:
579 onConditionalJump?.(code);
580 delStack.push({rev: code.rev, endPc: code.pc});
581 onStackPush?.(delStack);
582 pc += 1;
583 break;
584 case Op.JL:
585 onConditionalJump?.(code);
586 insStack.push({rev: code.rev, endPc: code.pc});
587 onStackPush?.(insStack);
588 pc += 1;
589 break;
590 default:
591 assert(false, 'bug: unknown code');
592 }
593 patience -= 1;
594 }
595 if (patience === 0) {
596 assert(false, 'bug: code does not end in time');
597 }
598 }
599
600 remapRevs(revMapOrFunc: Map<Rev, Rev> | ((rev: Rev) => Rev)): [Code, Rev] {
601 let revMap = new Map<Rev, Rev>();
602 if (typeof revMapOrFunc === 'function') {
603 this.instList.forEach(inst => {
604 const rev = (inst as RecordOf<{rev?: number}>).rev;
605 if (rev != null && !revMap.has(rev)) {
606 revMap.set(rev, revMapOrFunc(rev));
607 }
608 });
609 } else {
610 revMap = revMapOrFunc;
611 }
612
613 const valueOfString = [...revMap.entries()].toString();
614 return this.rewriteInsts((c, _pc) => {
615 if (c.op === Op.JGE || c.op === Op.JL || c.op === Op.LINE) {
616 const newRev = revMap.get(c.rev) ?? c.rev;
617 // TypeScript cannot prove `c` has `rev`. Ideally it can figure out it automatically.
618 return (c as RecordOf<{rev: number}>).set('rev', newRev) as Inst;
619 }
620 return c;
621 }, valueOfString);
622 }
623
624 /**
625 * Rewrite all instructions. Returns [newCode, newMaxRev].
626 * `valueOfString` sets a unique string that represents the change for equal tests.
627 */
628 rewriteInsts(func: (inst: Inst, pc: Pc) => Inst, valueOfString?: string): [Code, Rev] {
629 let newMaxRev = 0;
630 const newInstList = this.instList
631 .map((inst, pc) => {
632 const newInst = func(inst, pc);
633 const newRev = (newInst as RecordOf<{rev: number}>).rev;
634 if (newRev != null && newRev > newMaxRev) {
635 newMaxRev = newRev;
636 }
637 return newInst;
638 })
639 .toList();
640 const newValueOf = `R${valueOfString ?? func}`;
641 const newCode = this.newCode(newInstList, newValueOf);
642 return [newCode, newMaxRev];
643 }
644
645 /**
646 * Drop edits for revs >= the given rev. Returns [newCode, newMaxRev].
647 */
648 truncate(rev: Rev): [Code, Rev] {
649 const valueOfString = `TRUNC${rev}`;
650 return this.rewriteInsts((inst, pc) => {
651 if (inst.op === Op.JGE || inst.op === Op.LINE) {
652 if (inst.rev >= rev) {
653 // NOP.
654 return J({pc: pc + 1}) as Inst;
655 }
656 } else if (inst.op === Op.JL) {
657 if (inst.rev >= rev) {
658 // Unconditional jump.
659 return J({pc: inst.pc}) as Inst;
660 }
661 }
662 return inst;
663 }, valueOfString);
664 }
665
666 private newCode(instList: List<Inst>, newValueOf: string): Code {
667 const newStr = this.__valueOf + '\0' + newValueOf;
668 // We want bitwise operations.
669 // eslint-disable-next-line no-bitwise
670 const newHash = (this.__hash * 23 + hash(newValueOf)) & 0x7fffffff;
671 return new Code(instList, newHash, newStr);
672 }
673}
674
675// Export for testing purpose.
676export const executeCache: LRUWithStats = new LRU(1000);
677const calculateDepCache: LRUWithStats = new LRU(1000);
678const flattenCache: LRUWithStats = new LRU(1000);
679const recordTextCache: LRUWithStats = new LRU(1000);
680
681type LineLogProps = {
682 /** Core state: instructions. The array index type is `Pc`. */
683 code: Code;
684 /** Maximum rev tracked (inclusive). */
685 maxRev: Rev;
686};
687
688const LineLogRecord = Record<LineLogProps>({
689 code: new Code(),
690 maxRev: 0 as Rev,
691});
692type LineLogRecord = RecordOf<LineLogProps>;
693
694/**
695 * `LineLog` is a data structure that tracks linear changes to a single text
696 * file. Conceptually similar to a list of texts like `string[]`, with extra
697 * features suitable for stack editing:
698 * - Calculate the "blame" of the text of a given version efficiently.
699 * - Edit lines or trunks in a past version, and affect future versions.
700 * - List all lines that ever existed with each line annotated, like
701 * a unified diff, but for all versions, not just 2 versions.
702 *
703 * Internally, `LineLog` is a byte-code interpreter that runs a program to
704 * emit lines. Changes are done by patching in new byte-codes. There are
705 * no traditional text patch involved. No operations would cause merge
706 * conflicts. See https://sapling-scm.com/docs/internals/linelog for more
707 * details.
708 *
709 * This implementation of `LineLog` uses immutable patterns.
710 * Write operations return new `LineLog`s.
711 */
712class LineLog extends SelfUpdate<LineLogRecord> {
713 constructor(props?: {code?: Code; maxRev?: Rev}) {
714 const record = LineLogRecord(props);
715 super(record);
716 }
717
718 get maxRev(): Rev {
719 return this.inner.maxRev;
720 }
721
722 get code(): Code {
723 return this.inner.code;
724 }
725
726 /**
727 * Edit chunk. Replace line `a1` (inclusive) to `a2` (exclusive) in rev
728 * `aRev` with `bLines`. `bLines` are considered introduced by `bRev`.
729 * If `bLines` is empty, the edit is a deletion. If `a1` equals to `a2`,
730 * the edit is an insertion. Otherwise, the edit is a modification.
731 *
732 * While this function does not cause conflicts or error out, not all
733 * editings make practical sense. The callsite might want to do some
734 * extra checks to ensure the edit is meaningful.
735 *
736 * `aLinesCache` is optional. If provided, then `editChunk` will skip a
737 * `checkOutLines` call and modify `aLinesCache` *in place* to reflect
738 * the edit. It is used by `recordText`.
739 *
740 * If `blockShift` is `true`, consider shifting the insertion lines
741 * to relax dependency for easier reordering. Check the comments
742 * in this function for details.
743 */
744 editChunk(
745 aRev: Rev,
746 a1: LineIdx,
747 a2: LineIdx,
748 bRev: Rev,
749 bLines: string[],
750 aLinesCache?: LineInfo[],
751 blockShift = true,
752 ): LineLog {
753 const aLinesMutable = aLinesCache != null;
754 const aLinesInfo: [LineInfo[], true] | [Readonly<LineInfo[]>, false] = aLinesMutable
755 ? [aLinesCache, true]
756 : [this.checkOutLines(aRev), false];
757
758 const bLen = bLines.length;
759 if (a1 === a2 && bLen > 0 && blockShift) {
760 // Attempt to shift the insertion chunk so the start of insertion aligns
761 // with another "start insertion". This might trigger the [OPT1]
762 // optimization in `code.editChunk`, avoid nested insertions and enable
763 // more flexible reordering.
764 //
765 // For example, we might get "Insert (rev 3)" below that forces a nested
766 // insertion block. However, if we shift the block and use the
767 // "Alternative Insert (rev 3)", we can use the [OPT1] optimization.
768 //
769 // +----Insert (rev 1)
770 // | Line: function a () {
771 // | Line: return 'a';
772 // | Line: }
773 // +----
774 // +----Insert (rev 2)
775 // | ----+ Alternative Insert (rev 3)
776 // | Line: |
777 // |+---Insert (rev 3) |
778 // || Line: function b () { |
779 // || Line: return 'b'; |
780 // || Line: } |
781 // || ----+
782 // || Line:
783 // |+---
784 // | Line: function c () {
785 // | Line: return 'c';
786 // | Line: }
787 // +----
788 //
789 // Block shifting works if the surrounding lines match, see:
790 //
791 // A A
792 // B +-------+
793 // +-------+ is equivalent to | B |
794 // | block | === shift up ==> | block |
795 // | B | <== shift down === +-------+
796 // +-------+ B
797 // C C
798
799 const aLines: Readonly<LineInfo[]> = aLinesInfo[0];
800 const canUseOpt1 = (a: LineIdx): boolean => {
801 const pc = aLines.at(a)?.pc;
802 // Check [OPT1] for how this works.
803 return pc != null && pc > 0 && this.code.get(pc - 1)?.op === Op.JL;
804 };
805 if (!canUseOpt1(a1)) {
806 const considerShift = (step: 'down' | 'up'): LineLog | undefined => {
807 let ai = a1;
808 let lines = [...bLines];
809 // Limit overhead.
810 const threshold = 10;
811 for (let i = 0; i < threshold; ++i) {
812 // Out of range?
813 if (step === 'up' ? ai === 0 : ai === aLines.length - 1) {
814 return undefined;
815 }
816 // Surrounding lines match?
817 const [aIdx, bIdx] = step === 'up' ? [ai - 1, -1] : [ai, 0];
818 const aData = aLines.at(aIdx)?.data;
819 const bData = lines.at(bIdx);
820 if (bData !== aData || bData == null) {
821 return undefined;
822 }
823 // Shift.
824 lines =
825 step === 'up' ? [bData].concat(lines.slice(0, -1)) : lines.slice(1).concat([bData]);
826 ai += step === 'up' ? -1 : 1;
827 // Good enough?
828 if (canUseOpt1(ai)) {
829 return this.editChunk(aRev, ai, ai, bRev, lines, aLinesCache, false);
830 }
831 }
832 };
833 const maybeShifted = considerShift('up') ?? considerShift('down');
834 if (maybeShifted != null) {
835 return maybeShifted;
836 }
837 }
838 }
839 const newCode = this.code.editChunk(aRev, a1, a2, bRev, bLines, aLinesInfo);
840 const newMaxRev = Math.max(bRev, this.maxRev);
841 return new LineLog({code: newCode, maxRev: newMaxRev});
842 }
843
844 /**
845 * Rewrite `rev` to `mapping[rev] ?? rev`.
846 * This can be useful for reordering, folding, or insertion.
847 *
848 * Note: There are no checks about whether the reordering is
849 * meaningful or not. The callsite is responsible to perform
850 * a dependency check and avoid troublesome reorders like
851 * moving a change to before its dependency.
852 */
853 remapRevs(revMap: Map<Rev, Rev> | ((rev: Rev) => Rev)): LineLog {
854 const [newCode, newMaxRev] = this.code.remapRevs(revMap);
855 return new LineLog({code: newCode, maxRev: newMaxRev});
856 }
857
858 /**
859 * Truncate linelog. Drop rev (inclusive) and higher revs.
860 */
861 truncate(rev: Rev): LineLog {
862 const [newCode, newMaxRev] = this.code.truncate(rev);
863 return new LineLog({code: newCode, maxRev: newMaxRev});
864 }
865
866 /**
867 * Calculate the dependencies of revisions.
868 * For example, `{5: [3, 1]}` means rev 5 depends on rev 3 and rev 1.
869 *
870 * Based on LineLog, which could be different from traditional textual
871 * context-line dependencies. LineLog dependency is to prevent
872 * "malformed cases" [1] when following the dependency to `remapRevs`.
873 * Practically, LineLog might allow reorder cases that would be
874 * disallowed by traditional context-line dependencies. See tests
875 * for examples.
876 *
877 * [1]: Malformed cases are when nested blocks (insertions or deletions)
878 * might be skipped incorrectly. The outer block says "skip" and the
879 * inner block does not want to "skip" but is still skipped since it
880 * is skipped altogether with the outer block. See also section 0.4
881 * and 0.5 in D3628440.
882 */
883 public calculateDepMap = cachedMethod(this.calculateDepMapImpl, {cache: calculateDepCache});
884
885 private calculateDepMapImpl(): Readonly<Map<Rev, Set<Rev>>> {
886 // With the insertion and deletion stacks (see explanation in
887 // visitWithInsDelStacks), when we see a new insertion block, or deletion
888 // block, we add two dependencies:
889 // - The inner rev depends on the outer insertion rev.
890 // - The outer deletion rev (if present) depends on the inner rev.
891 //
892 // Let's look at how this is done at the instruction level.
893 // the instructions generated by editChunk:
894 //
895 // a2Pc: ...
896 // ...
897 // start: JL brev b2Pc
898 // ...
899 // b2Pc: JGE brev a2Pc
900 // : <a1 Inst>
901 // end: J a1Pc+1
902 //
903 // JL is used for insertion, JGE is used for deletion. We then use them to
904 // manipulate the insStack and delStack:
905 //
906 // insStack:
907 //
908 // - On "start: JL brev b2Pc":
909 // Do not follow the JL jump. (by visitWithInsDelStacks)
910 // Mark brev as dependent on the outer insertion.
911 // Mark the outer deletion as dependent on this brev.
912 // Push {rev, b2Pc} to insStack. (by visitWithInsDelStacks)
913 // - When pc is b2Pc, pop insStack. (by visitWithInsDelStacks)
914 //
915 // delStack:
916 //
917 // - On "b2Pc: JGE brev a2Pc":
918 // Do not follow the JGE jump. (by visitWithInsDelStacks)
919 // Mark brev as dependent on the outer insertion.
920 // Mark the outer deletion as dependent on this brev.
921 // Push {rev, a2Pc} to delStack. (by visitWithInsDelStacks)
922 // - When pc is a2Pc, pop delStack. (by visitWithInsDelStacks)
923 const depMap = new Map<Rev, Set<Rev>>();
924 const addDep = (child: Rev, parent: Rev) => {
925 if (child > parent) {
926 if (!depMap.has(child)) {
927 depMap.set(child, new Set());
928 }
929 depMap.get(child)?.add(parent);
930 }
931 };
932 this.code.visitWithInsDelStacks((insStack, delStack) => {
933 const markDep = (rev: Rev) => {
934 const ins = insStack.at(-1);
935 if (ins !== undefined) {
936 addDep(rev, ins.rev);
937 }
938 const del = delStack.at(-1);
939 if (del !== undefined) {
940 addDep(del.rev, rev);
941 }
942 };
943 return {
944 onConditionalJump: inst => markDep(inst.rev),
945 };
946 });
947 return depMap;
948 }
949
950 /**
951 * Interpret the bytecodes with the given revision range.
952 * Used by `checkOut`.
953 */
954 public execute = cachedMethod(this.executeImpl, {cache: executeCache});
955
956 private executeImpl(
957 startRev: Rev,
958 endRev: Rev = startRev,
959 present?: {[pc: number]: boolean},
960 ): Readonly<LineInfo[]> {
961 const rev = endRev;
962 const lines: LineInfo[] = [];
963 let pc = 0;
964 let patience = this.code.getSize() * 2;
965 const deleted = present == null ? () => false : (pc: Pc) => !present[pc];
966 while (patience > 0) {
967 const code = nullthrows(this.code.get(pc));
968 switch (code.op) {
969 case Op.END:
970 lines.push({data: '', rev: 0, pc, deleted: deleted(pc)});
971 patience = -1;
972 break;
973 case Op.LINE:
974 lines.push({data: code.data, rev: code.rev, pc, deleted: deleted(pc)});
975 pc += 1;
976 break;
977 case Op.J:
978 pc = code.pc;
979 break;
980 case Op.JGE:
981 if (startRev >= code.rev) {
982 pc = code.pc;
983 } else {
984 pc += 1;
985 }
986 break;
987 case Op.JL:
988 if (rev < code.rev) {
989 pc = code.pc;
990 } else {
991 pc += 1;
992 }
993 break;
994 default:
995 assert(false, 'bug: unknown code');
996 }
997 patience -= 1;
998 }
999 if (patience === 0) {
1000 assert(false, 'bug: code does not end in time');
1001 }
1002 return lines;
1003 }
1004
1005 /**
1006 * Flatten lines. Each returned line is associated with a set
1007 * of `Rev`s, meaning that line is present in those `Rev`s.
1008 *
1009 * The returned lines can be useful to figure out file contents
1010 * after reordering, folding commits. It can also provide a view
1011 * similar to `absorb -e FILE` to edit all versions of a file in
1012 * a single view.
1013 */
1014 public flatten = cachedMethod(this.flattenImpl, {cache: flattenCache});
1015
1016 private flattenImpl(): List<FlattenLine> {
1017 const result: FlattenLine[] = [];
1018
1019 // See the comments in calculateDepMap for what the stacks mean.
1020 //
1021 // The flatten algorithm works as follows:
1022 // - For each line, we got an insRev (insStack.at(-1).rev), and a
1023 // delRev (delStack.at(-1)?.rev ?? maxRev + 1), meaning the rev
1024 // attached to the innermost insertion or deletion blocks,
1025 // respectively.
1026 // - That line is then present in insRev .. delRev (exclusive) revs.
1027 //
1028 // This works because:
1029 // - The blocks are nested in order:
1030 // - For nested insertions, the nested one must have a larger rev, and
1031 // lines inside the nested block are only present starting from the
1032 // larger rev.
1033 // - For nested deletions, the nested one must have a smaller rev, and
1034 // lines inside the nested block are considered as deleted by the
1035 // smaller rev.
1036 // - For interleaved insertion and deletions, insertion rev and deletion
1037 // rev are tracked separately so their calculations are independent
1038 // from each other.
1039 // - Linelog tracks linear history, so (insRev, delRev) can be converted to
1040 // a Set<Rev>.
1041 this.code.visitWithInsDelStacks((insStack, delStack) => {
1042 const maxDelRev = this.maxRev + 1;
1043 const getCurrentRevs = (): ImSet<Rev> => {
1044 const insRev = insStack.at(-1)?.rev ?? 0;
1045 const delRev = delStack.at(-1)?.rev ?? maxDelRev;
1046 return revRangeToSet(insRev, delRev);
1047 };
1048 let currentRevs = getCurrentRevs();
1049 return {
1050 onStackPush: () => {
1051 currentRevs = getCurrentRevs();
1052 },
1053 onStackPop: () => {
1054 currentRevs = getCurrentRevs();
1055 },
1056 onLine: ({data}) => {
1057 result.push(FlattenLine({data, revs: currentRevs}));
1058 },
1059 };
1060 });
1061 return List(result);
1062 }
1063
1064 /**
1065 * Checkout the lines of the given revision `rev`.
1066 *
1067 * If `start` is not `null`, checkout a revision range. For example,
1068 * if `start` is 0, and `rev` is `this.maxRev`, `this.lines` will
1069 * include all lines ever existed in all revisions.
1070 *
1071 * @returns Content of the specified revision.
1072 */
1073 public checkOutLines(rev: Rev, start: Rev | null = null): Readonly<LineInfo[]> {
1074 // eslint-disable-next-line no-param-reassign
1075 rev = Math.min(rev, this.maxRev);
1076 let lines = this.execute(rev);
1077 if (start !== null) {
1078 // Checkout a range, including deleted revs.
1079 const present: {[key: number]: boolean} = {};
1080 lines.forEach(l => {
1081 present[l.pc] = true;
1082 });
1083
1084 // Go through all lines again. But do not skip chunks.
1085 lines = this.execute(start, rev, present);
1086 }
1087 return lines;
1088 }
1089
1090 /** Checkout the content of the given rev. */
1091 public checkOut(rev: Rev): string {
1092 const lines = this.checkOutLines(rev);
1093 const content = lines.map(l => l.data).join('');
1094 return content;
1095 }
1096
1097 /**
1098 * Edit LineLog to match the content of `text`.
1099 * This might affect `rev`s that are >= `rev` in the stack.
1100 * Previous revisions won't be affected.
1101 *
1102 * @param text Content to match.
1103 * @param rev Revision to to edit (in-place). If not set, append a new revision.
1104 * @returns A new `LineLog` with the change.
1105 */
1106 public recordText = cachedMethod(this.recordTextImpl, {cache: recordTextCache});
1107
1108 private recordTextImpl(text: string, rev: Rev | null = null): LineLog {
1109 // rev to edit from, and rev to match 'text'.
1110 const [aRev, bRev] = rev != null ? [rev, rev] : [this.maxRev, this.maxRev + 1];
1111 const b = text;
1112
1113 const aLineInfos = [...this.checkOutLines(aRev)];
1114 const bLines = splitLines(b);
1115 const aLines = aLineInfos.map(l => l.data);
1116 aLines.pop(); // Drop the last END empty line.
1117 const blocks = diffLines(aLines, bLines);
1118 // eslint-disable-next-line @typescript-eslint/no-this-alias
1119 let log: LineLog = this;
1120
1121 blocks.reverse().forEach(([a1, a2, b1, b2]) => {
1122 log = log.editChunk(aRev, a1, a2, bRev, bLines.slice(b1, b2), aLineInfos);
1123 });
1124
1125 // This is needed in case editChunk is not called (no difference).
1126 const newMaxRev = Math.max(bRev, log.maxRev);
1127
1128 // Populate cache for checking out bRev.
1129 const newLog = new LineLog({code: log.code, maxRev: newMaxRev});
1130 executeCache.set(List([newLog, bRev]), aLineInfos);
1131
1132 return newLog;
1133 }
1134}
1135
1136/** Turn (3, 6) to Set([3, 4, 5]). */
1137const revRangeToSet = cached(
1138 (startRev, endRev: Rev): ImSet<Rev> => {
1139 const result: Rev[] = [];
1140 for (let rev = startRev; rev < endRev; rev++) {
1141 result.push(rev);
1142 }
1143 return ImSet(result);
1144 },
1145 {cacheSize: 1000},
1146);
1147
1148function describeInst(inst: Inst): string {
1149 switch (inst.op) {
1150 case Op.J:
1151 return `J ${inst.pc}`;
1152 case Op.JGE:
1153 return `JGE ${inst.rev} ${inst.pc}`;
1154 case Op.JL:
1155 return `JL ${inst.rev} ${inst.pc}`;
1156 case Op.LINE:
1157 return `LINE ${inst.rev} ${JSON.stringify(inst.data.trimEnd())}`;
1158 case Op.END:
1159 return 'END';
1160 }
1161}
1162
1163export {FlattenLine, LineLog};
1164export type {LineIdx, LineInfo, Rev};
1165