11.9 KB389 lines
Blame
1/**
2 * Copyright (c) Meta Platforms, Inc. and affiliates.
3 *
4 * This source code is licensed under the MIT license found in the
5 * LICENSE file in the root directory of this source tree.
6 */
7
8import type {RecordOf} from 'immutable';
9import type {Hash} from '../types';
10import type {SetLike} from './set';
11
12import {Map as ImMap, List, Record} from 'immutable';
13import {LRU, cachedMethod} from 'shared/LRU';
14import {SelfUpdate} from 'shared/immutableExt';
15import {nullthrows} from 'shared/utils';
16import {HashSet} from './set';
17
18/**
19 * Hash-map like container with graph related queries.
20 *
21 * Unlike a traditional source control dag, this works more like a map:
22 * - add() does not require parents to (recursively) exist.
23 * - remove() does not remove descendants (recursively).
24 *
25 * The `BaseDag` is a minimal implementation for core graph query
26 * needs (like heads, roots, range, ancestors, gca, etc).
27 * For use-cases that involve commit attributes, like phases, bookmarks,
28 * successors, use `dag/Dag` instead.
29 *
30 * Internally maintains a "parent -> child" mapping for efficient queries.
31 * All queries, regardless of the input size, are O(N) worst case. None
32 * should be O(N^2).
33 */
34export class BaseDag<C extends HashWithParents> extends SelfUpdate<BaseDagRecord<C>> {
35 constructor(record?: BaseDagRecord<C>) {
36 super(record ?? (EMPTY_DAG_RECORD as BaseDagRecord<C>));
37 }
38
39 // Edit
40
41 /**
42 * Add commits. Parents do not have to be added first.
43 * If a commit with the same hash already exists, it will be replaced.
44 */
45 add(commits: Iterable<C>): BaseDag<C> {
46 const commitArray = [...commits];
47 const dag = this.remove(commitArray.map(c => c.hash));
48 let {childMap, infoMap} = dag;
49 for (const commit of commitArray) {
50 commit.parents.forEach(p => {
51 const children = childMap.get(p);
52 const child = commit.hash;
53 const newChildren =
54 children == null
55 ? List([child])
56 : children.contains(child)
57 ? children
58 : children.push(child);
59 childMap = childMap.set(p, newChildren);
60 });
61 infoMap = infoMap.set(commit.hash, commit);
62 }
63 const record = dag.inner.merge({infoMap, childMap});
64 return new BaseDag(record);
65 }
66
67 /** Remove commits by hash. Descendants are not removed automatically. */
68 remove(set: SetLike): BaseDag<C> {
69 const hashSet = HashSet.fromHashes(set);
70 let {childMap, infoMap} = this;
71 for (const hash of hashSet) {
72 const commit = this.get(hash);
73 if (commit == undefined) {
74 continue;
75 }
76 commit.parents.forEach(p => {
77 const children = childMap.get(p);
78 if (children != null) {
79 const newChildren = children.filter(h => h !== hash);
80 childMap = childMap.set(p, newChildren);
81 }
82 });
83 infoMap = infoMap.remove(hash);
84 }
85 const record = this.inner.merge({infoMap, childMap});
86 return new BaseDag(record);
87 }
88
89 // Basic query
90
91 get(hash: Hash | undefined | null): Readonly<C> | undefined {
92 return hash == null ? undefined : this.infoMap.get(hash);
93 }
94
95 has(hash: Hash | undefined | null): boolean {
96 return this.get(hash) !== undefined;
97 }
98
99 [Symbol.iterator](): IterableIterator<Hash> {
100 return this.infoMap.keys();
101 }
102
103 values(): Iterable<Readonly<C>> {
104 return this.infoMap.values();
105 }
106
107 /** Get parent hashes. Only return hashes present in this.infoMap. */
108 parentHashes(hash: Hash): Readonly<Hash[]> {
109 return this.infoMap.get(hash)?.parents?.filter(p => this.infoMap.has(p)) ?? [];
110 }
111
112 /** Get child hashes. Only return hashes present in this.infoMap. */
113 childHashes(hash: Hash): List<Hash> {
114 if (!this.infoMap.has(hash)) {
115 return EMPTY_LIST;
116 }
117 return this.childMap.get(hash) ?? EMPTY_LIST;
118 }
119
120 // High-level query
121
122 parents(set: SetLike): HashSet {
123 return flatMap(set, h => this.parentHashes(h));
124 }
125
126 children(set: SetLike): HashSet {
127 return flatMap(set, h => this.childHashes(h));
128 }
129
130 /**
131 * set + parents(set) + parents(parents(set)) + ...
132 * If `within` is set, change `parents` to only return hashes within `within`.
133 */
134 ancestors = cachedMethod(this.ancestorsImpl, {cache: ancestorsCache});
135 ancestorsImpl(set: SetLike, props?: {within?: SetLike}): HashSet {
136 const filter = nullableWithinContains(props?.within);
137 return unionFlatMap(set, h => this.parentHashes(h).filter(filter));
138 }
139
140 /**
141 * set + children(set) + children(children(set)) + ...
142 * If `within` is set, change `children` to only return hashes within `within`.
143 */
144 descendants(set: SetLike, props?: {within?: SetLike}): HashSet {
145 const filter = nullableWithinContains(props?.within);
146 return unionFlatMap(set, h => this.childHashes(h).filter(filter));
147 }
148
149 /** ancestors(heads) & descendants(roots) */
150 range(roots: SetLike, heads: SetLike): HashSet {
151 // PERF: This is not the most efficient, but easy to write.
152 const ancestors = this.ancestors(heads);
153 return ancestors.intersect(this.descendants(roots, {within: ancestors}));
154 }
155
156 /** set - children(set) */
157 roots(set: SetLike): HashSet {
158 const children = this.children(set);
159 return HashSet.fromHashes(set).subtract(children);
160 }
161
162 /** set - parents(set) */
163 heads(set: SetLike): HashSet {
164 const parents = this.parents(set);
165 return HashSet.fromHashes(set).subtract(parents);
166 }
167
168 /** Greatest common ancestor. heads(ancestors(set1) & ancestors(set2)). */
169 gca(set1: SetLike, set2: SetLike): HashSet {
170 return this.heads(this.ancestors(set1).intersect(this.ancestors(set2)));
171 }
172
173 /** ancestor in ancestors(descendant) */
174 isAncestor(ancestor: Hash, descendant: Hash): boolean {
175 // PERF: This is not the most efficient, but easy to write.
176 return this.ancestors(descendant).contains(ancestor);
177 }
178
179 /** Return hashes present in this dag. */
180 present(set: SetLike): HashSet {
181 const hashes = HashSet.fromHashes(set)
182 .toHashes()
183 .filter(h => this.has(h));
184 return HashSet.fromHashes(hashes);
185 }
186
187 /**
188 * Return commits that match the given condition.
189 * This can be useful for things like "obsolete()".
190 * `set`, if not undefined, limits the search space.
191 */
192 filter(predicate: (commit: Readonly<C>) => boolean, set?: SetLike): HashSet {
193 let hashes: SetLike;
194 if (set === undefined) {
195 hashes = this.infoMap.filter((commit, _hash) => predicate(commit)).keys();
196 } else {
197 hashes = HashSet.fromHashes(set)
198 .toHashes()
199 .filter(h => {
200 const c = this.get(h);
201 return c != undefined && predicate(c);
202 });
203 }
204 return HashSet.fromHashes(hashes);
205 }
206
207 /**
208 * Topologically sort hashes from roots to heads.
209 *
210 * When there are multiple children (or roots), 'compare' breaks ties.
211 * Attempt to avoid interleved branches by outputting a branch continuously
212 * until completing the branch or hitting a merge.
213 *
214 * With `gap` set to `true`, there could be "gap" in `set`. For example,
215 * with the graph x--y--z, the `set` can be ['x', 'z'] and sort will work
216 * as expected, with the downside of being O(dag size). If you run sort
217 * in a tight loop, consider setting `gap` to `false` for performance.
218 */
219 sortAsc(set: SetLike, props?: SortProps<C>): Array<Hash> {
220 const iter = this.sortImpl(
221 set,
222 props?.compare ?? ((a, b) => (a.hash < b.hash ? -1 : 1)),
223 s => this.roots(s),
224 h => this.childHashes(h),
225 h => this.parents(h),
226 props?.gap ?? true,
227 );
228 return [...iter];
229 }
230
231 /**
232 * Topologically sort hashes from heads to roots.
233 *
234 * See `sortAsc` for details.
235 */
236 sortDesc(set: SetLike, props?: SortProps<C>): Array<Hash> {
237 const iter = this.sortImpl(
238 set,
239 props?.compare ?? ((a, b) => (a.hash < b.hash ? 1 : -1)),
240 s => this.heads(s),
241 h => List(this.parentHashes(h)),
242 h => this.children(h),
243 props?.gap ?? true,
244 );
245 return [...iter];
246 }
247
248 // DFS from 'start' to 'next'
249 // not considering branch length.
250 private *sortImpl(
251 set: SetLike,
252 compare: (a: C, b: C) => number,
253 getStart: (set: HashSet) => HashSet,
254 getNext: (hash: Hash) => List<Hash>,
255 getPrev: (hash: Hash) => HashSet,
256 gap: boolean,
257 ): Generator<Hash> {
258 const hashSet = this.present(set);
259 // There might be "gaps" in hashSet. For example, set might be 'x' and 'z'
260 // in a 'x--y--z' graph. We need to fill the gap ('y') to sort properly.
261 // However, range(set, set) is O(dag.size) so we allow the callsite to
262 // disable this feature with an option.
263 const filledSet = gap ? this.range(hashSet, hashSet) : hashSet;
264 const alreadyVisited = new Set<Hash>();
265 // We use concat and pop (not unshift) so the order is reversed.
266 const compareHash = (a: Hash, b: Hash) =>
267 -compare(nullthrows(this.get(a)), nullthrows(this.get(b)));
268 // The number of parents remaining to be visited. This ensures merges are not
269 // outputted until all parents are outputted.
270 const remaining = new Map<Hash, number>(
271 filledSet.toSeq().map(h => [h, getPrev(h).intersect(filledSet).size]),
272 );
273 // List is used as a dequeue.
274 let toVisit = getStart(filledSet).toHashes().toList().sort(compareHash);
275 while (true) {
276 // pop front
277 const next = toVisit.last();
278 if (next == null) {
279 break;
280 }
281 toVisit = toVisit.pop();
282 if (alreadyVisited.has(next)) {
283 continue;
284 }
285 // Need to wait for visiting other parents first?
286 if ((remaining.get(next) ?? 0) > 0) {
287 toVisit = toVisit.unshift(next);
288 continue;
289 }
290 // Output it.
291 if (hashSet.contains(next)) {
292 yield next;
293 }
294 alreadyVisited.add(next);
295 // Visit next.
296 const children = getNext(next).sort(compareHash);
297 for (const c of children) {
298 remaining.set(c, (remaining.get(c) ?? 1) - 1);
299 }
300 toVisit = toVisit.concat(children);
301 }
302 }
303
304 // Delegates
305
306 get infoMap(): ImMap<Hash, Readonly<C>> {
307 return this.inner.infoMap;
308 }
309
310 get childMap(): ImMap<Hash, List<Hash>> {
311 return this.inner.childMap;
312 }
313
314 // Filters.
315
316 merge(set?: SetLike): HashSet {
317 return this.filter(c => c.parents.length > 1, set);
318 }
319}
320
321const ancestorsCache = new LRU(1000);
322
323function flatMap(set: SetLike, f: (h: Hash) => List<Hash> | Readonly<Array<Hash>>): HashSet {
324 return new HashSet(
325 HashSet.fromHashes(set)
326 .toHashes()
327 .flatMap(h => f(h)),
328 );
329}
330
331/** set + flatMap(set, f) + flatMap(flatMap(set, f), f) + ... */
332export function unionFlatMap(
333 set: SetLike,
334 f: (h: Hash) => List<Hash> | Readonly<Array<Hash>>,
335): HashSet {
336 let result = new HashSet().toHashes();
337 let newHashes = [...HashSet.fromHashes(set)];
338 while (newHashes.length > 0) {
339 result = result.concat(newHashes);
340 const nextNewHashes: Hash[] = [];
341 newHashes.forEach(h => {
342 f(h).forEach(v => {
343 if (!result.contains(v)) {
344 nextNewHashes.push(v);
345 }
346 });
347 });
348 newHashes = nextNewHashes;
349 }
350 return HashSet.fromHashes(result);
351}
352
353export type SortProps<C> = {compare?: (a: C, b: C) => number; gap?: boolean};
354
355/**
356 * If `set` is undefined, return a function that always returns true.
357 * Otherwise, return a function that checks whether `set` contains `h`.
358 */
359function nullableWithinContains(set?: SetLike): (h: Hash) => boolean {
360 if (set === undefined) {
361 return _h => true;
362 } else {
363 const hashSet = HashSet.fromHashes(set);
364 return h => hashSet.contains(h);
365 }
366}
367
368/** Minimal fields needed to be used in commit graph structures. */
369export interface HashWithParents {
370 hash: Hash;
371 parents: ReadonlyArray<Hash>;
372 grandparents: ReadonlyArray<Hash>;
373}
374
375type BaseDagProps<C extends HashWithParents> = {
376 infoMap: ImMap<Hash, Readonly<C>>;
377 // childMap is derived from infoMap.
378 childMap: ImMap<Hash, List<Hash>>;
379};
380
381const BaseDagRecord = Record<BaseDagProps<HashWithParents>>({
382 infoMap: ImMap(),
383 childMap: ImMap(),
384});
385type BaseDagRecord<C extends HashWithParents> = RecordOf<BaseDagProps<C>>;
386
387const EMPTY_DAG_RECORD = BaseDagRecord();
388const EMPTY_LIST = List<Hash>();
389