addons/isl/src/dag/base_dag.tsblame
View source
b69ab311/**
b69ab312 * Copyright (c) Meta Platforms, Inc. and affiliates.
b69ab313 *
b69ab314 * This source code is licensed under the MIT license found in the
b69ab315 * LICENSE file in the root directory of this source tree.
b69ab316 */
b69ab317
b69ab318import type {RecordOf} from 'immutable';
b69ab319import type {Hash} from '../types';
b69ab3110import type {SetLike} from './set';
b69ab3111
b69ab3112import {Map as ImMap, List, Record} from 'immutable';
b69ab3113import {LRU, cachedMethod} from 'shared/LRU';
b69ab3114import {SelfUpdate} from 'shared/immutableExt';
b69ab3115import {nullthrows} from 'shared/utils';
b69ab3116import {HashSet} from './set';
b69ab3117
b69ab3118/**
b69ab3119 * Hash-map like container with graph related queries.
b69ab3120 *
b69ab3121 * Unlike a traditional source control dag, this works more like a map:
b69ab3122 * - add() does not require parents to (recursively) exist.
b69ab3123 * - remove() does not remove descendants (recursively).
b69ab3124 *
b69ab3125 * The `BaseDag` is a minimal implementation for core graph query
b69ab3126 * needs (like heads, roots, range, ancestors, gca, etc).
b69ab3127 * For use-cases that involve commit attributes, like phases, bookmarks,
b69ab3128 * successors, use `dag/Dag` instead.
b69ab3129 *
b69ab3130 * Internally maintains a "parent -> child" mapping for efficient queries.
b69ab3131 * All queries, regardless of the input size, are O(N) worst case. None
b69ab3132 * should be O(N^2).
b69ab3133 */
b69ab3134export class BaseDag<C extends HashWithParents> extends SelfUpdate<BaseDagRecord<C>> {
b69ab3135 constructor(record?: BaseDagRecord<C>) {
b69ab3136 super(record ?? (EMPTY_DAG_RECORD as BaseDagRecord<C>));
b69ab3137 }
b69ab3138
b69ab3139 // Edit
b69ab3140
b69ab3141 /**
b69ab3142 * Add commits. Parents do not have to be added first.
b69ab3143 * If a commit with the same hash already exists, it will be replaced.
b69ab3144 */
b69ab3145 add(commits: Iterable<C>): BaseDag<C> {
b69ab3146 const commitArray = [...commits];
b69ab3147 const dag = this.remove(commitArray.map(c => c.hash));
b69ab3148 let {childMap, infoMap} = dag;
b69ab3149 for (const commit of commitArray) {
b69ab3150 commit.parents.forEach(p => {
b69ab3151 const children = childMap.get(p);
b69ab3152 const child = commit.hash;
b69ab3153 const newChildren =
b69ab3154 children == null
b69ab3155 ? List([child])
b69ab3156 : children.contains(child)
b69ab3157 ? children
b69ab3158 : children.push(child);
b69ab3159 childMap = childMap.set(p, newChildren);
b69ab3160 });
b69ab3161 infoMap = infoMap.set(commit.hash, commit);
b69ab3162 }
b69ab3163 const record = dag.inner.merge({infoMap, childMap});
b69ab3164 return new BaseDag(record);
b69ab3165 }
b69ab3166
b69ab3167 /** Remove commits by hash. Descendants are not removed automatically. */
b69ab3168 remove(set: SetLike): BaseDag<C> {
b69ab3169 const hashSet = HashSet.fromHashes(set);
b69ab3170 let {childMap, infoMap} = this;
b69ab3171 for (const hash of hashSet) {
b69ab3172 const commit = this.get(hash);
b69ab3173 if (commit == undefined) {
b69ab3174 continue;
b69ab3175 }
b69ab3176 commit.parents.forEach(p => {
b69ab3177 const children = childMap.get(p);
b69ab3178 if (children != null) {
b69ab3179 const newChildren = children.filter(h => h !== hash);
b69ab3180 childMap = childMap.set(p, newChildren);
b69ab3181 }
b69ab3182 });
b69ab3183 infoMap = infoMap.remove(hash);
b69ab3184 }
b69ab3185 const record = this.inner.merge({infoMap, childMap});
b69ab3186 return new BaseDag(record);
b69ab3187 }
b69ab3188
b69ab3189 // Basic query
b69ab3190
b69ab3191 get(hash: Hash | undefined | null): Readonly<C> | undefined {
b69ab3192 return hash == null ? undefined : this.infoMap.get(hash);
b69ab3193 }
b69ab3194
b69ab3195 has(hash: Hash | undefined | null): boolean {
b69ab3196 return this.get(hash) !== undefined;
b69ab3197 }
b69ab3198
b69ab3199 [Symbol.iterator](): IterableIterator<Hash> {
b69ab31100 return this.infoMap.keys();
b69ab31101 }
b69ab31102
b69ab31103 values(): Iterable<Readonly<C>> {
b69ab31104 return this.infoMap.values();
b69ab31105 }
b69ab31106
b69ab31107 /** Get parent hashes. Only return hashes present in this.infoMap. */
b69ab31108 parentHashes(hash: Hash): Readonly<Hash[]> {
b69ab31109 return this.infoMap.get(hash)?.parents?.filter(p => this.infoMap.has(p)) ?? [];
b69ab31110 }
b69ab31111
b69ab31112 /** Get child hashes. Only return hashes present in this.infoMap. */
b69ab31113 childHashes(hash: Hash): List<Hash> {
b69ab31114 if (!this.infoMap.has(hash)) {
b69ab31115 return EMPTY_LIST;
b69ab31116 }
b69ab31117 return this.childMap.get(hash) ?? EMPTY_LIST;
b69ab31118 }
b69ab31119
b69ab31120 // High-level query
b69ab31121
b69ab31122 parents(set: SetLike): HashSet {
b69ab31123 return flatMap(set, h => this.parentHashes(h));
b69ab31124 }
b69ab31125
b69ab31126 children(set: SetLike): HashSet {
b69ab31127 return flatMap(set, h => this.childHashes(h));
b69ab31128 }
b69ab31129
b69ab31130 /**
b69ab31131 * set + parents(set) + parents(parents(set)) + ...
b69ab31132 * If `within` is set, change `parents` to only return hashes within `within`.
b69ab31133 */
b69ab31134 ancestors = cachedMethod(this.ancestorsImpl, {cache: ancestorsCache});
b69ab31135 ancestorsImpl(set: SetLike, props?: {within?: SetLike}): HashSet {
b69ab31136 const filter = nullableWithinContains(props?.within);
b69ab31137 return unionFlatMap(set, h => this.parentHashes(h).filter(filter));
b69ab31138 }
b69ab31139
b69ab31140 /**
b69ab31141 * set + children(set) + children(children(set)) + ...
b69ab31142 * If `within` is set, change `children` to only return hashes within `within`.
b69ab31143 */
b69ab31144 descendants(set: SetLike, props?: {within?: SetLike}): HashSet {
b69ab31145 const filter = nullableWithinContains(props?.within);
b69ab31146 return unionFlatMap(set, h => this.childHashes(h).filter(filter));
b69ab31147 }
b69ab31148
b69ab31149 /** ancestors(heads) & descendants(roots) */
b69ab31150 range(roots: SetLike, heads: SetLike): HashSet {
b69ab31151 // PERF: This is not the most efficient, but easy to write.
b69ab31152 const ancestors = this.ancestors(heads);
b69ab31153 return ancestors.intersect(this.descendants(roots, {within: ancestors}));
b69ab31154 }
b69ab31155
b69ab31156 /** set - children(set) */
b69ab31157 roots(set: SetLike): HashSet {
b69ab31158 const children = this.children(set);
b69ab31159 return HashSet.fromHashes(set).subtract(children);
b69ab31160 }
b69ab31161
b69ab31162 /** set - parents(set) */
b69ab31163 heads(set: SetLike): HashSet {
b69ab31164 const parents = this.parents(set);
b69ab31165 return HashSet.fromHashes(set).subtract(parents);
b69ab31166 }
b69ab31167
b69ab31168 /** Greatest common ancestor. heads(ancestors(set1) & ancestors(set2)). */
b69ab31169 gca(set1: SetLike, set2: SetLike): HashSet {
b69ab31170 return this.heads(this.ancestors(set1).intersect(this.ancestors(set2)));
b69ab31171 }
b69ab31172
b69ab31173 /** ancestor in ancestors(descendant) */
b69ab31174 isAncestor(ancestor: Hash, descendant: Hash): boolean {
b69ab31175 // PERF: This is not the most efficient, but easy to write.
b69ab31176 return this.ancestors(descendant).contains(ancestor);
b69ab31177 }
b69ab31178
b69ab31179 /** Return hashes present in this dag. */
b69ab31180 present(set: SetLike): HashSet {
b69ab31181 const hashes = HashSet.fromHashes(set)
b69ab31182 .toHashes()
b69ab31183 .filter(h => this.has(h));
b69ab31184 return HashSet.fromHashes(hashes);
b69ab31185 }
b69ab31186
b69ab31187 /**
b69ab31188 * Return commits that match the given condition.
b69ab31189 * This can be useful for things like "obsolete()".
b69ab31190 * `set`, if not undefined, limits the search space.
b69ab31191 */
b69ab31192 filter(predicate: (commit: Readonly<C>) => boolean, set?: SetLike): HashSet {
b69ab31193 let hashes: SetLike;
b69ab31194 if (set === undefined) {
b69ab31195 hashes = this.infoMap.filter((commit, _hash) => predicate(commit)).keys();
b69ab31196 } else {
b69ab31197 hashes = HashSet.fromHashes(set)
b69ab31198 .toHashes()
b69ab31199 .filter(h => {
b69ab31200 const c = this.get(h);
b69ab31201 return c != undefined && predicate(c);
b69ab31202 });
b69ab31203 }
b69ab31204 return HashSet.fromHashes(hashes);
b69ab31205 }
b69ab31206
b69ab31207 /**
b69ab31208 * Topologically sort hashes from roots to heads.
b69ab31209 *
b69ab31210 * When there are multiple children (or roots), 'compare' breaks ties.
b69ab31211 * Attempt to avoid interleved branches by outputting a branch continuously
b69ab31212 * until completing the branch or hitting a merge.
b69ab31213 *
b69ab31214 * With `gap` set to `true`, there could be "gap" in `set`. For example,
b69ab31215 * with the graph x--y--z, the `set` can be ['x', 'z'] and sort will work
b69ab31216 * as expected, with the downside of being O(dag size). If you run sort
b69ab31217 * in a tight loop, consider setting `gap` to `false` for performance.
b69ab31218 */
b69ab31219 sortAsc(set: SetLike, props?: SortProps<C>): Array<Hash> {
b69ab31220 const iter = this.sortImpl(
b69ab31221 set,
b69ab31222 props?.compare ?? ((a, b) => (a.hash < b.hash ? -1 : 1)),
b69ab31223 s => this.roots(s),
b69ab31224 h => this.childHashes(h),
b69ab31225 h => this.parents(h),
b69ab31226 props?.gap ?? true,
b69ab31227 );
b69ab31228 return [...iter];
b69ab31229 }
b69ab31230
b69ab31231 /**
b69ab31232 * Topologically sort hashes from heads to roots.
b69ab31233 *
b69ab31234 * See `sortAsc` for details.
b69ab31235 */
b69ab31236 sortDesc(set: SetLike, props?: SortProps<C>): Array<Hash> {
b69ab31237 const iter = this.sortImpl(
b69ab31238 set,
b69ab31239 props?.compare ?? ((a, b) => (a.hash < b.hash ? 1 : -1)),
b69ab31240 s => this.heads(s),
b69ab31241 h => List(this.parentHashes(h)),
b69ab31242 h => this.children(h),
b69ab31243 props?.gap ?? true,
b69ab31244 );
b69ab31245 return [...iter];
b69ab31246 }
b69ab31247
b69ab31248 // DFS from 'start' to 'next'
b69ab31249 // not considering branch length.
b69ab31250 private *sortImpl(
b69ab31251 set: SetLike,
b69ab31252 compare: (a: C, b: C) => number,
b69ab31253 getStart: (set: HashSet) => HashSet,
b69ab31254 getNext: (hash: Hash) => List<Hash>,
b69ab31255 getPrev: (hash: Hash) => HashSet,
b69ab31256 gap: boolean,
b69ab31257 ): Generator<Hash> {
b69ab31258 const hashSet = this.present(set);
b69ab31259 // There might be "gaps" in hashSet. For example, set might be 'x' and 'z'
b69ab31260 // in a 'x--y--z' graph. We need to fill the gap ('y') to sort properly.
b69ab31261 // However, range(set, set) is O(dag.size) so we allow the callsite to
b69ab31262 // disable this feature with an option.
b69ab31263 const filledSet = gap ? this.range(hashSet, hashSet) : hashSet;
b69ab31264 const alreadyVisited = new Set<Hash>();
b69ab31265 // We use concat and pop (not unshift) so the order is reversed.
b69ab31266 const compareHash = (a: Hash, b: Hash) =>
b69ab31267 -compare(nullthrows(this.get(a)), nullthrows(this.get(b)));
b69ab31268 // The number of parents remaining to be visited. This ensures merges are not
b69ab31269 // outputted until all parents are outputted.
b69ab31270 const remaining = new Map<Hash, number>(
b69ab31271 filledSet.toSeq().map(h => [h, getPrev(h).intersect(filledSet).size]),
b69ab31272 );
b69ab31273 // List is used as a dequeue.
b69ab31274 let toVisit = getStart(filledSet).toHashes().toList().sort(compareHash);
b69ab31275 while (true) {
b69ab31276 // pop front
b69ab31277 const next = toVisit.last();
b69ab31278 if (next == null) {
b69ab31279 break;
b69ab31280 }
b69ab31281 toVisit = toVisit.pop();
b69ab31282 if (alreadyVisited.has(next)) {
b69ab31283 continue;
b69ab31284 }
b69ab31285 // Need to wait for visiting other parents first?
b69ab31286 if ((remaining.get(next) ?? 0) > 0) {
b69ab31287 toVisit = toVisit.unshift(next);
b69ab31288 continue;
b69ab31289 }
b69ab31290 // Output it.
b69ab31291 if (hashSet.contains(next)) {
b69ab31292 yield next;
b69ab31293 }
b69ab31294 alreadyVisited.add(next);
b69ab31295 // Visit next.
b69ab31296 const children = getNext(next).sort(compareHash);
b69ab31297 for (const c of children) {
b69ab31298 remaining.set(c, (remaining.get(c) ?? 1) - 1);
b69ab31299 }
b69ab31300 toVisit = toVisit.concat(children);
b69ab31301 }
b69ab31302 }
b69ab31303
b69ab31304 // Delegates
b69ab31305
b69ab31306 get infoMap(): ImMap<Hash, Readonly<C>> {
b69ab31307 return this.inner.infoMap;
b69ab31308 }
b69ab31309
b69ab31310 get childMap(): ImMap<Hash, List<Hash>> {
b69ab31311 return this.inner.childMap;
b69ab31312 }
b69ab31313
b69ab31314 // Filters.
b69ab31315
b69ab31316 merge(set?: SetLike): HashSet {
b69ab31317 return this.filter(c => c.parents.length > 1, set);
b69ab31318 }
b69ab31319}
b69ab31320
b69ab31321const ancestorsCache = new LRU(1000);
b69ab31322
b69ab31323function flatMap(set: SetLike, f: (h: Hash) => List<Hash> | Readonly<Array<Hash>>): HashSet {
b69ab31324 return new HashSet(
b69ab31325 HashSet.fromHashes(set)
b69ab31326 .toHashes()
b69ab31327 .flatMap(h => f(h)),
b69ab31328 );
b69ab31329}
b69ab31330
b69ab31331/** set + flatMap(set, f) + flatMap(flatMap(set, f), f) + ... */
b69ab31332export function unionFlatMap(
b69ab31333 set: SetLike,
b69ab31334 f: (h: Hash) => List<Hash> | Readonly<Array<Hash>>,
b69ab31335): HashSet {
b69ab31336 let result = new HashSet().toHashes();
b69ab31337 let newHashes = [...HashSet.fromHashes(set)];
b69ab31338 while (newHashes.length > 0) {
b69ab31339 result = result.concat(newHashes);
b69ab31340 const nextNewHashes: Hash[] = [];
b69ab31341 newHashes.forEach(h => {
b69ab31342 f(h).forEach(v => {
b69ab31343 if (!result.contains(v)) {
b69ab31344 nextNewHashes.push(v);
b69ab31345 }
b69ab31346 });
b69ab31347 });
b69ab31348 newHashes = nextNewHashes;
b69ab31349 }
b69ab31350 return HashSet.fromHashes(result);
b69ab31351}
b69ab31352
b69ab31353export type SortProps<C> = {compare?: (a: C, b: C) => number; gap?: boolean};
b69ab31354
b69ab31355/**
b69ab31356 * If `set` is undefined, return a function that always returns true.
b69ab31357 * Otherwise, return a function that checks whether `set` contains `h`.
b69ab31358 */
b69ab31359function nullableWithinContains(set?: SetLike): (h: Hash) => boolean {
b69ab31360 if (set === undefined) {
b69ab31361 return _h => true;
b69ab31362 } else {
b69ab31363 const hashSet = HashSet.fromHashes(set);
b69ab31364 return h => hashSet.contains(h);
b69ab31365 }
b69ab31366}
b69ab31367
b69ab31368/** Minimal fields needed to be used in commit graph structures. */
b69ab31369export interface HashWithParents {
b69ab31370 hash: Hash;
b69ab31371 parents: ReadonlyArray<Hash>;
b69ab31372 grandparents: ReadonlyArray<Hash>;
b69ab31373}
b69ab31374
b69ab31375type BaseDagProps<C extends HashWithParents> = {
b69ab31376 infoMap: ImMap<Hash, Readonly<C>>;
b69ab31377 // childMap is derived from infoMap.
b69ab31378 childMap: ImMap<Hash, List<Hash>>;
b69ab31379};
b69ab31380
b69ab31381const BaseDagRecord = Record<BaseDagProps<HashWithParents>>({
b69ab31382 infoMap: ImMap(),
b69ab31383 childMap: ImMap(),
b69ab31384});
b69ab31385type BaseDagRecord<C extends HashWithParents> = RecordOf<BaseDagProps<C>>;
b69ab31386
b69ab31387const EMPTY_DAG_RECORD = BaseDagRecord();
b69ab31388const EMPTY_LIST = List<Hash>();