| 1 | /** |
| 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. |
| 3 | * |
| 4 | * This source code is licensed under the MIT license found in the |
| 5 | * LICENSE file in the root directory of this source tree. |
| 6 | */ |
| 7 | |
| 8 | import type {RecordOf} from 'immutable'; |
| 9 | import type {Hash} from '../types'; |
| 10 | import type {SetLike} from './set'; |
| 11 | |
| 12 | import {Map as ImMap, List, Record} from 'immutable'; |
| 13 | import {LRU, cachedMethod} from 'shared/LRU'; |
| 14 | import {SelfUpdate} from 'shared/immutableExt'; |
| 15 | import {nullthrows} from 'shared/utils'; |
| 16 | import {HashSet} from './set'; |
| 17 | |
| 18 | /** |
| 19 | * Hash-map like container with graph related queries. |
| 20 | * |
| 21 | * Unlike a traditional source control dag, this works more like a map: |
| 22 | * - add() does not require parents to (recursively) exist. |
| 23 | * - remove() does not remove descendants (recursively). |
| 24 | * |
| 25 | * The `BaseDag` is a minimal implementation for core graph query |
| 26 | * needs (like heads, roots, range, ancestors, gca, etc). |
| 27 | * For use-cases that involve commit attributes, like phases, bookmarks, |
| 28 | * successors, use `dag/Dag` instead. |
| 29 | * |
| 30 | * Internally maintains a "parent -> child" mapping for efficient queries. |
| 31 | * All queries, regardless of the input size, are O(N) worst case. None |
| 32 | * should be O(N^2). |
| 33 | */ |
| 34 | export class BaseDag<C extends HashWithParents> extends SelfUpdate<BaseDagRecord<C>> { |
| 35 | constructor(record?: BaseDagRecord<C>) { |
| 36 | super(record ?? (EMPTY_DAG_RECORD as BaseDagRecord<C>)); |
| 37 | } |
| 38 | |
| 39 | // Edit |
| 40 | |
| 41 | /** |
| 42 | * Add commits. Parents do not have to be added first. |
| 43 | * If a commit with the same hash already exists, it will be replaced. |
| 44 | */ |
| 45 | add(commits: Iterable<C>): BaseDag<C> { |
| 46 | const commitArray = [...commits]; |
| 47 | const dag = this.remove(commitArray.map(c => c.hash)); |
| 48 | let {childMap, infoMap} = dag; |
| 49 | for (const commit of commitArray) { |
| 50 | commit.parents.forEach(p => { |
| 51 | const children = childMap.get(p); |
| 52 | const child = commit.hash; |
| 53 | const newChildren = |
| 54 | children == null |
| 55 | ? List([child]) |
| 56 | : children.contains(child) |
| 57 | ? children |
| 58 | : children.push(child); |
| 59 | childMap = childMap.set(p, newChildren); |
| 60 | }); |
| 61 | infoMap = infoMap.set(commit.hash, commit); |
| 62 | } |
| 63 | const record = dag.inner.merge({infoMap, childMap}); |
| 64 | return new BaseDag(record); |
| 65 | } |
| 66 | |
| 67 | /** Remove commits by hash. Descendants are not removed automatically. */ |
| 68 | remove(set: SetLike): BaseDag<C> { |
| 69 | const hashSet = HashSet.fromHashes(set); |
| 70 | let {childMap, infoMap} = this; |
| 71 | for (const hash of hashSet) { |
| 72 | const commit = this.get(hash); |
| 73 | if (commit == undefined) { |
| 74 | continue; |
| 75 | } |
| 76 | commit.parents.forEach(p => { |
| 77 | const children = childMap.get(p); |
| 78 | if (children != null) { |
| 79 | const newChildren = children.filter(h => h !== hash); |
| 80 | childMap = childMap.set(p, newChildren); |
| 81 | } |
| 82 | }); |
| 83 | infoMap = infoMap.remove(hash); |
| 84 | } |
| 85 | const record = this.inner.merge({infoMap, childMap}); |
| 86 | return new BaseDag(record); |
| 87 | } |
| 88 | |
| 89 | // Basic query |
| 90 | |
| 91 | get(hash: Hash | undefined | null): Readonly<C> | undefined { |
| 92 | return hash == null ? undefined : this.infoMap.get(hash); |
| 93 | } |
| 94 | |
| 95 | has(hash: Hash | undefined | null): boolean { |
| 96 | return this.get(hash) !== undefined; |
| 97 | } |
| 98 | |
| 99 | [Symbol.iterator](): IterableIterator<Hash> { |
| 100 | return this.infoMap.keys(); |
| 101 | } |
| 102 | |
| 103 | values(): Iterable<Readonly<C>> { |
| 104 | return this.infoMap.values(); |
| 105 | } |
| 106 | |
| 107 | /** Get parent hashes. Only return hashes present in this.infoMap. */ |
| 108 | parentHashes(hash: Hash): Readonly<Hash[]> { |
| 109 | return this.infoMap.get(hash)?.parents?.filter(p => this.infoMap.has(p)) ?? []; |
| 110 | } |
| 111 | |
| 112 | /** Get child hashes. Only return hashes present in this.infoMap. */ |
| 113 | childHashes(hash: Hash): List<Hash> { |
| 114 | if (!this.infoMap.has(hash)) { |
| 115 | return EMPTY_LIST; |
| 116 | } |
| 117 | return this.childMap.get(hash) ?? EMPTY_LIST; |
| 118 | } |
| 119 | |
| 120 | // High-level query |
| 121 | |
| 122 | parents(set: SetLike): HashSet { |
| 123 | return flatMap(set, h => this.parentHashes(h)); |
| 124 | } |
| 125 | |
| 126 | children(set: SetLike): HashSet { |
| 127 | return flatMap(set, h => this.childHashes(h)); |
| 128 | } |
| 129 | |
| 130 | /** |
| 131 | * set + parents(set) + parents(parents(set)) + ... |
| 132 | * If `within` is set, change `parents` to only return hashes within `within`. |
| 133 | */ |
| 134 | ancestors = cachedMethod(this.ancestorsImpl, {cache: ancestorsCache}); |
| 135 | ancestorsImpl(set: SetLike, props?: {within?: SetLike}): HashSet { |
| 136 | const filter = nullableWithinContains(props?.within); |
| 137 | return unionFlatMap(set, h => this.parentHashes(h).filter(filter)); |
| 138 | } |
| 139 | |
| 140 | /** |
| 141 | * set + children(set) + children(children(set)) + ... |
| 142 | * If `within` is set, change `children` to only return hashes within `within`. |
| 143 | */ |
| 144 | descendants(set: SetLike, props?: {within?: SetLike}): HashSet { |
| 145 | const filter = nullableWithinContains(props?.within); |
| 146 | return unionFlatMap(set, h => this.childHashes(h).filter(filter)); |
| 147 | } |
| 148 | |
| 149 | /** ancestors(heads) & descendants(roots) */ |
| 150 | range(roots: SetLike, heads: SetLike): HashSet { |
| 151 | // PERF: This is not the most efficient, but easy to write. |
| 152 | const ancestors = this.ancestors(heads); |
| 153 | return ancestors.intersect(this.descendants(roots, {within: ancestors})); |
| 154 | } |
| 155 | |
| 156 | /** set - children(set) */ |
| 157 | roots(set: SetLike): HashSet { |
| 158 | const children = this.children(set); |
| 159 | return HashSet.fromHashes(set).subtract(children); |
| 160 | } |
| 161 | |
| 162 | /** set - parents(set) */ |
| 163 | heads(set: SetLike): HashSet { |
| 164 | const parents = this.parents(set); |
| 165 | return HashSet.fromHashes(set).subtract(parents); |
| 166 | } |
| 167 | |
| 168 | /** Greatest common ancestor. heads(ancestors(set1) & ancestors(set2)). */ |
| 169 | gca(set1: SetLike, set2: SetLike): HashSet { |
| 170 | return this.heads(this.ancestors(set1).intersect(this.ancestors(set2))); |
| 171 | } |
| 172 | |
| 173 | /** ancestor in ancestors(descendant) */ |
| 174 | isAncestor(ancestor: Hash, descendant: Hash): boolean { |
| 175 | // PERF: This is not the most efficient, but easy to write. |
| 176 | return this.ancestors(descendant).contains(ancestor); |
| 177 | } |
| 178 | |
| 179 | /** Return hashes present in this dag. */ |
| 180 | present(set: SetLike): HashSet { |
| 181 | const hashes = HashSet.fromHashes(set) |
| 182 | .toHashes() |
| 183 | .filter(h => this.has(h)); |
| 184 | return HashSet.fromHashes(hashes); |
| 185 | } |
| 186 | |
| 187 | /** |
| 188 | * Return commits that match the given condition. |
| 189 | * This can be useful for things like "obsolete()". |
| 190 | * `set`, if not undefined, limits the search space. |
| 191 | */ |
| 192 | filter(predicate: (commit: Readonly<C>) => boolean, set?: SetLike): HashSet { |
| 193 | let hashes: SetLike; |
| 194 | if (set === undefined) { |
| 195 | hashes = this.infoMap.filter((commit, _hash) => predicate(commit)).keys(); |
| 196 | } else { |
| 197 | hashes = HashSet.fromHashes(set) |
| 198 | .toHashes() |
| 199 | .filter(h => { |
| 200 | const c = this.get(h); |
| 201 | return c != undefined && predicate(c); |
| 202 | }); |
| 203 | } |
| 204 | return HashSet.fromHashes(hashes); |
| 205 | } |
| 206 | |
| 207 | /** |
| 208 | * Topologically sort hashes from roots to heads. |
| 209 | * |
| 210 | * When there are multiple children (or roots), 'compare' breaks ties. |
| 211 | * Attempt to avoid interleved branches by outputting a branch continuously |
| 212 | * until completing the branch or hitting a merge. |
| 213 | * |
| 214 | * With `gap` set to `true`, there could be "gap" in `set`. For example, |
| 215 | * with the graph x--y--z, the `set` can be ['x', 'z'] and sort will work |
| 216 | * as expected, with the downside of being O(dag size). If you run sort |
| 217 | * in a tight loop, consider setting `gap` to `false` for performance. |
| 218 | */ |
| 219 | sortAsc(set: SetLike, props?: SortProps<C>): Array<Hash> { |
| 220 | const iter = this.sortImpl( |
| 221 | set, |
| 222 | props?.compare ?? ((a, b) => (a.hash < b.hash ? -1 : 1)), |
| 223 | s => this.roots(s), |
| 224 | h => this.childHashes(h), |
| 225 | h => this.parents(h), |
| 226 | props?.gap ?? true, |
| 227 | ); |
| 228 | return [...iter]; |
| 229 | } |
| 230 | |
| 231 | /** |
| 232 | * Topologically sort hashes from heads to roots. |
| 233 | * |
| 234 | * See `sortAsc` for details. |
| 235 | */ |
| 236 | sortDesc(set: SetLike, props?: SortProps<C>): Array<Hash> { |
| 237 | const iter = this.sortImpl( |
| 238 | set, |
| 239 | props?.compare ?? ((a, b) => (a.hash < b.hash ? 1 : -1)), |
| 240 | s => this.heads(s), |
| 241 | h => List(this.parentHashes(h)), |
| 242 | h => this.children(h), |
| 243 | props?.gap ?? true, |
| 244 | ); |
| 245 | return [...iter]; |
| 246 | } |
| 247 | |
| 248 | // DFS from 'start' to 'next' |
| 249 | // not considering branch length. |
| 250 | private *sortImpl( |
| 251 | set: SetLike, |
| 252 | compare: (a: C, b: C) => number, |
| 253 | getStart: (set: HashSet) => HashSet, |
| 254 | getNext: (hash: Hash) => List<Hash>, |
| 255 | getPrev: (hash: Hash) => HashSet, |
| 256 | gap: boolean, |
| 257 | ): Generator<Hash> { |
| 258 | const hashSet = this.present(set); |
| 259 | // There might be "gaps" in hashSet. For example, set might be 'x' and 'z' |
| 260 | // in a 'x--y--z' graph. We need to fill the gap ('y') to sort properly. |
| 261 | // However, range(set, set) is O(dag.size) so we allow the callsite to |
| 262 | // disable this feature with an option. |
| 263 | const filledSet = gap ? this.range(hashSet, hashSet) : hashSet; |
| 264 | const alreadyVisited = new Set<Hash>(); |
| 265 | // We use concat and pop (not unshift) so the order is reversed. |
| 266 | const compareHash = (a: Hash, b: Hash) => |
| 267 | -compare(nullthrows(this.get(a)), nullthrows(this.get(b))); |
| 268 | // The number of parents remaining to be visited. This ensures merges are not |
| 269 | // outputted until all parents are outputted. |
| 270 | const remaining = new Map<Hash, number>( |
| 271 | filledSet.toSeq().map(h => [h, getPrev(h).intersect(filledSet).size]), |
| 272 | ); |
| 273 | // List is used as a dequeue. |
| 274 | let toVisit = getStart(filledSet).toHashes().toList().sort(compareHash); |
| 275 | while (true) { |
| 276 | // pop front |
| 277 | const next = toVisit.last(); |
| 278 | if (next == null) { |
| 279 | break; |
| 280 | } |
| 281 | toVisit = toVisit.pop(); |
| 282 | if (alreadyVisited.has(next)) { |
| 283 | continue; |
| 284 | } |
| 285 | // Need to wait for visiting other parents first? |
| 286 | if ((remaining.get(next) ?? 0) > 0) { |
| 287 | toVisit = toVisit.unshift(next); |
| 288 | continue; |
| 289 | } |
| 290 | // Output it. |
| 291 | if (hashSet.contains(next)) { |
| 292 | yield next; |
| 293 | } |
| 294 | alreadyVisited.add(next); |
| 295 | // Visit next. |
| 296 | const children = getNext(next).sort(compareHash); |
| 297 | for (const c of children) { |
| 298 | remaining.set(c, (remaining.get(c) ?? 1) - 1); |
| 299 | } |
| 300 | toVisit = toVisit.concat(children); |
| 301 | } |
| 302 | } |
| 303 | |
| 304 | // Delegates |
| 305 | |
| 306 | get infoMap(): ImMap<Hash, Readonly<C>> { |
| 307 | return this.inner.infoMap; |
| 308 | } |
| 309 | |
| 310 | get childMap(): ImMap<Hash, List<Hash>> { |
| 311 | return this.inner.childMap; |
| 312 | } |
| 313 | |
| 314 | // Filters. |
| 315 | |
| 316 | merge(set?: SetLike): HashSet { |
| 317 | return this.filter(c => c.parents.length > 1, set); |
| 318 | } |
| 319 | } |
| 320 | |
| 321 | const ancestorsCache = new LRU(1000); |
| 322 | |
| 323 | function flatMap(set: SetLike, f: (h: Hash) => List<Hash> | Readonly<Array<Hash>>): HashSet { |
| 324 | return new HashSet( |
| 325 | HashSet.fromHashes(set) |
| 326 | .toHashes() |
| 327 | .flatMap(h => f(h)), |
| 328 | ); |
| 329 | } |
| 330 | |
| 331 | /** set + flatMap(set, f) + flatMap(flatMap(set, f), f) + ... */ |
| 332 | export function unionFlatMap( |
| 333 | set: SetLike, |
| 334 | f: (h: Hash) => List<Hash> | Readonly<Array<Hash>>, |
| 335 | ): HashSet { |
| 336 | let result = new HashSet().toHashes(); |
| 337 | let newHashes = [...HashSet.fromHashes(set)]; |
| 338 | while (newHashes.length > 0) { |
| 339 | result = result.concat(newHashes); |
| 340 | const nextNewHashes: Hash[] = []; |
| 341 | newHashes.forEach(h => { |
| 342 | f(h).forEach(v => { |
| 343 | if (!result.contains(v)) { |
| 344 | nextNewHashes.push(v); |
| 345 | } |
| 346 | }); |
| 347 | }); |
| 348 | newHashes = nextNewHashes; |
| 349 | } |
| 350 | return HashSet.fromHashes(result); |
| 351 | } |
| 352 | |
| 353 | export type SortProps<C> = {compare?: (a: C, b: C) => number; gap?: boolean}; |
| 354 | |
| 355 | /** |
| 356 | * If `set` is undefined, return a function that always returns true. |
| 357 | * Otherwise, return a function that checks whether `set` contains `h`. |
| 358 | */ |
| 359 | function nullableWithinContains(set?: SetLike): (h: Hash) => boolean { |
| 360 | if (set === undefined) { |
| 361 | return _h => true; |
| 362 | } else { |
| 363 | const hashSet = HashSet.fromHashes(set); |
| 364 | return h => hashSet.contains(h); |
| 365 | } |
| 366 | } |
| 367 | |
| 368 | /** Minimal fields needed to be used in commit graph structures. */ |
| 369 | export interface HashWithParents { |
| 370 | hash: Hash; |
| 371 | parents: ReadonlyArray<Hash>; |
| 372 | grandparents: ReadonlyArray<Hash>; |
| 373 | } |
| 374 | |
| 375 | type BaseDagProps<C extends HashWithParents> = { |
| 376 | infoMap: ImMap<Hash, Readonly<C>>; |
| 377 | // childMap is derived from infoMap. |
| 378 | childMap: ImMap<Hash, List<Hash>>; |
| 379 | }; |
| 380 | |
| 381 | const BaseDagRecord = Record<BaseDagProps<HashWithParents>>({ |
| 382 | infoMap: ImMap(), |
| 383 | childMap: ImMap(), |
| 384 | }); |
| 385 | type BaseDagRecord<C extends HashWithParents> = RecordOf<BaseDagProps<C>>; |
| 386 | |
| 387 | const EMPTY_DAG_RECORD = BaseDagRecord(); |
| 388 | const EMPTY_LIST = List<Hash>(); |
| 389 | |