diff --git a/src/utils/AbstractDirectedGraph.php b/src/utils/AbstractDirectedGraph.php index 40242fa..509f0f5 100644 --- a/src/utils/AbstractDirectedGraph.php +++ b/src/utils/AbstractDirectedGraph.php @@ -1,333 +1,337 @@ addNodes( * array( * $object->getPHID() => $object->getChildPHIDs(), * )); * $detector->loadGraph(); * * Now you can query the graph, e.g. by detecting cycles: * * $cycle = $detector->detectCycles($object->getPHID()); * * If ##$cycle## is empty, no graph cycle is reachable from the node. If it * is nonempty, it contains a list of nodes which form a graph cycle. * * NOTE: Nodes must be represented with scalars. * * @task build Graph Construction * @task cycle Cycle Detection * @task explore Graph Exploration */ abstract class AbstractDirectedGraph extends Phobject { private $knownNodes = array(); private $graphLoaded = false; /* -( Graph Construction )------------------------------------------------- */ /** * Load the edges for a list of nodes. You must override this method. You * will be passed a list of nodes, and should return a dictionary mapping * each node to the list of nodes that can be reached by following its the * edges which originate at it: for example, the child nodes of an object * which has a parent-child relationship to other objects. * * The intent of this method is to allow you to issue a single query per * graph level for graphs which are stored as edge tables in the database. * Generally, you will load all the objects which correspond to the list of * nodes, and then return a map from each of their IDs to all their children. * * NOTE: You must return an entry for every node you are passed, even if it * is invalid or can not be loaded. Either return an empty array (if this is * acceptable for your application) or throw an exception if you can't satisfy * this requirement. * * @param list A list of nodes. * @return dict A map of nodes to the nodes reachable along their edges. * There must be an entry for each node you were provided. * @task build */ abstract protected function loadEdges(array $nodes); /** * Seed the graph with known nodes. Often, you will provide the candidate * edges that a user is trying to create here, or the initial set of edges * you know about. * * @param dict A map of nodes to the nodes reachable along their edges. * @return this * @task build */ final public function addNodes(array $nodes) { if ($this->graphLoaded) { throw new Exception( pht( 'Call %s before calling %s. You can not add more nodes '. 'once you have loaded the graph.', __FUNCTION__.'()', 'loadGraph()')); } $this->knownNodes += $nodes; return $this; } final public function getNodes() { return $this->knownNodes; } /** - * Utility function to get a list of topographically sorted nodes out of a - * graph. + * Get the nodes in topological order. * - * This could be useful for example to figure out what order you can safely - * apply dependencies. - * - * Note this will loop indefinitely if the graph has a cycle. + * This method requires the graph be acyclic. For graphs which may contain + * cycles, see @{method:getNodesInRoughTopologicalOrder}. */ - final public function getTopographicallySortedNodes() { + final public function getNodesInTopologicalOrder() { $sorted = array(); $nodes = $this->getNodes(); $inverse_map = array(); foreach ($nodes as $node => $edges) { if (!isset($inverse_map[$node])) { $inverse_map[$node] = array(); } foreach ($edges as $edge) { if (!isset($inverse_map[$edge])) { $inverse_map[$edge] = array(); } $inverse_map[$edge][$node] = $node; } } $end_nodes = array(); foreach ($inverse_map as $node => $edges) { if (empty($edges)) { $end_nodes[] = $node; } } while (!empty($end_nodes)) { $current_node = array_pop($end_nodes); $sorted[] = $current_node; $current_edges = $nodes[$current_node]; foreach ($current_edges as $index => $current_edge) { // delete the edge from the normal map unset($nodes[$current_node][$index]); // and from the inverse map which is modestly trickier $inverse_nodes = $inverse_map[$current_edge]; unset($inverse_nodes[$current_node]); $inverse_map[$current_edge] = $inverse_nodes; // no more edges means this is an "end node" now if (empty($inverse_map[$current_edge])) { $end_nodes[] = $current_edge; } } } return $sorted; } /** - * Utility function to get the best effort topographically sorted - * nodes out of a graph. + * Get the nodes in topological order, or some roughly similar order if + * the graph contains cycles. + * + * This method will return an ordering for cyclic graphs. The method will + * attempt to make it look like a topological ordering, but since cyclic + * graphs have no complete toplogical ordering, you might get anything. + * + * If you know the graph is acyclic and want an actual topological order, + * use @{method:getNodesInTopologicalOrder}. */ - final public function getBestEffortTopographicallySortedNodes() { + final public function getNodesInRoughTopologicalOrder() { $nodes = $this->getNodes(); $edges = $this->loadEdges($nodes); $results = array(); $completed = array(); $depth = 0; while (true) { $next = array(); foreach ($nodes as $node) { if (isset($completed[$node])) { continue; } $capable = true; foreach ($edges[$node] as $edge) { if (!isset($completed[$edge])) { $capable = false; break; } } if ($capable) { $next[] = $node; } } if (count($next) === 0) { // No more nodes to traverse; we are deadlocked if the number // of completed nodes is less than the total number of nodes. break; } foreach ($next as $node) { $results[] = array( 'node' => $node, 'depth' => $depth, 'cycle' => false, ); $completed[$node] = true; } $depth++; } foreach ($nodes as $node) { if (!isset($completed[$node])) { $results[] = array( 'node' => $node, 'depth' => $depth, 'cycle' => true, ); } } return $results; } /** * Load the graph, building it out so operations can be performed on it. This * constructs the graph level-by-level, calling @{method:loadEdges} to * expand the graph at each stage until it is complete. * * @return this * @task build */ final public function loadGraph() { $new_nodes = $this->knownNodes; while (true) { $load = array(); foreach ($new_nodes as $node => $edges) { foreach ($edges as $edge) { if (!isset($this->knownNodes[$edge])) { $load[$edge] = true; } } } if (empty($load)) { break; } $load = array_keys($load); $new_nodes = $this->loadEdges($load); foreach ($load as $node) { if (!isset($new_nodes[$node]) || !is_array($new_nodes[$node])) { throw new Exception( pht( '%s must return an edge list array for each provided '. 'node, or the cycle detection algorithm may not terminate.', 'loadEdges()')); } } $this->addNodes($new_nodes); } $this->graphLoaded = true; return $this; } /* -( Cycle Detection )---------------------------------------------------- */ /** * Detect if there are any cycles reachable from a given node. * * If cycles are reachable, it returns a list of nodes which create a cycle. * Note that this list may include nodes which aren't actually part of the * cycle, but lie on the graph between the specified node and the cycle. * For example, it might return something like this (when passed "A"): * * A, B, C, D, E, C * * This means you can walk from A to B to C to D to E and then back to C, * which forms a cycle. A and B are included even though they are not part * of the cycle. When presenting information about graph cycles to users, * including these nodes is generally useful. This also shouldn't ever happen * if you've vetted prior edges before writing them, because it means there * is a preexisting cycle in the graph. * * NOTE: This only detects cycles reachable from a node. It does not detect * cycles in the entire graph. * * @param scalar The node to walk from, looking for graph cycles. * @return list|null Returns null if no cycles are reachable from the node, * or a list of nodes that form a cycle. * @task cycle */ final public function detectCycles($node) { if (!$this->graphLoaded) { throw new Exception( pht( 'Call %s to build the graph out before calling %s.', 'loadGraph()', __FUNCTION__.'()')); } if (!isset($this->knownNodes[$node])) { throw new Exception( pht( "The node '%s' is not known. Call %s to seed the graph with nodes.", $node, 'addNodes()')); } $visited = array(); return $this->performCycleDetection($node, $visited); } /** * Internal cycle detection implementation. Recursively walks the graph, * keeping track of where it's been, and returns the first cycle it finds. * * @param scalar The node to walk from. * @param list Previously visited nodes. * @return null|list Null if no cycles are found, or a list of nodes * which cycle. * @task cycle */ final private function performCycleDetection($node, array $visited) { $visited[$node] = true; foreach ($this->knownNodes[$node] as $edge) { if (isset($visited[$edge])) { $result = array_keys($visited); $result[] = $edge; return $result; } $result = $this->performCycleDetection($edge, $visited); if ($result) { return $result; } } return null; } } diff --git a/src/utils/__tests__/AbstractDirectedGraphTestCase.php b/src/utils/__tests__/AbstractDirectedGraphTestCase.php index 57af4ed..19128d6 100644 --- a/src/utils/__tests__/AbstractDirectedGraphTestCase.php +++ b/src/utils/__tests__/AbstractDirectedGraphTestCase.php @@ -1,179 +1,179 @@ array(), ); $cycle = $this->findGraphCycle($graph); $this->assertEqual(null, $cycle, pht('Trivial Graph')); } public function testNoncyclicGraph() { $graph = array( 'A' => array('B', 'C'), 'B' => array('D'), 'C' => array(), 'D' => array(), ); $cycle = $this->findGraphCycle($graph); $this->assertEqual(null, $cycle, pht('Noncyclic Graph')); } public function testTrivialCyclicGraph() { $graph = array( 'A' => array('A'), ); $cycle = $this->findGraphCycle($graph); $this->assertEqual(array('A', 'A'), $cycle, pht('Trivial Cycle')); } public function testCyclicGraph() { $graph = array( 'A' => array('B', 'C'), 'B' => array('D'), 'C' => array('E', 'F'), 'D' => array(), 'E' => array(), 'F' => array('G', 'C'), 'G' => array(), ); $cycle = $this->findGraphCycle($graph); $this->assertEqual(array('A', 'C', 'F', 'C'), $cycle, pht('Cyclic Graph')); } public function testNonTreeGraph() { // This graph is non-cyclic, but C is both a child and a grandchild of A. // This is permitted. $graph = array( 'A' => array('B', 'C'), 'B' => array('C'), 'C' => array(), ); $cycle = $this->findGraphCycle($graph); $this->assertEqual(null, $cycle, pht('Non-tree graph')); } public function testEdgeLoadFailure() { $graph = array( 'A' => array('B'), ); $raised = null; try { $this->findGraphCycle($graph); } catch (Exception $ex) { $raised = $ex; } $this->assertTrue( (bool)$raised, pht('Exception raised by unloadable edges.')); } - public function testTopographicSortTree() { + public function testTopologicalOrder() { $graph = array( 'A' => array('B', 'C'), 'B' => array('D', 'E'), 'C' => array(), 'D' => array(), 'E' => array(), ); - $sorted = $this->getTopographicSort($graph); + $sorted = $this->getTopologicalOrder($graph); $this->assertEqual( array('A', 'C', 'B', 'E', 'D'), $sorted, - pht('Topographically sorted tree.')); + pht('Topologically sorted tree.')); $graph = array( 'A' => array('B', 'C'), 'B' => array('C'), 'C' => array('D', 'E'), 'D' => array('E'), 'E' => array(), ); - $sorted = $this->getTopographicSort($graph); + $sorted = $this->getTopologicalOrder($graph); $this->assertEqual( array('A', 'B', 'C', 'D', 'E'), $sorted, - pht('Topographically sorted tree with nesting.')); + pht('Topologically sorted tree with nesting.')); } - public function testBestEffortTopographicSortTree() { + public function testRoughTopologicalOrder() { $graph = array( 'A' => array('B', 'C'), 'B' => array('D', 'E'), 'C' => array(), 'D' => array(), 'E' => array(), 'F' => array('H'), 'G' => array('F', 'E'), 'H' => array('G'), ); - $sorted = $this->getBestEffortTopographicSort($graph); + $sorted = $this->getRoughTopologicalOrder($graph); $this->assertEqual(count($graph), count($sorted)); $this->assertEqual('C', $sorted[0]['node']); $this->assertEqual('D', $sorted[1]['node']); $this->assertEqual('E', $sorted[2]['node']); $this->assertEqual('B', $sorted[3]['node']); $this->assertEqual('A', $sorted[4]['node']); $this->assertEqual('F', $sorted[5]['node']); $this->assertEqual('G', $sorted[6]['node']); $this->assertEqual('H', $sorted[7]['node']); $this->assertEqual(0, $sorted[0]['depth']); $this->assertEqual(0, $sorted[1]['depth']); $this->assertEqual(0, $sorted[2]['depth']); $this->assertEqual(1, $sorted[3]['depth']); $this->assertEqual(2, $sorted[4]['depth']); $this->assertEqual(3, $sorted[5]['depth']); $this->assertEqual(3, $sorted[6]['depth']); $this->assertEqual(3, $sorted[7]['depth']); $this->assertEqual(false, $sorted[0]['cycle']); $this->assertEqual(false, $sorted[1]['cycle']); $this->assertEqual(false, $sorted[2]['cycle']); $this->assertEqual(false, $sorted[3]['cycle']); $this->assertEqual(false, $sorted[4]['cycle']); $this->assertEqual(true, $sorted[5]['cycle']); $this->assertEqual(true, $sorted[6]['cycle']); $this->assertEqual(true, $sorted[7]['cycle']); } private function findGraphCycle(array $graph, $seed = 'A', $search = 'A') { $detector = new TestAbstractDirectedGraph(); $detector->setTestData($graph); $detector->addNodes(array_select_keys($graph, array($seed))); $detector->loadGraph(); return $detector->detectCycles($search); } - private function getTopographicSort(array $graph, $seed = 'A') { + private function getTopologicalOrder(array $graph, $seed = 'A') { $detector = new TestAbstractDirectedGraph(); $detector->setTestData($graph); $detector->addNodes(array_select_keys($graph, array($seed))); $detector->loadGraph(); - return $detector->getTopographicallySortedNodes(); + return $detector->getNodesInTopologicalOrder(); } - private function getBestEffortTopographicSort(array $graph) { + private function getRoughTopologicalOrder(array $graph) { $detector = new TestAbstractDirectedGraph(); $detector->setTestData($graph); $detector->addNodes(array_keys($graph)); - return $detector->getBestEffortTopographicallySortedNodes(); + return $detector->getNodesInRoughTopologicalOrder(); } }