diff --git a/src/applications/diviner/workflow/DivinerAtomizeWorkflow.php b/src/applications/diviner/workflow/DivinerAtomizeWorkflow.php index 7e00ebfe03..aff3312bb7 100644 --- a/src/applications/diviner/workflow/DivinerAtomizeWorkflow.php +++ b/src/applications/diviner/workflow/DivinerAtomizeWorkflow.php @@ -1,137 +1,137 @@ setName('atomize') ->setSynopsis(pht('Build atoms from source.')) ->setArguments( array( array( 'name' => 'atomizer', 'param' => 'class', 'help' => pht('Specify a subclass of DivinerAtomizer.'), ), array( 'name' => 'book', 'param' => 'path', 'help' => pht('Path to a Diviner book configuration.'), ), array( 'name' => 'files', 'wildcard' => true, ), array( 'name' => 'ugly', 'help' => pht('Produce ugly (but faster) output.'), ), )); } public function execute(PhutilArgumentParser $args) { - $this->readBookConfiguration($args); + $this->readBookConfiguration($args->getArg('book')); $console = PhutilConsole::getConsole(); $atomizer_class = $args->getArg('atomizer'); if (!$atomizer_class) { throw new Exception("Specify an atomizer class with --atomizer."); } $symbols = id(new PhutilSymbolLoader()) ->setName($atomizer_class) ->setConcreteOnly(true) ->setAncestorClass('DivinerAtomizer') ->selectAndLoadSymbols(); if (!$symbols) { throw new Exception( "Atomizer class '{$atomizer_class}' must be a concrete subclass of ". 
"DivinerAtomizer."); } $atomizer = newv($atomizer_class, array()); $files = $args->getArg('files'); if (!$files) { throw new Exception("Specify one or more files to atomize."); } $file_atomizer = new DivinerFileAtomizer(); foreach (array($atomizer, $file_atomizer) as $configure) { $configure->setBook($this->getConfig('name')); } $group_rules = array(); foreach ($this->getConfig('groups', array()) as $group => $spec) { $include = (array)idx($spec, 'include', array()); foreach ($include as $pattern) { $group_rules[$pattern] = $group; } } $all_atoms = array(); $context = array( 'group' => null, ); foreach ($files as $file) { $abs_path = Filesystem::resolvePath($file, $this->getConfig('root')); $data = Filesystem::readFile($abs_path); if (!$this->shouldAtomizeFile($file, $data)) { $console->writeLog("Skipping %s...\n", $file); continue; } else { $console->writeLog("Atomizing %s...\n", $file); } $context['group'] = null; foreach ($group_rules as $rule => $group) { if (preg_match($rule, $file)) { $context['group'] = $group; break; } } $file_atoms = $file_atomizer->atomize($file, $data, $context); $all_atoms[] = $file_atoms; if (count($file_atoms) !== 1) { throw new Exception("Expected exactly one atom from file atomizer."); } $file_atom = head($file_atoms); $atoms = $atomizer->atomize($file, $data, $context); foreach ($atoms as $atom) { if (!$atom->hasParent()) { $file_atom->addChild($atom); } } $all_atoms[] = $atoms; } $all_atoms = array_mergev($all_atoms); $all_atoms = mpull($all_atoms, 'toDictionary'); $all_atoms = ipull($all_atoms, null, 'hash'); if ($args->getArg('ugly')) { $json = json_encode($all_atoms); } else { $json_encoder = new PhutilJSON(); $json = $json_encoder->encodeFormatted($all_atoms); } $console->writeOut('%s', $json); return 0; } private function shouldAtomizeFile($file_name, $file_data) { return (strpos($file_data, '@'.'undivinable') === false); } } diff --git a/src/applications/diviner/workflow/DivinerGenerateWorkflow.php 
b/src/applications/diviner/workflow/DivinerGenerateWorkflow.php index c3511cb785..6975f85c76 100644 --- a/src/applications/diviner/workflow/DivinerGenerateWorkflow.php +++ b/src/applications/diviner/workflow/DivinerGenerateWorkflow.php @@ -1,491 +1,525 @@ setName('generate') ->setSynopsis(pht('Generate documentation.')) ->setArguments( array( array( 'name' => 'clean', 'help' => 'Clear the caches before generating documentation.', ), array( 'name' => 'book', 'param' => 'path', 'help' => 'Path to a Diviner book configuration.', ), )); } protected function getAtomCache() { if (!$this->atomCache) { $book_root = $this->getConfig('root'); $book_name = $this->getConfig('name'); $cache_directory = $book_root.'/.divinercache/'.$book_name; $this->atomCache = new DivinerAtomCache($cache_directory); } return $this->atomCache; } protected function log($message) { $console = PhutilConsole::getConsole(); $console->writeErr($message."\n"); } public function execute(PhutilArgumentParser $args) { - $this->readBookConfiguration($args); + $book = $args->getArg('book'); + if ($book) { + $books = array($book); + } else { + $cwd = getcwd(); + $this->log(pht('FINDING DOCUMENTATION BOOKS')); + $books = id(new FileFinder($cwd)) + ->withType('f') + ->withSuffix('book') + ->find(); + + if (!$books) { + throw new PhutilArgumentUsageException( + pht( + "There are no Diviner '.book' files anywhere beneath the ". + "current directory. Use '--book ' to specify a ". 
+ "documentation book to generate.")); + } else { + $this->log(pht('Found %s book(s).', new PhutilNumber(count($books)))); + } + } + + foreach ($books as $book) { + $short_name = basename($book); + + $this->log(pht('Generating book "%s"...', $short_name)); + $this->generateBook($book, $args); + $this->log(pht('Completed generation of "%s".', $short_name)."\n"); + } + } + + private function generateBook($book, PhutilArgumentParser $args) { + $this->atomCache = null; + + $this->readBookConfiguration($book); if ($args->getArg('clean')) { $this->log(pht('CLEARING CACHES')); $this->getAtomCache()->delete(); $this->log(pht('Done.')."\n"); } // The major challenge of documentation generation is one of dependency // management. When regenerating documentation, we want to do the smallest // amount of work we can, so that regenerating documentation after minor // changes is quick. // // ATOM CACHE // // In the first stage, we find all the direct changes to source code since // the last run. This stage relies on two data structures: // // - File Hash Map: map // - Atom Map: map // // First, we hash all the source files in the project to detect any which // have changed since the previous run (i.e., their hash is not present in // the File Hash Map). If a file's content hash appears in the map, it has // not changed, so we don't need to reparse it. // // We break the contents of each file into "atoms", which represent a unit // of source code (like a function, method, class or file). Each atom has a // "node hash" based on the content of the atom: if a function definition // changes, the node hash of the atom changes too. The primary output of // the atom cache is a list of node hashes which exist in the project. This // is the Atom Map. The node hash depends only on the definition of the atom // and the atomizer implementation. It ends with an "N", for "node". 
// // (We need the Atom Map in addition to the File Hash Map because each file // may have several atoms in it (e.g., multiple functions, or a class and // its methods). The File Hash Map contains an exhaustive list of all atoms // with type "file", but not child atoms of those top-level atoms.) // // GRAPH CACHE // // We now know which atoms exist, and can compare the Atom Map to some // existing cache to figure out what has changed. However, this isn't // sufficient to figure out which documentation actually needs to be // regenerated, because atoms depend on other atoms. For example, if "B // extends A" and the definition for A changes, we need to regenerate the // documentation in B. Similarly, if X links to Y and Y changes, we should // regenerate X. (In both these cases, the documentation for the connected // atom may not actually change, but in some cases it will, and the extra // work we need to do is generally very small compared to the size of the // project.) // // To figure out which other nodes have changed, we compute a "graph hash" // for each node. This hash combines the "node hash" with the node hashes // of connected nodes. Our primary output is a list of graph hashes, which // a documentation generator can use to easily determine what work needs // to be done by comparing the list with a list of cached graph hashes, // then generating documentation for new hashes and deleting documentation // for missing hashes. The graph hash ends with a "G", for "graph". // // In this stage, we rely on three data structures: // // - Symbol Map: map<node_hash, symbol_hash> // - Edge Map: map<node_hash, list<symbol_hash>> // - Graph Map: map<node_hash, graph_hash> // // Calculating the graph hash requires several steps, because we need to // figure out which nodes an atom is attached to. The atom contains symbolic // references to other nodes by name (e.g., "extends SomeClass") in the form // of DivinerAtomRefs. We can also build a symbolic reference for any atom // from the atom itself.
Each DivinerAtomRef generates a symbol hash, // which ends with an "S", for "symbol". // // First, we update the symbol map. We remove (and mark dirty) any symbols // associated with node hashes which no longer exist (e.g., old/dead nodes). // Second, we add (and mark dirty) any symbols associated with new nodes. // We also add edges defined by new nodes to the graph. // // We initialize a list of dirty nodes to the list of new nodes, then // find all nodes connected to dirty symbols and add them to the dirty // node list. This list now contains every node with a new or changed // graph hash. // // We walk the dirty list and compute the new graph hashes, adding them // to the graph hash map. This Graph Map can then be passed to an actual // documentation generator, which can compare the graph hashes to a list // of already-generated graph hashes and easily assess which documents need // to be regenerated and which can be deleted. $this->buildAtomCache(); $this->buildGraphCache(); $this->publishDocumentation($args->getArg('clean')); } /* -( Atom Cache )--------------------------------------------------------- */ private function buildAtomCache() { $this->log(pht('BUILDING ATOM CACHE')); $file_hashes = $this->findFilesInProject(); $this->log(pht('Found %d file(s) in project.', count($file_hashes))); $this->deleteDeadAtoms($file_hashes); $atomize = $this->getFilesToAtomize($file_hashes); $this->log(pht('Found %d unatomized, uncached file(s).', count($atomize))); $file_atomizers = $this->getAtomizersForFiles($atomize); $this->log(pht('Found %d file(s) to atomize.', count($file_atomizers))); $futures = $this->buildAtomizerFutures($file_atomizers); $this->log(pht('Atomizing %d file(s).', count($file_atomizers))); if ($futures) { $this->resolveAtomizerFutures($futures, $file_hashes); $this->log(pht("Atomization complete.")); } else { $this->log(pht("Atom cache is up to date, no files to atomize.")); } $this->log(pht("Writing atom cache.")); 
$this->getAtomCache()->saveAtoms(); $this->log(pht('Done.')."\n"); } private function getAtomizersForFiles(array $files) { $rules = $this->getRules(); $exclude = $this->getExclude(); $atomizers = array(); foreach ($files as $file) { foreach ($exclude as $pattern) { if (preg_match($pattern, $file)) { continue 2; } } foreach ($rules as $rule => $atomizer) { $ok = preg_match($rule, $file); if ($ok === false) { throw new Exception( "Rule '{$rule}' is not a valid regular expression."); } if ($ok) { $atomizers[$file] = $atomizer; continue; } } } return $atomizers; } private function getRules() { $rules = $this->getConfig('rules', array( '/\\.diviner$/' => 'DivinerArticleAtomizer', '/\\.php$/' => 'DivinerPHPAtomizer', )); return $rules; } private function getExclude() { $exclude = (array)$this->getConfig('exclude', array()); return $exclude; } private function findFilesInProject() { $raw_hashes = id(new FileFinder($this->getConfig('root'))) ->excludePath('*/.*') ->withType('f') ->setGenerateChecksums(true) ->find(); $version = $this->getDivinerAtomWorldVersion(); $file_hashes = array(); foreach ($raw_hashes as $file => $md5_hash) { $rel_file = Filesystem::readablePath($file, $this->getConfig('root')); // We want the hash to change if the file moves or Diviner gets updated, // not just if the file content changes. Derive a hash from everything // we care about. 
$file_hashes[$rel_file] = md5("{$rel_file}\0{$md5_hash}\0{$version}").'F'; } return $file_hashes; } private function deleteDeadAtoms(array $file_hashes) { $atom_cache = $this->getAtomCache(); $hash_to_file = array_flip($file_hashes); foreach ($atom_cache->getFileHashMap() as $hash => $atom) { if (empty($hash_to_file[$hash])) { $atom_cache->deleteFileHash($hash); } } } private function getFilesToAtomize(array $file_hashes) { $atom_cache = $this->getAtomCache(); $atomize = array(); foreach ($file_hashes as $file => $hash) { if (!$atom_cache->fileHashExists($hash)) { $atomize[] = $file; } } return $atomize; } private function buildAtomizerFutures(array $file_atomizers) { $atomizers = array(); foreach ($file_atomizers as $file => $atomizer) { $atomizers[$atomizer][] = $file; } $root = dirname(phutil_get_library_root('phabricator')); $config_root = $this->getConfig('root'); $bar = id(new PhutilConsoleProgressBar()) ->setTotal(count($file_atomizers)); $futures = array(); foreach ($atomizers as $class => $files) { foreach (array_chunk($files, 32) as $chunk) { $future = new ExecFuture( '%s atomize --ugly --book %s --atomizer %s -- %Ls', $root.'/bin/diviner', $this->getBookConfigPath(), $class, $chunk); $future->setCWD($config_root); $futures[] = $future; $bar->update(count($chunk)); } } $bar->done(); return $futures; } private function resolveAtomizerFutures(array $futures, array $file_hashes) { assert_instances_of($futures, 'Future'); $atom_cache = $this->getAtomCache(); $bar = id(new PhutilConsoleProgressBar()) ->setTotal(count($futures)); foreach (Futures($futures)->limit(4) as $key => $future) { $atoms = $future->resolveJSON(); foreach ($atoms as $atom) { if ($atom['type'] == DivinerAtom::TYPE_FILE) { $file_hash = $file_hashes[$atom['file']]; $atom_cache->addFileHash($file_hash, $atom['hash']); } $atom_cache->addAtom($atom); } $bar->update(1); } $bar->done(); } /** * Get a global version number, which changes whenever any atom or atomizer * implementation changes in a 
way which is not backward-compatible. */ private function getDivinerAtomWorldVersion() { $version = array(); $version['atom'] = DivinerAtom::getAtomSerializationVersion(); $version['rules'] = $this->getRules(); $atomizers = id(new PhutilSymbolLoader()) ->setAncestorClass('DivinerAtomizer') ->setConcreteOnly(true) ->selectAndLoadSymbols(); $atomizer_versions = array(); foreach ($atomizers as $atomizer) { $atomizer_versions[$atomizer['name']] = call_user_func( array( $atomizer['name'], 'getAtomizerVersion', )); } ksort($atomizer_versions); $version['atomizers'] = $atomizer_versions; return md5(serialize($version)); } /* -( Graph Cache )-------------------------------------------------------- */ private function buildGraphCache() { $this->log(pht('BUILDING GRAPH CACHE')); $atom_cache = $this->getAtomCache(); $symbol_map = $atom_cache->getSymbolMap(); $atoms = $atom_cache->getAtomMap(); $dirty_symbols = array(); $dirty_nhashes = array(); $del_atoms = array_diff_key($symbol_map, $atoms); $this->log(pht('Found %d obsolete atom(s) in graph.', count($del_atoms))); foreach ($del_atoms as $nhash => $shash) { $atom_cache->deleteSymbol($nhash); $dirty_symbols[$shash] = true; $atom_cache->deleteEdges($nhash); $atom_cache->deleteGraph($nhash); } $new_atoms = array_diff_key($atoms, $symbol_map); $this->log(pht('Found %d new atom(s) in graph.', count($new_atoms))); foreach ($new_atoms as $nhash => $ignored) { $shash = $this->computeSymbolHash($nhash); $atom_cache->addSymbol($nhash, $shash); $dirty_symbols[$shash] = true; $atom_cache->addEdges( $nhash, $this->getEdges($nhash)); $dirty_nhashes[$nhash] = true; } $this->log(pht('Propagating changes through the graph.')); // Find all the nodes which point at a dirty node, and dirty them. Then // find all the nodes which point at those nodes and dirty them, and so // on. 
(This is slightly overkill since we probably don't need to propagate // dirtiness across documentation "links" between symbols, but we do want // to propagate it across "extends", and we suffer only a little bit of // collateral damage by over-dirtying as long as the documentation isn't // too well-connected.) $symbol_stack = array_keys($dirty_symbols); while ($symbol_stack) { $symbol_hash = array_pop($symbol_stack); foreach ($atom_cache->getEdgesWithDestination($symbol_hash) as $edge) { $dirty_nhashes[$edge] = true; $src_hash = $this->computeSymbolHash($edge); if (empty($dirty_symbols[$src_hash])) { $dirty_symbols[$src_hash] = true; $symbol_stack[] = $src_hash; } } } $this->log(pht('Found %d affected atoms.', count($dirty_nhashes))); foreach ($dirty_nhashes as $nhash => $ignored) { $atom_cache->addGraph($nhash, $this->computeGraphHash($nhash)); } $this->log(pht('Writing graph cache.')); $atom_cache->saveGraph(); $atom_cache->saveEdges(); $atom_cache->saveSymbols(); $this->log(pht('Done.')."\n"); } private function computeSymbolHash($node_hash) { $atom_cache = $this->getAtomCache(); $atom = $atom_cache->getAtom($node_hash); if (!$atom) { throw new Exception("No such atom with node hash '{$node_hash}'!"); } $ref = DivinerAtomRef::newFromDictionary($atom['ref']); return $ref->toHash(); } private function getEdges($node_hash) { $atom_cache = $this->getAtomCache(); $atom = $atom_cache->getAtom($node_hash); $refs = array(); // Make the atom depend on its own symbol, so that all atoms with the same // symbol are dirtied (e.g., if a codebase defines the function "f()" // several times, all of them should be dirtied when one is dirtied). 
$refs[DivinerAtomRef::newFromDictionary($atom)->toHash()] = true; foreach (array_merge($atom['extends'], $atom['links']) as $ref_dict) { $ref = DivinerAtomRef::newFromDictionary($ref_dict); if ($ref->getBook() == $atom['book']) { $refs[$ref->toHash()] = true; } } return array_keys($refs); } private function computeGraphHash($node_hash) { $atom_cache = $this->getAtomCache(); $atom = $atom_cache->getAtom($node_hash); $edges = $this->getEdges($node_hash); sort($edges); $inputs = array( 'atomHash' => $atom['hash'], 'edges' => $edges, ); return md5(serialize($inputs)).'G'; } private function publishDocumentation($clean) { $atom_cache = $this->getAtomCache(); $graph_map = $atom_cache->getGraphMap(); $this->log(pht('PUBLISHING DOCUMENTATION')); $publisher = new DivinerLivePublisher(); $publisher->setDropCaches($clean); $publisher->setConfig($this->getAllConfig()); $publisher->setAtomCache($atom_cache); $publisher->setRenderer(new DivinerDefaultRenderer()); $publisher->publishAtoms(array_values($graph_map)); $this->log(pht('Done.')); } } diff --git a/src/applications/diviner/workflow/DivinerWorkflow.php b/src/applications/diviner/workflow/DivinerWorkflow.php index 43b402aa4a..e0efa46d9a 100644 --- a/src/applications/diviner/workflow/DivinerWorkflow.php +++ b/src/applications/diviner/workflow/DivinerWorkflow.php @@ -1,77 +1,76 @@ bookConfigPath; } protected function getConfig($key, $default = null) { return idx($this->config, $key, $default); } protected function getAllConfig() { return $this->config; } - protected function readBookConfiguration(PhutilArgumentParser $args) { - $book_path = $args->getArg('book'); + protected function readBookConfiguration($book_path) { if ($book_path === null) { throw new PhutilArgumentUsageException( "Specify a Diviner book configuration file with --book."); } $book_data = Filesystem::readFile($book_path); $book = json_decode($book_data, true); if (!is_array($book)) { throw new PhutilArgumentUsageException( "Book configuration 
'{$book_path}' is not in JSON format."); } PhutilTypeSpec::checkMap( $book, array( 'name' => 'string', 'title' => 'optional string', 'short' => 'optional string', 'preface' => 'optional string', 'root' => 'optional string', 'uri.source' => 'optional string', 'rules' => 'optional map', 'exclude' => 'optional regex|list', 'groups' => 'optional map>', )); // If the book specifies a "root", resolve it; otherwise, use the directory // the book configuration file lives in. $full_path = dirname(Filesystem::resolvePath($book_path)); if (empty($book['root'])) { $book['root'] = '.'; } $book['root'] = Filesystem::resolvePath($book['root'], $full_path); if (!preg_match('/^[a-z][a-z-]*$/', $book['name'])) { $name = $book['name']; throw new PhutilArgumentUsageException( "Book configuration '{$book_path}' has name '{$name}', but book names ". "must include only lowercase letters and hyphens."); } foreach (idx($book, 'groups', array()) as $group) { PhutilTypeSpec::checkmap( $group, array( 'name' => 'string', 'include' => 'optional regex|list', )); } $this->bookConfigPath = $book_path; $this->config = $book; } }