src/applications/system/controller/PhabricatorRobotsController.php
<?php

/**
 * Base controller for serving "robots.txt" content.
 *
 * Subclasses implement @{method:newRobotsRules} to supply site-specific
 * rule lines; this class appends a shared crawl delay and emits the result
 * as a cacheable plain-text response.
 */
abstract class PhabricatorRobotsController extends PhabricatorController {

  public function shouldRequireLogin() {
    // "robots.txt" must be readable by anonymous web crawlers.
    return false;
  }

  final public function processRequest() {
    $out = $this->newRobotsRules();

    // Add a small crawl delay (number of seconds between requests) for
    // spiders which respect it. The intent here is to prevent spiders from
    // affecting performance for users. The possible cost is slower indexing,
    // but that seems like a reasonable tradeoff, since most Phabricator
    // installs are probably not hugely concerned about cutting-edge SEO.
    $out[] = 'Crawl-delay: 1';

    $content = implode("\n", $out)."\n";

    return id(new AphrontPlainTextResponse())
      ->setContent($content)
      ->setCacheDurationInSeconds(phutil_units('2 hours in seconds'))
      ->setCanCDN(true);
  }

  /**
   * Provide site-specific "robots.txt" rule lines (for example,
   * "User-Agent: *" or "Disallow: /diffusion/").
   *
   * @return list<string> Rule lines, without trailing newlines.
   */
  abstract protected function newRobotsRules();

}