diff --git a/src/__phutil_library_map__.php b/src/__phutil_library_map__.php --- a/src/__phutil_library_map__.php +++ b/src/__phutil_library_map__.php @@ -427,6 +427,8 @@ 'ArcanistRepositoryQuery' => 'repository/query/ArcanistRepositoryQuery.php', 'ArcanistRepositoryRef' => 'ref/ArcanistRepositoryRef.php', 'ArcanistRepositoryRemoteQuery' => 'repository/remote/ArcanistRepositoryRemoteQuery.php', + 'ArcanistRepositoryURINormalizer' => 'repository/remote/ArcanistRepositoryURINormalizer.php', + 'ArcanistRepositoryURINormalizerTestCase' => 'repository/remote/__tests__/ArcanistRepositoryURINormalizerTestCase.php', 'ArcanistReusedAsIteratorXHPASTLinterRule' => 'lint/linter/xhpast/rules/ArcanistReusedAsIteratorXHPASTLinterRule.php', 'ArcanistReusedAsIteratorXHPASTLinterRuleTestCase' => 'lint/linter/xhpast/rules/__tests__/ArcanistReusedAsIteratorXHPASTLinterRuleTestCase.php', 'ArcanistReusedIteratorReferenceXHPASTLinterRule' => 'lint/linter/xhpast/rules/ArcanistReusedIteratorReferenceXHPASTLinterRule.php', @@ -1455,6 +1457,8 @@ 'ArcanistRepositoryQuery' => 'Phobject', 'ArcanistRepositoryRef' => 'ArcanistRef', 'ArcanistRepositoryRemoteQuery' => 'ArcanistRepositoryQuery', + 'ArcanistRepositoryURINormalizer' => 'Phobject', + 'ArcanistRepositoryURINormalizerTestCase' => 'PhutilTestCase', 'ArcanistReusedAsIteratorXHPASTLinterRule' => 'ArcanistXHPASTLinterRule', 'ArcanistReusedAsIteratorXHPASTLinterRuleTestCase' => 'ArcanistXHPASTLinterRuleTestCase', 'ArcanistReusedIteratorReferenceXHPASTLinterRule' => 'ArcanistXHPASTLinterRule', diff --git a/src/repository/remote/ArcanistRepositoryURINormalizer.php b/src/repository/remote/ArcanistRepositoryURINormalizer.php new file mode 100644 --- /dev/null +++ b/src/repository/remote/ArcanistRepositoryURINormalizer.php @@ -0,0 +1,159 @@ +getNormalizedPath() === $norm_b->getNormalizedPath()) { + * // URIs appear to point at the same repository. + * } else { + * // URIs are very unlikely to be the same repository. + * } + * + * Because a repository can be hosted at arbitrarily many arbitrary URIs, there + * is no way to completely prevent false negatives by only examining URIs + * (that is, repositories with totally different URIs could really be the same). + * However, normalization is relatively aggressive and false negatives should + * be rare: if normalization says two URIs are different repositories, they + * probably are. + * + * @task normal Normalizing URIs + */ +final class ArcanistRepositoryURINormalizer + extends Phobject { + + const TYPE_GIT = 'git'; + const TYPE_SVN = 'svn'; + const TYPE_MERCURIAL = 'hg'; + + private $type; + private $uri; + private $domainMap = array(); + + public function __construct($type, $uri) { + switch ($type) { + case self::TYPE_GIT: + case self::TYPE_SVN: + case self::TYPE_MERCURIAL: + break; + default: + throw new Exception(pht('Unknown URI type "%s"!', $type)); + } + + $this->type = $type; + $this->uri = $uri; + } + + public static function getAllURITypes() { + return array( + self::TYPE_GIT, + self::TYPE_SVN, + self::TYPE_MERCURIAL, + ); + } + + public function setDomainMap(array $domain_map) { + foreach ($domain_map as $key => $domain) { + $domain_map[$key] = phutil_utf8_strtolower($domain); + } + + $this->domainMap = $domain_map; + return $this; + } + + +/* -( Normalizing URIs )--------------------------------------------------- */ + + + /** + * @task normal + */ + public function getPath() { + switch ($this->type) { + case self::TYPE_GIT: + $uri = new PhutilURI($this->uri); + return $uri->getPath(); + case self::TYPE_SVN: + case self::TYPE_MERCURIAL: + $uri = new PhutilURI($this->uri); + if ($uri->getProtocol()) { + return $uri->getPath(); + } + + return $this->uri; + } + } + + public function getNormalizedURI() { + return $this->getNormalizedDomain().'/'.$this->getNormalizedPath(); + } + + + /** + * @task normal + */ + public function getNormalizedPath() { + $path = $this->getPath(); + $path = trim($path, '/'); + + switch ($this->type) { + case self::TYPE_GIT: + $path = preg_replace('/\.git$/', '', $path); + break; + case self::TYPE_SVN: + case self::TYPE_MERCURIAL: + break; + } + + // If this is a Phabricator URI, strip it down to the callsign. We mutably + // allow you to clone repositories as "/diffusion/X/anything.git", for + // example. + + $matches = null; + if (preg_match('@^(diffusion/(?:[A-Z]+|\d+))@', $path, $matches)) { + $path = $matches[1]; + } + + return $path; + } + + public function getNormalizedDomain() { + $domain = null; + + $uri = new PhutilURI($this->uri); + $domain = $uri->getDomain(); + + if (!strlen($domain)) { + return ''; + } + + $domain = phutil_utf8_strtolower($domain); + + foreach ($this->domainMap as $domain_key => $domain_value) { + if ($domain === $domain_value) { + $domain = $domain_key; + break; + } + } + + return $domain; + } + +} diff --git a/src/repository/remote/__tests__/ArcanistRepositoryURINormalizerTestCase.php b/src/repository/remote/__tests__/ArcanistRepositoryURINormalizerTestCase.php new file mode 100644 --- /dev/null +++ b/src/repository/remote/__tests__/ArcanistRepositoryURINormalizerTestCase.php @@ -0,0 +1,84 @@ + 'path', + 'https://user@domain.com/path.git' => 'path', + 'git@domain.com:path.git' => 'path', + 'ssh://user@gitserv002.com/path.git' => 'path', + 'ssh://htaft@domain.com/path.git' => 'path', + 'ssh://user@domain.com/bananas.git' => 'bananas', + 'git@domain.com:bananas.git' => 'bananas', + 'user@domain.com:path/repo' => 'path/repo', + 'user@domain.com:path/repo/' => 'path/repo', + 'file:///path/to/local/repo.git' => 'path/to/local/repo', + '/path/to/local/repo.git' => 'path/to/local/repo', + 'ssh://something.com/diffusion/X/anything.git' => 'diffusion/X', + 'ssh://something.com/diffusion/X/' => 'diffusion/X', + ); + + $type_git = ArcanistRepositoryURINormalizer::TYPE_GIT; + + foreach ($cases as $input => $expect) { + $normal = new ArcanistRepositoryURINormalizer($type_git, $input); + $this->assertEqual( + $expect, + $normal->getNormalizedPath(), + pht('Normalized Git path for "%s".', $input)); + } + } + + public function testDomainURINormalizer() { + $base_domain = 'base.phabricator.example.com'; + $ssh_domain = 'ssh.phabricator.example.com'; + + $domain_map = array( + '' => $base_domain, + '' => $ssh_domain, + ); + + $cases = array( + '/' => '', + '/path/to/local/repo.git' => '', + 'ssh://user@domain.com/path.git' => 'domain.com', + 'ssh://user@DOMAIN.COM/path.git' => 'domain.com', + 'http://'.$base_domain.'/diffusion/X/' => '', + 'ssh://'.$ssh_domain.'/diffusion/X/' => '', + 'git@'.$ssh_domain.':bananas.git' => '', + ); + + $type_git = ArcanistRepositoryURINormalizer::TYPE_GIT; + + foreach ($cases as $input => $expect) { + $normalizer = new ArcanistRepositoryURINormalizer($type_git, $input); + + $normalizer->setDomainMap($domain_map); + + $this->assertEqual( + $expect, + $normalizer->getNormalizedDomain(), + pht('Normalized domain for "%s".', $input)); + } + } + + public function testSVNURINormalizer() { + $cases = array( + 'file:///path/to/repo' => 'path/to/repo', + 'file:///path/to/repo/' => 'path/to/repo', + ); + + $type_svn = ArcanistRepositoryURINormalizer::TYPE_SVN; + + foreach ($cases as $input => $expect) { + $normal = new ArcanistRepositoryURINormalizer($type_svn, $input); + $this->assertEqual( + $expect, + $normal->getNormalizedPath(), + pht('Normalized SVN path for "%s".', $input)); + } + } + +}