Differential D18534 Diff 44513 src/applications/search/ferret/PhabricatorFerretEngine.php

Changeset View

Standalone View

src/applications/search/ferret/PhabricatorFerretEngine.php

Show All 10 Lines	abstract class PhabricatorFerretEngine extends Phobject {
}		}

// Split a string into tokens. Leading and trailing space characters (only
// ' ', not tabs or newlines) are trimmed, then the value is split on runs of
// one or more spaces. Returns whatever preg_split() produces for the input.
public function tokenizeString($value) {		public function tokenizeString($value) {
$value = trim($value, ' ');		$value = trim($value, ' ');
$value = preg_split('/ +/', $value);		$value = preg_split('/ +/', $value);
return $value;		return $value;
}		}

// New side of the diff: getTermNgramsFromString() delegates to
// getNgramsFromString() with $as_term = true, so each token is padded with a
// leading and trailing space before its ngrams are extracted.
public function getNgramsFromString($value, $mode) {		public function getTermNgramsFromString($string) {
		return $this->getNgramsFromString($string, true);
		}

		// Delegates to getNgramsFromString() with $as_term = false, so tokens are
		// not padded with spaces before their ngrams are extracted.
		public function getSubstringNgramsFromString($string) {
		return $this->getNgramsFromString($string, false);
		}

		private function getNgramsFromString($value, $as_term) {
$tokens = $this->tokenizeString($value);		$tokens = $this->tokenizeString($value);

$ngrams = array();		$ngrams = array();
foreach ($tokens as $token) {		foreach ($tokens as $token) {
$token = phutil_utf8_strtolower($token);		$token = phutil_utf8_strtolower($token);

switch ($mode) {		if ($as_term) {
case 'query':
break;
case 'index':
$token = ' '.$token.' ';		$token = ' '.$token.' ';
break;
case 'prefix':
$token = ' '.$token;
break;
}		}

$token_v = phutil_utf8v($token);		$token_v = phutil_utf8v($token);
$len = (count($token_v) - 2);		$len = (count($token_v) - 2);
for ($ii = 0; $ii < $len; $ii++) {		for ($ii = 0; $ii < $len; $ii++) {
$ngram = array_slice($token_v, $ii, 3);		$ngram = array_slice($token_v, $ii, 3);
$ngram = implode('', $ngram);		$ngram = implode('', $ngram);
$ngrams[$ngram] = $ngram;		$ngrams[$ngram] = $ngram;
▲ Show 20 Lines • Show All 48 Lines • ▼ Show 20 Lines	public function newTermsCorpus($raw_corpus) {
// NOTE: Single quotes divide terms only if they're at a word boundary.		// NOTE: Single quotes divide terms only if they're at a word boundary.
// In contractions, like "whom'st've", the entire word is a single term.		// In contractions, like "whom'st've", the entire word is a single term.
$term_corpus = preg_replace('/(^| )[\']+/', ' ', $term_corpus);		$term_corpus = preg_replace('/(^| )[\']+/', ' ', $term_corpus);
$term_corpus = preg_replace('/[\']+( |$)/', ' ', $term_corpus);		$term_corpus = preg_replace('/[\']+( |$)/', ' ', $term_corpus);

$term_corpus = preg_replace('/\s+/u', ' ', $term_corpus);		$term_corpus = preg_replace('/\s+/u', ' ', $term_corpus);
$term_corpus = trim($term_corpus, ' ');		$term_corpus = trim($term_corpus, ' ');

		if (strlen($term_corpus)) {
		$term_corpus = ' '.$term_corpus.' ';
		}

return $term_corpus;		return $term_corpus;
}		}

}		}