diff --git a/src/__phutil_library_map__.php b/src/__phutil_library_map__.php --- a/src/__phutil_library_map__.php +++ b/src/__phutil_library_map__.php @@ -71,7 +71,13 @@ 'LinesOfALargeFile' => 'filesystem/linesofalarge/LinesOfALargeFile.php', 'LinesOfALargeFileTestCase' => 'filesystem/linesofalarge/__tests__/LinesOfALargeFileTestCase.php', 'MFilterTestHelper' => 'utils/__tests__/MFilterTestHelper.php', - 'PHPASTParserTestCase' => 'parser/xhpast/__tests__/PHPASTParserTestCase.php', + 'PHPASTNode' => 'parser/php/PHPASTNode.php', + 'PHPASTNodeTestCase' => 'parser/php/__tests__/PHPASTNodeTestCase.php', + 'PHPASTParserTestCase' => 'parser/php/__tests__/PHPASTParserTestCase.php', + 'PHPASTSyntaxErrorException' => 'parser/php/PHPASTSyntaxErrorException.php', + 'PHPASTToken' => 'parser/php/PHPASTToken.php', + 'PHPASTTree' => 'parser/php/PHPASTTree.php', + 'PHPASTTreeTestCase' => 'parser/php/__tests__/PHPASTTreeTestCase.php', 'PhageAgentBootloader' => 'phage/bootloader/PhageAgentBootloader.php', 'PhageAgentTestCase' => 'phage/__tests__/PhageAgentTestCase.php', 'PhagePHPAgent' => 'phage/agent/PhagePHPAgent.php', @@ -356,6 +362,7 @@ 'TestAbstractDirectedGraph' => 'utils/__tests__/TestAbstractDirectedGraph.php', 'XHPASTNode' => 'parser/xhpast/api/XHPASTNode.php', 'XHPASTNodeTestCase' => 'parser/xhpast/api/__tests__/XHPASTNodeTestCase.php', + 'XHPASTParserTestCase' => 'parser/xhpast/__tests__/XHPASTParserTestCase.php', 'XHPASTSyntaxErrorException' => 'parser/xhpast/api/XHPASTSyntaxErrorException.php', 'XHPASTToken' => 'parser/xhpast/api/XHPASTToken.php', 'XHPASTTree' => 'parser/xhpast/api/XHPASTTree.php', @@ -534,7 +541,13 @@ 'LinesOfALargeExecFutureTestCase' => 'PhutilTestCase', 'LinesOfALargeFile' => 'LinesOfALarge', 'LinesOfALargeFileTestCase' => 'PhutilTestCase', + 'PHPASTNode' => 'AASTNode', + 'PHPASTNodeTestCase' => 'PhutilTestCase', 'PHPASTParserTestCase' => 'PhutilTestCase', + 'PHPASTSyntaxErrorException' => 'Exception', + 'PHPASTToken' => 'AASTToken', + 'PHPASTTree' => 'AASTTree', + 'PHPASTTreeTestCase' => 'PhutilTestCase', 'PhageAgentTestCase' => 'PhutilTestCase', 'PhagePHPAgentBootloader' => 'PhageAgentBootloader', 'Phobject' => 'Iterator', @@ -750,6 +763,7 @@ 'TestAbstractDirectedGraph' => 'AbstractDirectedGraph', 'XHPASTNode' => 'AASTNode', 'XHPASTNodeTestCase' => 'PhutilTestCase', + 'XHPASTParserTestCase' => 'PhutilTestCase', 'XHPASTSyntaxErrorException' => 'Exception', 'XHPASTToken' => 'AASTToken', 'XHPASTTree' => 'AASTTree', diff --git a/src/parser/php/PHPASTNode.php b/src/parser/php/PHPASTNode.php new file mode 100644 --- /dev/null +++ b/src/parser/php/PHPASTNode.php @@ -0,0 +1,236 @@ +getTypeName(), array( + 'n_STRING_SCALAR', + 'n_NUMERIC_SCALAR', + )); + } + + public function getDocblockToken() { + if ($this->l == -1) { + return null; + } + $tokens = $this->tree->getRawTokenStream(); + + for ($ii = $this->l - 1; $ii >= 0; $ii--) { + if ($tokens[$ii]->getTypeName() == 'T_DOC_COMMENT') { + return $tokens[$ii]; + } + if (!$tokens[$ii]->isAnyWhitespace()) { + return null; + } + } + + return null; + } + + public function evalStatic() { + switch ($this->getTypeName()) { + case 'n_STATEMENT': + return $this->getChildByIndex(0)->evalStatic(); + break; + case 'n_STRING_SCALAR': + return (string)$this->getStringLiteralValue(); + case 'n_NUMERIC_SCALAR': + $value = $this->getSemanticString(); + if (preg_match('/^0x/i', $value)) { + // Hex + $value = base_convert(substr($value, 2), 16, 10); + } else if (preg_match('/^0\d+$/i', $value)) { + // Octal + $value = base_convert(substr($value, 1), 8, 10); + } + return +$value; + case 'n_SYMBOL_NAME': + $value = $this->getSemanticString(); + if ($value == 'INF') { + return INF; + } + switch (strtolower($value)) { + case 'true': + return true; + case 'false': + return false; + case 'null': + return null; + default: + throw new Exception('Unrecognized symbol name.'); + } + break; + case 'n_UNARY_PREFIX_EXPRESSION': + $operator = $this->getChildOfType(0, 'n_OPERATOR'); + $operand = $this->getChildByIndex(1); + switch ($operator->getSemanticString()) { + case '-': + return -$operand->evalStatic(); + break; + case '+': + return $operand->evalStatic(); + break; + default: + throw new Exception('Unexpected operator in static expression.'); + } + break; + case 'n_ARRAY_LITERAL': + $result = array(); + $values = $this->getChildOfType(0, 'n_ARRAY_VALUE_LIST'); + foreach ($values->getChildren() as $child) { + $key = $child->getChildByIndex(0); + $val = $child->getChildByIndex(1); + if ($key->getTypeName() == 'n_EMPTY') { + $result[] = $val->evalStatic(); + } else { + $result[$key->evalStatic()] = $val->evalStatic(); + } + } + return $result; + case 'n_CONCATENATION_LIST': + $result = ''; + foreach ($this->getChildren() as $child) { + if ($child->getTypeName() == 'n_OPERATOR') { + continue; + } + $result .= $child->evalStatic(); + } + return $result; + default: + throw new Exception( + pht( + 'Unexpected node during static evaluation, of type: %s', + $this->getTypeName())); + } + } + + public function isConstantString() { + switch ($this->getTypeName()) { + case 'n_HEREDOC': + case 'n_STRING_SCALAR': + return !$this->getStringVariables(); + + case 'n_CONCATENATION_LIST': + foreach ($this->getChildren() as $child) { + if ($child->getTypeName() == 'n_OPERATOR') { + continue; + } + if (!$child->isConstantString()) { + return false; + } + } + return true; + + default: + return false; + } + } + + public function getStringVariables() { + $value = $this->getConcreteString(); + + switch ($this->getTypeName()) { + case 'n_HEREDOC': + if (preg_match("/^<<<\s*'/", $value)) { // Nowdoc: <<<'EOT' + return array(); + } + break; + + case 'n_STRING_SCALAR': + if ($value[0] == "'") { + return array(); + } + break; + + default: + throw new Exception('Unexpected type '.$this->getTypeName().'.'); + } + + // We extract just the variable names and ignore properties and array keys. + $re = '/\\\\.|(\$|\{\$|\${)([a-z_\x7F-\xFF][a-z0-9_\x7F-\xFF]*)/i'; + $matches = null; + preg_match_all($re, $value, $matches, PREG_OFFSET_CAPTURE); + return ipull(array_filter($matches[2]), 0, 1); + } + + public function getStringLiteralValue() { + if ($this->getTypeName() != 'n_STRING_SCALAR') { + return null; + } + + $value = $this->getSemanticString(); + $type = $value[0]; + $value = preg_replace('/^b?[\'"]|[\'"]$/i', '', $value); + $esc = false; + $len = strlen($value); + $out = ''; + + if ($type == "'") { + // Single quoted strings treat everything as a literal except "\\" and + // "\'". + return str_replace( + array('\\\\', '\\\''), + array('\\', "'"), + $value); + } + + // Double quoted strings treat "\X" as a literal if X isn't specifically + // a character which needs to be escaped -- e.g., "\q" and "\'" are + // literally "\q" and "\'". stripcslashes() is too aggressive, so find + // all these under-escaped backslashes and escape them. + + for ($ii = 0; $ii < $len; $ii++) { + $c = $value[$ii]; + if ($esc) { + $esc = false; + switch ($c) { + case 'x': + $u = isset($value[$ii + 1]) ? $value[$ii + 1] : null; + if (!preg_match('/^[a-f0-9]/i', $u)) { + // PHP treats \x followed by anything which is not a hex digit + // as a literal \x. + $out .= '\\\\'.$c; + break; + } + /* fallthrough */ + case 'n': + case 'r': + case 'f': + case 'v': + case '"': + case '$': + case 't': + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + $out .= '\\'.$c; + break; + case 'e': + // Since PHP 5.4.0, this means "esc". However, stripcslashes() does + // not perform this conversion. + $out .= chr(27); + break; + default: + $out .= '\\\\'.$c; + break; + } + } else if ($c == '\\') { + $esc = true; + } else { + $out .= $c; + } + } + + return stripcslashes($out); + } + + public function getLineNumber() { + return idx($this->tree->getOffsetToLineNumberMap(), $this->getOffset()); + } + +} diff --git a/src/parser/php/PHPASTSyntaxErrorException.php b/src/parser/php/PHPASTSyntaxErrorException.php new file mode 100644 --- /dev/null +++ b/src/parser/php/PHPASTSyntaxErrorException.php @@ -0,0 +1,16 @@ +errorLine = $line; + parent::__construct($message); + } + + public function getErrorLine() { + return $this->errorLine; + } + +} diff --git a/src/parser/php/PHPASTToken.php b/src/parser/php/PHPASTToken.php new file mode 100644 --- /dev/null +++ b/src/parser/php/PHPASTToken.php @@ -0,0 +1,39 @@ +typeName)) { + $type_id = $this->typeID; + if ($type_id <= 255) { + $this->typeName = chr($type_id); + } + $this->typeName = parent::getTypeName(); + } + return $this->typeName; + } + + public function isComment() { + static $type_ids = null; + if ($type_ids === null) { + $type_ids = array( + $this->tree->getTokenTypeIDFromTypeName('T_COMMENT') => true, + $this->tree->getTokenTypeIDFromTypeName('T_DOC_COMMENT') => true, + ); + } + + return isset($type_ids[$this->typeID]); + } + + public function isAnyWhitespace() { + static $type_ids = null; + if ($type_ids === null) { + $type_ids = array( + $this->tree->getTokenTypeIDFromTypeName('T_WHITESPACE') => true, + ); + } + + return isset($type_ids[$this->typeID]); + } + +} diff --git a/src/parser/php/PHPASTTree.php b/src/parser/php/PHPASTTree.php new file mode 100644 --- /dev/null +++ b/src/parser/php/PHPASTTree.php @@ -0,0 +1,41 @@ +setTreeType('PHP'); + $this->setNodeConstants(array()); + $this->setTokenConstants(array()); + + parent::__construct($tree, $stream, $source); + } + + public function newNode($id, array $data, AASTTree $tree) { + return new PHPASTNode($id, $data, $tree); + } + + public function newToken( + $id, + $type, + $value, + $offset, + AASTTree $tree) { + return new PHPASTToken($id, $type, $value, $offset, $tree); + } + + public static function newFromData($php_source) { + $parser = new \PhpParser\Parser(new \PhpParser\Lexer()); + + $statements = $parser->parse($php_source); + + $tree = array(); + $stream = array(); + + foreach ($statements as $statement) { + // + } + + return new PHPASTTree($data['tree'], $data['stream'], $php_source); + } + +} diff --git a/src/parser/php/__tests__/PHPASTNodeTestCase.php b/src/parser/php/__tests__/PHPASTNodeTestCase.php new file mode 100644 --- /dev/null +++ b/src/parser/php/__tests__/PHPASTNodeTestCase.php @@ -0,0 +1,30 @@ +assertStringVariables(array(), '""'); + $this->assertStringVariables(array(2 => 'abc'), '"$abc"'); + $this->assertStringVariables(array(), '"\$abc"'); + $this->assertStringVariables(array(2 => 'a'), '"$a[1]"'); + $this->assertStringVariables(array(3 => 'a'), '"{$a[1]}"'); + $this->assertStringVariables(array(2 => 'a', 5 => 'a'), '"$a $a"'); + + $this->assertStringVariables(array(), "''"); + $this->assertStringVariables(array(), "'\$a'"); + + $this->assertStringVariables(array(), "<<assertStringVariables(array(8 => 'a'), "<<assertStringVariables(array(), "<<<'EOT'\n\$a\nEOT"); + } + + private function assertStringVariables($expected, $string) { + $statement = XHPASTTree::newStatementFromString($string); + $this->assertEqual( + $expected, + $statement->getChildByIndex(0)->getStringVariables(), + $string); + } + +} diff --git a/src/parser/xhpast/__tests__/PHPASTParserTestCase.php b/src/parser/php/__tests__/PHPASTParserTestCase.php rename from src/parser/xhpast/__tests__/PHPASTParserTestCase.php rename to src/parser/php/__tests__/PHPASTParserTestCase.php --- a/src/parser/xhpast/__tests__/PHPASTParserTestCase.php +++ b/src/parser/php/__tests__/PHPASTParserTestCase.php @@ -3,14 +3,6 @@ final class PHPASTParserTestCase extends PhutilTestCase { public function testParser() { - if (!PhutilXHPASTBinary::isAvailable()) { - try { - PhutilXHPASTBinary::build(); - } catch (Exception $ex) { - $this->assertSkipped(pht('xhpast is not built or not up to date.')); - } - } - $dir = dirname(__FILE__).'/data/'; foreach (Filesystem::listDirectory($dir) as $file) { if (preg_match('/\.test$/', $file)) { diff --git a/src/parser/php/__tests__/PHPASTTreeTestCase.php b/src/parser/php/__tests__/PHPASTTreeTestCase.php new file mode 100644 --- /dev/null +++ b/src/parser/php/__tests__/PHPASTTreeTestCase.php @@ -0,0 +1,139 @@ +assertEval(1, '1'); + $this->assertEval("a", '"a"'); + $this->assertEval(-1.1, '-1.1'); + $this->assertEval( + array('foo', 'bar', -1, +2, -3.4, +4.3, 1e10, 1e-5, -2.3e7), + "array('foo', 'bar', -1, +2, -3.4, +4.3, 1e10, 1e-5, -2.3e7)"); + $this->assertEval( + array(), + "array()"); + $this->assertEval( + array(42 => 7, 'a' => 5, 1, 2, 3, 4, 1 => 'goo'), + "array(42 => 7, 'a' => 5, 1, 2, 3, 4, 1 => 'goo')"); + $this->assertEval( + array('a' => 'a', 'b' => array(1, 2, array(3))), + "array('a' => 'a', 'b' => array(1, 2, array(3)))"); + $this->assertEval( + array(true, false, null), + "array(true, false, null)"); + + // Duplicate keys + $this->assertEval( + array(0 => '1', 0 => '2'), + "array(0 => '1', 0 => '2')"); + + $this->assertEval('simple string', "'simple string'"); + $this->assertEval('42', "'42'"); + $this->assertEval('binary string', "b'binary string'"); + $this->assertEval(3.1415926, "3.1415926"); + $this->assertEval(42, '42'); + $this->assertEval( + array(2147483648, 2147483647, -2147483648, -2147483647), + "array(2147483648, 2147483647, -2147483648, -2147483647)"); + + $this->assertEval(INF, 'INF'); + $this->assertEval(-INF, '-INF'); + + $this->assertEval(0x1b, '0x1b'); + $this->assertEval(0X0A, '0X0A'); + + // Octal + $this->assertEval(010, '010'); + $this->assertEval(080, '080'); // Invalid! + + // Leading 0, but float, not octal. + $this->assertEval(0.11e1, '0.11e1'); + $this->assertEval(0e1, '0e1'); + + $this->assertEval(0, '0'); + + // Static evaluation treats '$' as a literal dollar glyph. + $this->assertEval('$asdf', '"$asdf"'); + + $this->assertEval( + '\a\b\c\d\e\f\g\h\i\j\k\l\m\n\o\p\q\r\s\t\u\v\w\x\y\z'. + '\1\2\3\4\5\6\7\8\9\0'. + '\!\@\#\$\%\^\&\*\(\)'. + '\`\~\\\|\[\]\{\}\<\>\,\.\/\?\:\;\-\_\=\+', + + "'\\a\\b\\c\\d\\e\\f\\g\\h\\i\\j\\k\\l\\m\\n\\o\\p\\q". + "\\r\\s\\t\\u\\v\\w\\x\\y\\z". + "\\1\\2\\3\\4\\5\\6\\7\\8\\9\\0". + "\\!\\@\\#\\$\\%\\^\\&\\*\\(\\)". + "\\`\\~\\\\\\|\\[\\]\\{\\}\\<\\>\\,\\.\\/\\?\\:\\;\\-\\_\\=\\+". + "'"); + + // After PHP 5.4.0, "\e" means "escape", not "backslash e". We implement the + // newer rules, but if we're running in an older version of PHP we can not + // express them with "\e". + $this->assertEval(chr(27), '"\\e"'); + + $this->assertEval( + "\a\b\c\d\x1B\f\g\h\i\j\k\l\m\n\o\p\q\r\s\t\u\v\w\x\y\z". + "\1\2\3\4\5\6\7\8\9\0". + "\!\@\#\$\%\^\&\*\(\)". + "\`\~\\\|\[\]\{\}\<\>\,\.\/\?\:\;\-\_\=\+", + + '"\\a\\b\\c\\d\\e\\f\\g\\h\\i\\j\\k\\l\\m\\n\\o\\p\\q'. + '\\r\\s\\t\\u\\v\\w\\x\\y\\z'. + '\\1\\2\\3\\4\\5\\6\\7\\8\\9\\0'. + '\\!\\@\\#\\$\\%\\^\\&\\*\\(\\)'. + '\\`\\~\\\\\\|\\[\\]\\{\\}\\<\\>\\,\\.\\/\\?\\:\\;\\-\\_\\=\\+"'); + + $this->assertEval( + '\' "', + "'\\' \"'"); + + $this->assertEval( + '\\ \\\\ ', + '\'\\\\ \\\\\\\\ \''); + + $this->assertEval( + '\ \\ ', + "'\\ \\\\ '"); + + $this->assertEval( + '\x92', + '\'\x92\''); + + $this->assertEval( + "\x92", + '"\x92"'); + + $this->assertEval( + "\x", + '"\x"'); + + $this->assertEval( + "\x1", + '"\x1"'); + + $this->assertEval( + "\x000 !", + '"\x000 !"'); + + $this->assertEval( + "\x0", + '"\x0"'); + + $this->assertEval( + "\xg", + '"\xg"'); + } + + private function assertEval($value, $string) { + $this->assertEqual( + $value, + XHPASTTree::newStatementFromString($string)->evalStatic(), + $string); + } + +} diff --git a/src/parser/xhpast/__tests__/PHPASTParserTestCase.php b/src/parser/xhpast/__tests__/XHPASTParserTestCase.php rename from src/parser/xhpast/__tests__/PHPASTParserTestCase.php rename to src/parser/xhpast/__tests__/XHPASTParserTestCase.php --- a/src/parser/xhpast/__tests__/PHPASTParserTestCase.php +++ b/src/parser/xhpast/__tests__/XHPASTParserTestCase.php @@ -1,6 +1,6 @@