Changeset View
Standalone View
src/parser/xhpast/__tests__/PHPASTParserTestCase.php
Show First 20 Lines • Show All 74 Lines • ▼ Show 20 Lines | if ($type === null) { | ||||
$name)); | $name)); | ||||
} | } | ||||
$future = PhutilXHPASTBinary::getParserFuture($data); | $future = PhutilXHPASTBinary::getParserFuture($data); | ||||
list($err, $stdout, $stderr) = $future->resolve(); | list($err, $stdout, $stderr) = $future->resolve(); | ||||
switch ($type) { | switch ($type) { | ||||
case 'pass': | case 'pass': | ||||
case 'fail-parse': | |||||
$this->assertEqual(0, $err, pht('Exit code for "%s".', $name)); | $this->assertEqual(0, $err, pht('Exit code for "%s".', $name)); | ||||
if (!strlen($expect)) { | if (!strlen($expect)) { | ||||
// If there's no "expect" data in the test case, that's OK. | // If there's no "expect" data in the test case, that's OK. | ||||
break; | break; | ||||
} | } | ||||
try { | try { | ||||
$expect = phutil_json_decode($expect); | |||||
} catch (PhutilJSONParserException $ex) { | |||||
throw new PhutilProxyException( | |||||
pht( | |||||
'Expect data for test "%s" is not valid JSON.', | |||||
$name), | |||||
$ex); | |||||
} | |||||
try { | |||||
$stdout = phutil_json_decode($stdout); | $stdout = phutil_json_decode($stdout); | ||||
} catch (PhutilJSONParserException $ex) { | } catch (PhutilJSONParserException $ex) { | ||||
throw new PhutilProxyException( | throw new PhutilProxyException( | ||||
pht( | pht( | ||||
'Output for test file "%s" is not valid JSON.', | 'Output for test file "%s" is not valid JSON.', | ||||
$name), | $name), | ||||
$ex); | $ex); | ||||
} | } | ||||
$json = new PhutilJSON(); | $stdout_nice = $this->newReadableAST($stdout, $data); | ||||
$expect_nice = $json->encodeFormatted($expect); | |||||
$stdout_nice = $json->encodeFormatted($stdout); | |||||
joshuaspence: Debugging code? | |||||
if ($type == 'pass') { | |||||
$this->assertEqual( | $this->assertEqual( | ||||
Not Done Inline ActionsI'm wondering if we need this? It seems like there are three possible modes here:
The fail-syntax mode feels a bit odd to me, because it's too easy to get wrong. Like, you might just have a single extra space in your human-readable tree? joshuaspence: I'm wondering if we need this? It seems like there are three possible modes here:
- `pass`… | |||||
$expect_nice, | $expect, | ||||
$stdout_nice, | $stdout_nice, | ||||
pht('Parser output for "%s".', $name)); | pht('Parser output for "%s".', $name)); | ||||
} else { | |||||
$this->assertFalse( | |||||
($expect_nice == $stdout_nice), | |||||
pht('Expected parser to parse "%s" incorrectly.', $name)); | |||||
} | |||||
break; | break; | ||||
case 'fail-syntax': | case 'fail-syntax': | ||||
$this->assertEqual(1, $err, pht('Exit code for "%s".', $name)); | $this->assertEqual(1, $err, pht('Exit code for "%s".', $name)); | ||||
$this->assertTrue( | $this->assertTrue( | ||||
(bool)preg_match('/syntax error/', $stderr), | (bool)preg_match('/syntax error/', $stderr), | ||||
pht('Expect "syntax error" in stderr or "%s".', $name)); | pht('Expect "syntax error" in stderr or "%s".', $name)); | ||||
break; | break; | ||||
default: | |||||
throw new Exception( | |||||
pht( | |||||
'Unknown PHPAST parser test type "%s"!', | |||||
$type)); | |||||
joshuaspenceUnsubmitted Not Done Inline ActionsGood call. joshuaspence: Good call. | |||||
} | |||||
} | } | ||||
/** | |||||
* Build a human-readable, stable, relatively diff-able string representing | |||||
* an AST (both the node tree itself and the accompanying token stream) for | |||||
* use in unit tests. | |||||
*/ | |||||
private function newReadableAST(array $data, $source) { | |||||
$tree = new XHPASTTree($data['tree'], $data['stream'], $source); | |||||
$out = array(); | |||||
$out[] = $this->newReadableTree($tree); | |||||
$out[] = $this->newReadableDivider(); | |||||
$out[] = $this->newReadableStream($tree); | |||||
$out = implode('', $out); | |||||
Not Done Inline ActionsShould you use "\n" as a separate instead of ''? joshuaspence: Should you use `"\n"` as a separate instead of `''`? | |||||
return $out; | |||||
} | |||||
private function newReadableTree(XHPASTTree $tree) { | |||||
$root = $tree->getRootNode(); | |||||
$depth = 0; | |||||
$list = $this->newReadableTreeLines($root, $depth); | |||||
return implode('', $list); | |||||
} | |||||
private function newReadableDivider() { | |||||
return str_repeat('-', 80)."\n"; | |||||
Not Done Inline ActionsShould we make this ~~~~~~~~~~ to be more consistent with the *.php.test format? joshuaspence: Should we make this `~~~~~~~~~~` to be more consistent with the `*.php.test` format? | |||||
} | |||||
private function newReadableStream(XHPASTTree $tree) { | |||||
$tokens = $tree->getRawTokenStream(); | |||||
// Identify the longest token name this stream uses so we can format the | |||||
// output into two columns. | |||||
$max_len = 0; | |||||
Not Done Inline ActionsTechnically better might be PHP_INT_MIN, but that was only added in PHP 7.0. joshuaspence: Technically better might be `PHP_INT_MIN`, but that was only added in PHP 7.0. | |||||
foreach ($tokens as $token) { | |||||
$max_len = max($max_len, strlen($token->getTypeName())); | |||||
} | |||||
$out = array(); | |||||
$tokens = $tree->getRawTokenStream(); | |||||
foreach ($tokens as $token) { | |||||
$value = $token->getValue(); | |||||
$vector = $this->getEscapedValueVector($value); | |||||
$vector = array_merge( | |||||
array( | |||||
str_pad($token->getTypeName(), $max_len), | |||||
' ', | |||||
), | |||||
$vector); | |||||
$out[] = $this->wrapVector($vector); | |||||
} | |||||
$out = implode('', $out); | |||||
return $out; | |||||
} | |||||
private function newReadableTreeLines(AASTNode $node, $depth) { | |||||
$out = array(); | |||||
try { | |||||
$type_name = $node->getTypeName(); | |||||
} catch (Exception $ex) { | |||||
$type_name = sprintf('<INVALID TYPE "%s">', $node->getTypeID()); | |||||
} | |||||
$out[] = str_repeat(' ', $depth).'+ '.$type_name."\n"; | |||||
$children = $node->getChildren(); | |||||
if ($children) { | |||||
foreach ($children as $child) { | |||||
foreach ($this->newReadableTreeLines($child, $depth + 1) as $line) { | |||||
$out[] = $line; | |||||
} | |||||
} | |||||
} else { | |||||
$value = $node->getConcreteString(); | |||||
$vector = $this->getEscapedValueVector($value); | |||||
$out[] = $this->wrapVector($vector, $depth + 1); | |||||
} | |||||
return $out; | |||||
} | |||||
private function getEscapedValueVector($value) { | |||||
if (!$value) { | |||||
return array('<null>'); | |||||
} | |||||
$vector = phutil_utf8v_combined($value); | |||||
foreach ($vector as $key => $character) { | |||||
$vector[$key] = addcslashes($character, "\r\n\t\"\\"); | |||||
} | |||||
return $vector; | |||||
} | |||||
private function wrapVector(array $vector, $indent = 0, $width = 80) { | |||||
$lines = array(); | |||||
$prefix = str_repeat(' ', $indent); | |||||
Not Done Inline ActionsWhen would this happen? Would it be better to simply throw the exception here? joshuaspence: When would this happen? Would it be better to simply throw the exception here? | |||||
$line = $prefix.'> "'; | |||||
$len = strlen($line); | |||||
foreach ($vector as $character) { | |||||
// We're just wrapping based on bytes. This isn't the most correct | |||||
// wrapping for human language, but sufficient and stable for unit | |||||
// tests, which will rarely have long sequences of combining or | |||||
// multibyte characters. | |||||
$charlen = strlen($character); | |||||
if ($len + $charlen > ($width - 1)) { | |||||
$lines[] = $line."\"\n"; | |||||
$line = $prefix.'. "'; | |||||
$len = strlen($line); | |||||
} | |||||
$line .= $character; | |||||
$len += $charlen; | |||||
} | |||||
$lines[] = $line."\"\n"; | |||||
return implode('', $lines); | |||||
} | } | ||||
Not Done Inline ActionsMaybe instead of this: > T_TOKEN <null> We can just do this? > T_TOKEN joshuaspence: Maybe instead of this:
```
> T_TOKEN <null>
```
We can just do this?
```
> T_TOKEN
``` | |||||
} | } | ||||
Not Done Inline ActionsI wonder if all of this code should be moved to XHPASTTree? My concern is that this code is fairly complex and it would be nice to have some test coverage (i.e. some code that tests the tree -> readable string logic rather than testing the parser itself). joshuaspence: I wonder if all of this code should be moved to `XHPASTTree`? My concern is that this code is… | |||||
Not Done Inline ActionsMaybe move this to a newReadableStream() method? Then we can just do this: private function newReadableAST(array $data, $source) { $tree = new XHPASTTree($data['tree'], $data['stream'], $source); $root = $tree->getRootNode(); $output = implode('', $this->newReadableTree($root, 0)); $output .= str_repeat('-', 80)."\n"; $output .= $this->newReadableStream($tree); return $output; } joshuaspence: Maybe move this to a `newReadableStream()` method? Then we can just do this:
```lang=php… | |||||
Not Done Inline ActionsIt feels slightly odd to me that newReadableTree returns an array. joshuaspence: It feels slightly odd to me that `newReadableTree` returns an array. | |||||
Not Done Inline ActionsI understand escaping slashes and newlines, but why change whitespace to underscores? joshuaspence: I understand escaping slashes and newlines, but why change whitespace to underscores? | |||||
Not Done Inline ActionsI don't quite understand why we need this? Is this an optimization to avoid having really long lines in the test format? joshuaspence: I don't quite understand why we need this? Is this an optimization to avoid having really long… | |||||
Not Done Inline ActionsThis makes the output not technically stable because if someone adds a new token with a longer name then all of the tests will break? Is it important to pad the string here? I'm wondering if we could just do something like this instead: > | T_OPEN_TAG | <?php joshuaspence: This makes the output not technically stable because if someone adds a new token with a longer… |
Debugging code?