Page MenuHomePhabricator

D9370.id22333.diff
No OneTemporary

D9370.id22333.diff

diff --git a/src/__phutil_library_map__.php b/src/__phutil_library_map__.php
--- a/src/__phutil_library_map__.php
+++ b/src/__phutil_library_map__.php
@@ -237,6 +237,7 @@
'PhutilProtocolChannel' => 'channel/PhutilProtocolChannel.php',
'PhutilProxyException' => 'error/PhutilProxyException.php',
'PhutilPygmentsSyntaxHighlighter' => 'markup/syntax/highlighter/PhutilPygmentsSyntaxHighlighter.php',
+ 'PhutilPythonFragmentLexer' => 'lexer/PhutilPythonFragmentLexer.php',
'PhutilQsprintfInterface' => 'xsprintf/PhutilQsprintfInterface.php',
'PhutilQueryStringParser' => 'parser/PhutilQueryStringParser.php',
'PhutilQueryStringParserTestCase' => 'parser/__tests__/PhutilQueryStringParserTestCase.php',
@@ -424,6 +425,7 @@
'vqueryfx_all' => 'xsprintf/queryfx.php',
'vurisprintf' => 'xsprintf/urisprintf.php',
'xhp_parser_node_constants' => 'parser/xhpast/parser_nodes.php',
+ 'xhpast_build' => 'parser/xhpast/bin/xhpast_parse.php',
'xhpast_get_binary_path' => 'parser/xhpast/bin/xhpast_parse.php',
'xhpast_get_build_instructions' => 'parser/xhpast/bin/xhpast_parse.php',
'xhpast_get_parser_future' => 'parser/xhpast/bin/xhpast_parse.php',
@@ -618,6 +620,7 @@
'PhutilProcessGroupDaemon' => 'PhutilTortureTestDaemon',
'PhutilProtocolChannel' => 'PhutilChannelChannel',
'PhutilProxyException' => 'Exception',
+ 'PhutilPythonFragmentLexer' => 'PhutilLexer',
'PhutilQueryStringParserTestCase' => 'PhutilTestCase',
'PhutilReadableSerializerTestCase' => 'PhutilTestCase',
'PhutilRealnameContextFreeGrammar' => 'PhutilContextFreeGrammar',
diff --git a/src/lexer/PhutilPythonFragmentLexer.php b/src/lexer/PhutilPythonFragmentLexer.php
new file mode 100644
--- /dev/null
+++ b/src/lexer/PhutilPythonFragmentLexer.php
@@ -0,0 +1,319 @@
+<?php
+
+/**
+ * Python lexer which can handle fragments of source code, e.g. for syntax
+ * highlighting of inline snippets. This is largely based on Pygments:
+ *
+ * https://bitbucket.org/birkenfeld/pygments-main/src/default/pygments/lexers/agile.py
+ *
+ * This lexer is not suitable for parser construction; it always lexes any
+ * input stream, even if the input is not Python.
+ *
+ * @group lexer
+ */
+final class PhutilPythonFragmentLexer extends PhutilLexer {
+
+  /**
+   * Builds the map of lexer state name => ordered list of rules. Each rule
+   * is array(regex, token type[, next state]); '!pop' is presumably the
+   * analogue of '#pop' in Pygments (return to the previous state).
+   */
+  protected function getRawRules() {
+    $keywords = array(
+      'as',
+      'assert',
+      'break',
+      'continue',
+      'del',
+      'elif',
+      'else',
+      'except',
+      'exec',
+      'finally',
+      'for',
+      'global',
+      'if',
+      'lambda',
+      'pass',
+      'print',
+      'raise',
+      'return',
+      'try',
+      'while',
+      'with',
+      'yield(\s+from)?',
+    );
+
+    $builtins = array(
+      '__import__',
+      'abs',
+      'all',
+      'any',
+      'apply',
+      'basestring',
+      'bin',
+      'bool',
+      'buffer',
+      'bytearray',
+      'bytes',
+      'callable',
+      'chr',
+      'classmethod',
+      'cmp',
+      'coerce',
+      'compile',
+      'complex',
+      'delattr',
+      'dict',
+      'dir',
+      'divmod',
+      'enumerate',
+      'eval',
+      'execfile',
+      'exit',
+      'file',
+      'filter',
+      'float',
+      'frozenset',
+      'getattr',
+      'globals',
+      'hasattr',
+      'hash',
+      'hex',
+      'id',
+      'input',
+      'int',
+      'intern',
+      'isinstance',
+      'issubclass',
+      'iter',
+      'len',
+      'list',
+      'locals',
+      'long',
+      'map',
+      'max',
+      'min',
+      'next',
+      'object',
+      'oct',
+      'open',
+      'ord',
+      'pow',
+      'property',
+      'range',
+      'raw_input',
+      'reduce',
+      'reload',
+      'repr',
+      'reversed',
+      'round',
+      'set',
+      'setattr',
+      'slice',
+      'sorted',
+      'staticmethod',
+      'str',
+      'sum',
+      'super',
+      'tuple',
+      'type',
+      'unichr',
+      'unicode',
+      'vars',
+      'xrange',
+      'zip',
+    );
+
+    $pseudo_builtins = array(
+      'Ellipsis',
+      'False',
+      'None',
+      'NotImplemented',
+      'True',
+      'self',
+    );
+
+    $exceptions = array(
+      'ArithmeticError',
+      'AssertionError',
+      'AttributeError',
+      'BaseException',
+      'DeprecationWarning',
+      'EOFError',
+      'EnvironmentError',
+      'Exception',
+      'FloatingPointError',
+      'FutureWarning',
+      'GeneratorExit',
+      'IOError',
+      'ImportError',
+      'ImportWarning',
+      'IndentationError',
+      'IndexError',
+      'KeyError',
+      'KeyboardInterrupt',
+      'LookupError',
+      'MemoryError',
+      'NameError',
+      'NotImplemented',
+      'NotImplementedError',
+      'OSError',
+      'OverflowError',
+      'OverflowWarning',
+      'PendingDeprecationWarning',
+      'ReferenceError',
+      'RuntimeError',
+      'RuntimeWarning',
+      'StandardError',
+      'StopIteration',
+      'SyntaxError',
+      'SyntaxWarning',
+      'SystemError',
+      'SystemExit',
+      'TabError',
+      'TypeError',
+      'UnboundLocalError',
+      'UnicodeDecodeError',
+      'UnicodeEncodeError',
+      'UnicodeError',
+      'UnicodeTranslateError',
+      'UnicodeWarning',
+      'UserWarning',
+      'VMSError',
+      'ValueError',
+      'Warning',
+      'WindowsError',
+      'ZeroDivisionError',
+    );
+
+    $nonsemantic_rules = array(
+      array('[^\\S\\n]+', null),
+      array('#[^\\n]*', 'c'),
+    );
+
+    $stringescape = array(
+      array(
+        '\\\\([\\\\abfnrtv"\']|\n|N{.*?}|u[a-fA-F0-9]{4}|'.
+        'U[a-fA-F0-9]{8}|x[a-fA-F0-9]{2}|[0-7]{1,3})',
+        'se'
+      ),
+    );
+
+    $strings = array(
+      array(
+        '%(\\(\\w+\\))?[-#0 +]*([0-9]+|[*])?(\\.([0-9]+|[*]))?'.
+        '[hlL]?[diouxXeEfFgGcrs%]',
+        'si'
+      ),
+      array('[^\\\\\'"%\\n]+', 's'),
+      // quotes, percents, and backslashes must be parsed one at a time
+      array('[\'"\\\\]', 's'),
+      // unhandled string formatting sign
+      array('%', 's'),
+      // newlines are an error (use $nl rules)
+    );
+
+    $nl = array(
+      array('\\n', 's'),
+    );
+
+    $dqs = array_merge(array(
+      array('"', 's', '!pop'),
+      // raw strings: an escaped backslash or quote must not end the string
+      array('(?:\\\\\\\\|\\\\"|\\\\n)', 's'),
+    ), $strings, $nl);
+
+    $sqs = array_merge(array(
+      array('\'', 's', '!pop'),
+      // raw strings: an escaped backslash or quote must not end the string
+      array('(?:\\\\\\\\|\\\\\'|\\\\n)', 's'),
+    ), $strings, $nl);
+
+    $tdqs = array_merge(array(
+      array('"""', 's', '!pop'),
+    ), $strings, $nl);
+
+    $tsqs = array_merge(array(
+      array('\'\'\'', 's', '!pop'),
+    ), $strings, $nl);
+
+    return array(
+      'start' => array_merge(array(
+        array('\\n', null),
+        // TODO: Docstrings should match only at the start of a line
+        array('""".*?"""', 'sd'),
+        array('\'\'\'.*?\'\'\'', 'sd'),
+      ), $nonsemantic_rules, array(
+        array('[]{}:(),;[]', 'p'),
+        array('\\\\\\n', null),
+        array('\\\\', null),
+        array('(?:in|is|and|or|not)\\b', 'ow'),
+        array('(?:!=|==|<<|>>|[-~+/*%=<>&^|.])', 'o'),
+        array('(?:'.implode('|', $keywords).')\\b', 'k'),
+        array('def(?=\\s)', 'k', 'funcname'),
+        array('class(?=\\s)', 'k', 'classname'),
+        array('from(?=\\s)', 'kn', 'fromimport'),
+        array('import(?=\\s)', 'kn', 'import'),
+        array('(?<!\\.)(?:'.implode('|', $builtins).')\\b', 'nb'),
+        array('(?<!\\.)(?:'.implode('|', $pseudo_builtins).')\\b', 'bp'),
+        array('(?<!\\.)(?:'.implode('|', $exceptions).')\\b', 'ne'),
+        array('`[^\\n]*?`', 'sb'),
+        array('(?:[rR]|[uU][rR]|[rR][uU])"""', 's', 'tdqs_raw'),
+        array('(?:[rR]|[uU][rR]|[rR][uU])\'\'\'', 's', 'tsqs_raw'),
+        array('(?:[rR]|[uU][rR]|[rR][uU])"', 's', 'dqs_raw'),
+        array('(?:[rR]|[uU][rR]|[rR][uU])\'', 's', 'sqs_raw'),
+        array('[uU]?"""', 's', 'tdqs'),
+        array('[uU]?\'\'\'', 's', 'tsqs'),
+        array('[uU]?"', 's', 'dqs'),
+        array('[uU]?\'', 's', 'sqs'),
+        array('@[\\w.]+', 'nd'),
+        array('[a-zA-Z_]\\w*', 'n'),
+        array('(\\d+\\.\\d*|\\d*\\.\\d+)([eE][+-]?[0-9]+)?j?', 'mf'),
+        array('\\d+[eE][+-]?[0-9]+j?', 'mf'),
+        array('0[0-7]+j?', 'mo'),
+        array('0[bB][01]+', 'mb'),
+        array('0[xX][a-fA-F0-9]+', 'mh'),
+        array('\\d+L', 'il'),
+        array('\\d+j?', 'mi'),
+        array('.', null),
+      )),
+
+      'funcname' => array_merge($nonsemantic_rules, array(
+        array('[a-zA-Z_]\w*', 'nf', '!pop'),
+        array('', null, '!pop'),
+      )),
+
+      'classname' => array_merge($nonsemantic_rules, array(
+        array('[a-zA-Z_]\w*', 'nc', '!pop'),
+        array('', null, '!pop'),
+      )),
+
+      'fromimport' => array_merge($nonsemantic_rules, array(
+        array('import\b', 'kn', '!pop'),
+        // if None occurs here, it's "raise x from None", since None can
+        // never be a module name
+        array('None\b', 'bp', '!pop'),
+        // sadly, in "raise x from y" y will be highlighted as namespace too
+        array('[a-zA-Z_.][\w.]*', 'nn'),
+        array('', null, '!pop'),
+      )),
+
+      'import' => array_merge($nonsemantic_rules, array(
+        array('as\b', 'kn'),
+        array(',', 'o'),
+        array('[a-zA-Z_.][\w.]*', 'nn'),
+        array('', null, '!pop'),
+      )),
+
+      'dqs_raw' => $dqs,
+      'sqs_raw' => $sqs,
+      'dqs' => array_merge($stringescape, $dqs),
+      'sqs' => array_merge($stringescape, $sqs),
+      'tdqs_raw' => $tdqs,
+      'tsqs_raw' => $tsqs,
+      'tdqs' => array_merge($stringescape, $tdqs),
+      'tsqs' => array_merge($stringescape, $tsqs),
+    );
+  }
+}
diff --git a/src/markup/syntax/engine/PhutilDefaultSyntaxHighlighterEngine.php b/src/markup/syntax/engine/PhutilDefaultSyntaxHighlighterEngine.php
--- a/src/markup/syntax/engine/PhutilDefaultSyntaxHighlighterEngine.php
+++ b/src/markup/syntax/engine/PhutilDefaultSyntaxHighlighterEngine.php
@@ -78,6 +78,13 @@
->getHighlightFuture($source);
}
+ if ($language == 'py') {
+ return id(new PhutilLexerSyntaxHighlighter())
+ ->setConfig('lexer', new PhutilPythonFragmentLexer())
+ ->setConfig('language', 'py')
+ ->getHighlightFuture($source);
+ }
+
if ($language == 'invisible') {
return id(new PhutilInvisibleSyntaxHighlighter())
->getHighlightFuture($source);

File Metadata

Mime Type
text/plain
Expires
Sun, Oct 20, 5:34 PM (1 d, 7 h ago)
Storage Engine
blob
Storage Format
Encrypted (AES-256-CBC)
Storage Handle
6737062
Default Alt Text
D9370.id22333.diff (10 KB)

Event Timeline