diff --git a/src/__phutil_library_map__.php b/src/__phutil_library_map__.php
--- a/src/__phutil_library_map__.php
+++ b/src/__phutil_library_map__.php
@@ -237,6 +237,7 @@
     'PhutilProtocolChannel' => 'channel/PhutilProtocolChannel.php',
     'PhutilProxyException' => 'error/PhutilProxyException.php',
     'PhutilPygmentsSyntaxHighlighter' => 'markup/syntax/highlighter/PhutilPygmentsSyntaxHighlighter.php',
+    'PhutilPythonFragmentLexer' => 'lexer/PhutilPythonFragmentLexer.php',
     'PhutilQsprintfInterface' => 'xsprintf/PhutilQsprintfInterface.php',
     'PhutilQueryStringParser' => 'parser/PhutilQueryStringParser.php',
     'PhutilQueryStringParserTestCase' => 'parser/__tests__/PhutilQueryStringParserTestCase.php',
@@ -619,6 +620,7 @@
     'PhutilProcessGroupDaemon' => 'PhutilTortureTestDaemon',
     'PhutilProtocolChannel' => 'PhutilChannelChannel',
     'PhutilProxyException' => 'Exception',
+    'PhutilPythonFragmentLexer' => 'PhutilLexer',
     'PhutilQueryStringParserTestCase' => 'PhutilTestCase',
     'PhutilReadableSerializerTestCase' => 'PhutilTestCase',
     'PhutilRealnameContextFreeGrammar' => 'PhutilContextFreeGrammar',
diff --git a/src/lexer/PhutilPythonFragmentLexer.php b/src/lexer/PhutilPythonFragmentLexer.php
new file mode 100644
--- /dev/null
+++ b/src/lexer/PhutilPythonFragmentLexer.php
@@ -0,0 +1,326 @@
+<?php
+
+/**
+ * Python lexer which can handle fragments of source code, e.g. for syntax
+ * highlighting of inline snippets. This is largely based on Pygments:
+ *
+ *   https://bitbucket.org/birkenfeld/pygments-main/src/default/pygments/lexers/agile.py
+ *
+ * This lexer is not suitable for parser construction; it always lexes any
+ * input stream, even if the input is not Python.
+ *
+ * @group lexer
+ */
+final class PhutilPythonFragmentLexer extends PhutilLexer {
+
+  /**
+   * Build the rule table for each lexer state ('start', 'funcname', the
+   * string states, and so on).
+   *
+   * Every rule is array(regex fragment, token name or null, optional state
+   * transition); how these are interpreted is defined by PhutilLexer.
+   *
+   * @return map Rule lists keyed by state name.
+   */
+  protected function getRawRules() {
+    $keywords = array(
+      'as',
+      'assert',
+      'break',
+      'continue',
+      'del',
+      'elif',
+      'else',
+      'except',
+      'exec',
+      'finally',
+      'for',
+      'global',
+      'if',
+      'lambda',
+      'pass',
+      'print',
+      'raise',
+      'return',
+      'try',
+      'while',
+      'with',
+      'yield(\s+from)?',
+    );
+
+    $builtins = array(
+      '__import__',
+      'abs',
+      'all',
+      'any',
+      'apply',
+      'basestring',
+      'bin',
+      'bool',
+      'buffer',
+      'bytearray',
+      'bytes',
+      'callable',
+      'chr',
+      'classmethod',
+      'cmp',
+      'coerce',
+      'compile',
+      'complex',
+      'delattr',
+      'dict',
+      'dir',
+      'divmod',
+      'enumerate',
+      'eval',
+      'execfile',
+      'exit',
+      'file',
+      'filter',
+      'float',
+      'frozenset',
+      'getattr',
+      'globals',
+      'hasattr',
+      'hash',
+      'hex',
+      'id',
+      'input',
+      'int',
+      'intern',
+      'isinstance',
+      'issubclass',
+      'iter',
+      'len',
+      'list',
+      'locals',
+      'long',
+      'map',
+      'max',
+      'min',
+      'next',
+      'object',
+      'oct',
+      'open',
+      'ord',
+      'pow',
+      'property',
+      'range',
+      'raw_input',
+      'reduce',
+      'reload',
+      'repr',
+      'reversed',
+      'round',
+      'set',
+      'setattr',
+      'slice',
+      'sorted',
+      'staticmethod',
+      'str',
+      'sum',
+      'super',
+      'tuple',
+      'type',
+      'unichr',
+      'unicode',
+      'vars',
+      'xrange',
+      'zip',
+    );
+
+    $pseudo_builtins = array(
+      'Ellipsis',
+      'False',
+      'None',
+      'NotImplemented',
+      'True',
+      'self',
+    );
+
+    $exceptions = array(
+      'ArithmeticError',
+      'AssertionError',
+      'AttributeError',
+      'BaseException',
+      'DeprecationWarning',
+      'EOFError',
+      'EnvironmentError',
+      'Exception',
+      'FloatingPointError',
+      'FutureWarning',
+      'GeneratorExit',
+      'IOError',
+      'ImportError',
+      'ImportWarning',
+      'IndentationError',
+      'IndexError',
+      'KeyError',
+      'KeyboardInterrupt',
+      'LookupError',
+      'MemoryError',
+      'NameError',
+      'NotImplemented',
+      'NotImplementedError',
+      'OSError',
+      'OverflowError',
+      'OverflowWarning',
+      'PendingDeprecationWarning',
+      'ReferenceError',
+      'RuntimeError',
+      'RuntimeWarning',
+      'StandardError',
+      'StopIteration',
+      'SyntaxError',
+      'SyntaxWarning',
+      'SystemError',
+      'SystemExit',
+      'TabError',
+      'TypeError',
+      'UnboundLocalError',
+      'UnicodeDecodeError',
+      'UnicodeEncodeError',
+      'UnicodeError',
+      'UnicodeTranslateError',
+      'UnicodeWarning',
+      'UserWarning',
+      'VMSError',
+      'ValueError',
+      'Warning',
+      'WindowsError',
+      'ZeroDivisionError',
+    );
+
+    $nonsemantic_rules = array(
+      array('[^\\S\\n]+', null),
+      array('#[^\\n]*', 'c'),
+    );
+
+    $stringescape = array(
+      array(
+        '\\\\([\\\\abfnrtv"\']|\n|N{.*?}|u[a-fA-F0-9]{4}|'.
+        'U[a-fA-F0-9]{8}|x[a-fA-F0-9]{2}|[0-7]{1,3})',
+        'se'
+      ),
+    );
+
+    $strings = array(
+      array(
+        '%(\\(\\w+\\))?[-#0 +]*([0-9]+|[*])?(\\.([0-9]+|[*]))?'.
+        '[hlL]?[diouxXeEfFgGcrs%]',
+        'si'
+      ),
+      array('[^\\\\\'"%\\n]+', 's'),
+      // quotes, percents, and backslashes must be parsed one at a time
+      array('[\'"\\\\]', 's'),
+      // unhandled string formatting sign
+      array('%', 's'),
+      // newlines are an error (use $nl rules)
+    );
+
+    $nl = array(
+      array('\\n', 's'),
+    );
+
+    $dqs = array_merge(array(
+      array('"', 's', '!pop'),
+      // Included here for raw strings, which do not get $stringescape. An
+      // escaped backslash, quote or newline is string content: do not pop.
+      array('(?:\\\\\\\\|\\\\"|\\\\\\n)', 's'),
+    ), $strings);
+
+    $sqs = array_merge(array(
+      array('\'', 's', '!pop'),
+      // Included here for raw strings, which do not get $stringescape. An
+      // escaped backslash, quote or newline is string content: do not pop.
+      array('(?:\\\\\\\\|\\\\\'|\\\\\\n)', 's'),
+    ), $strings);
+
+    $tdqs = array_merge(array(
+      array('"""', 's', '!pop'),
+    ), $strings, $nl);
+
+    $tsqs = array_merge(array(
+      array('\'\'\'', 's', '!pop'),
+    ), $strings, $nl);
+
+    return array(
+      'start' => array_merge(array(
+        array('\\n', null),
+        // TODO: Docstrings should match only at the start of a line
+        array('""".*?"""', 'sd'),
+        array('\'\'\'.*?\'\'\'', 'sd'),
+      ), $nonsemantic_rules, array(
+        array('[]{}:(),;[]', 'p'),
+        array('\\\\\\n', null),
+        array('\\\\', null),
+        array('(?:in|is|and|or|not)\\b', 'ow'),
+        array('(?:!=|==|<<|>>|[-~+/*%=<>&^|.])', 'o'),
+        array('(?:'.implode('|', $keywords).')\\b', 'k'),
+        array('def(?=\\s)', 'k', 'funcname'),
+        array('class(?=\\s)', 'k', 'classname'),
+        array('from(?=\\s)', 'kn', 'fromimport'),
+        array('import(?=\\s)', 'kn', 'import'),
+        array('(?<!\\.)(?:'.implode('|', $builtins).')\\b', 'nb'),
+        array('(?<!\\.)(?:'.implode('|', $pseudo_builtins).')\\b', 'bp'),
+        array('(?<!\\.)(?:'.implode('|', $exceptions).')\\b', 'ne'),
+        array('`[^\\n]*?`', 'sb'),
+        array('(?:[rR]|[uU][rR]|[rR][uU])"""', 's', 'tdqs_raw'),
+        array('(?:[rR]|[uU][rR]|[rR][uU])\'\'\'', 's', 'tsqs_raw'),
+        array('(?:[rR]|[uU][rR]|[rR][uU])"', 's', 'dqs_raw'),
+        array('(?:[rR]|[uU][rR]|[rR][uU])\'', 's', 'sqs_raw'),
+        array('[uU]?"""', 's', 'tdqs'),
+        array('[uU]?\'\'\'', 's', 'tsqs'),
+        array('[uU]?"', 's', 'dqs'),
+        array('[uU]?\'', 's', 'sqs'),
+        array('@[\\w.]+', 'nd'),
+        array('[a-zA-Z_]\\w*', 'n'),
+        array('(\\d+\\.\\d*|\\d*\\.\\d+)([eE][+-]?[0-9]+)?j?', 'mf'),
+        array('\\d+[eE][+-]?[0-9]+j?', 'mf'),
+        array('0[0-7]+j?', 'mo'),
+        array('0[bB][01]+', 'mb'),
+        array('0[xX][a-fA-F0-9]+', 'mh'),
+        array('\\d+L', 'il'),
+        array('\\d+j?', 'mi'),
+        array('.', null),
+      )),
+
+      'funcname' => array_merge($nonsemantic_rules, array(
+        array('[a-zA-Z_]\w*', 'nf', '!pop'),
+        array('', null, '!pop'),
+      )),
+
+      'classname' => array_merge($nonsemantic_rules, array(
+        array('[a-zA-Z_]\w*', 'nc', '!pop'),
+        array('', null, '!pop'),
+      )),
+
+      'fromimport' => array_merge($nonsemantic_rules, array(
+        array('import\b', 'kn', '!pop'),
+        // if None occurs here, it's "raise x from None", since None can
+        // never be a module name
+        array('None\b', 'bp', '!pop'),
+        // sadly, in "raise x from y" y will be highlighted as namespace too
+        array('[a-zA-Z_.][\w.]*', 'nn'),
+        array('', null, '!pop'),
+      )),
+
+      'import' => array_merge($nonsemantic_rules, array(
+        array('as\b', 'kn'),
+        array(',', 'o'),
+        array('[a-zA-Z_.][\w.]*', 'nn'),
+        array('', null, '!pop'),
+      )),
+
+      'dqs_raw' => $dqs,
+      'sqs_raw' => $sqs,
+      'dqs' => array_merge($stringescape, $dqs),
+      'sqs' => array_merge($stringescape, $sqs),
+      'tdqs_raw' => $tdqs,
+      'tsqs_raw' => $tsqs,
+      'tdqs' => array_merge($stringescape, $tdqs),
+      'tsqs' => array_merge($stringescape, $tsqs),
+
+    );
+  }
+}
diff --git a/src/markup/syntax/engine/PhutilDefaultSyntaxHighlighterEngine.php b/src/markup/syntax/engine/PhutilDefaultSyntaxHighlighterEngine.php
--- a/src/markup/syntax/engine/PhutilDefaultSyntaxHighlighterEngine.php
+++ b/src/markup/syntax/engine/PhutilDefaultSyntaxHighlighterEngine.php
@@ -78,6 +78,13 @@
         ->getHighlightFuture($source);
     }
 
+    if ($language == 'py') {
+      return id(new PhutilLexerSyntaxHighlighter())
+        ->setConfig('lexer', new PhutilPythonFragmentLexer())
+        ->setConfig('language', 'py')
+        ->getHighlightFuture($source);
+    }
+
     if ($language == 'invisible') {
       return id(new PhutilInvisibleSyntaxHighlighter())
         ->getHighlightFuture($source);