Changeset View
Changeset View
Standalone View
Standalone View
src/parser/calendar/ics/PhutilICSParser.php
<?php | <?php | ||||
final class PhutilICSParser extends Phobject { | final class PhutilICSParser extends Phobject { | ||||
private $stack; | private $stack; | ||||
private $node; | private $node; | ||||
private $document; | private $document; | ||||
private $lines; | |||||
private $cursor; | |||||
const PARSE_MISSING_END = 'missing-end'; | |||||
const PARSE_INITIAL_UNFOLD = 'initial-unfold'; | |||||
const PARSE_UNEXPECTED_CHILD = 'unexpected-child'; | |||||
const PARSE_EXTRA_END = 'extra-end'; | |||||
const PARSE_MISMATCHED_SECTIONS = 'mismatched-sections'; | |||||
const PARSE_ROOT_PROPERTY = 'root-property'; | |||||
const PARSE_BAD_BASE64 = 'bad-base64'; | |||||
const PARSE_BAD_BOOLEAN = 'bad-boolean'; | |||||
const PARSE_UNEXPECTED_TEXT = 'unexpected-text'; | |||||
const PARSE_MALFORMED_DOUBLE_QUOTE = 'malformed-double-quote'; | |||||
const PARSE_MALFORMED_PARAMETER_NAME = 'malformed-parameter'; | |||||
const PARSE_MALFORMED_PROPERTY = 'malformed-property'; | |||||
const PARSE_MISSING_VALUE = 'missing-value'; | |||||
const PARSE_UNESCAPED_BACKSLASH = 'unescaped-backslash'; | |||||
public function parseICSData($data) { | public function parseICSData($data) { | ||||
$this->stack = array(); | $this->stack = array(); | ||||
$this->node = null; | $this->node = null; | ||||
$this->document = null; | $this->cursor = null; | ||||
$lines = $this->unfoldICSLines($data); | $lines = $this->unfoldICSLines($data); | ||||
$this->lines = $lines; | |||||
$root = $this->newICSNode('<ROOT>'); | |||||
$this->stack[] = $root; | |||||
$this->node = $root; | |||||
foreach ($lines as $line) { | foreach ($lines as $key => $line) { | ||||
$this->cursor = $key; | |||||
$matches = null; | $matches = null; | ||||
if (preg_match('(^BEGIN:(.*)\z)', $line, $matches)) { | if (preg_match('(^BEGIN:(.*)\z)', $line, $matches)) { | ||||
$this->beginParsingNode($matches[1]); | $this->beginParsingNode($matches[1]); | ||||
} else if (preg_match('(^END:(.*)\z)', $line, $matches)) { | } else if (preg_match('(^END:(.*)\z)', $line, $matches)) { | ||||
$this->endParsingNode($matches[1]); | $this->endParsingNode($matches[1]); | ||||
} else { | } else { | ||||
$this->parseICSProperty($line); | if (count($this->stack) < 2) { | ||||
} | |||||
} | |||||
if (!$this->document) { | |||||
$this->raiseParseFailure( | $this->raiseParseFailure( | ||||
self::PARSE_ROOT_PROPERTY, | |||||
pht( | pht( | ||||
'Expected ICS document to define a "VCALENDAR" section.')); | 'Found unexpected property at ICS document root.')); | ||||
} | |||||
$this->parseICSProperty($line); | |||||
} | |||||
} | } | ||||
if ($this->stack) { | if (count($this->stack) > 1) { | ||||
$this->raiseParseFailure( | $this->raiseParseFailure( | ||||
self::PARSE_MISSING_END, | |||||
pht( | pht( | ||||
'Expected all "BEGIN:" sections in ICS document to have '. | 'Expected all "BEGIN:" sections in ICS document to have '. | ||||
'corresponding "END:" sections.')); | 'corresponding "END:" sections.')); | ||||
} | } | ||||
$document = $this->document; | $this->node = null; | ||||
$this->document = null; | $this->lines = null; | ||||
$this->cursor = null; | |||||
return $document; | return $root; | ||||
} | } | ||||
private function getNode() { | private function getNode() { | ||||
return $this->node; | return $this->node; | ||||
} | } | ||||
private function unfoldICSLines($data) { | private function unfoldICSLines($data) { | ||||
$lines = phutil_split_lines($data, $retain_endings = false); | $lines = phutil_split_lines($data, $retain_endings = false); | ||||
$this->lines = $lines; | |||||
// ICS files are wrapped at 75 characters, with overlong lines continued | // ICS files are wrapped at 75 characters, with overlong lines continued | ||||
// on the following line with an initial space or tab. Unwrap all of the | // on the following line with an initial space or tab. Unwrap all of the | ||||
// lines in the file. | // lines in the file. | ||||
$last = null; | $last = null; | ||||
foreach ($lines as $idx => $line) { | foreach ($lines as $idx => $line) { | ||||
$this->cursor = $idx; | |||||
if (!preg_match('/^[ \t]/', $line)) { | if (!preg_match('/^[ \t]/', $line)) { | ||||
$last = $idx; | $last = $idx; | ||||
continue; | continue; | ||||
} | } | ||||
if ($last === null) { | if ($last === null) { | ||||
throw new Exception( | $this->raiseParseFailure( | ||||
self::PARSE_INITIAL_UNFOLD, | |||||
pht( | pht( | ||||
'First line of ICS file begins with a space or tab, but this '. | 'First line of ICS file begins with a space or tab, but this '. | ||||
'marks a continuation line.')); | 'marks a line which should be unfolded.')); | ||||
} | } | ||||
$lines[$last] = $lines[$last].substr($line, 1); | $lines[$last] = $lines[$last].substr($line, 1); | ||||
unset($lines[$idx]); | unset($lines[$idx]); | ||||
} | } | ||||
return $lines; | return $lines; | ||||
} | } | ||||
private function beginParsingNode($type) { | private function beginParsingNode($type) { | ||||
$node = $this->getNode(); | $node = $this->getNode(); | ||||
$new_node = $this->newICSNode($type); | $new_node = $this->newICSNode($type); | ||||
if ($node) { | |||||
if ($node instanceof PhutilCalendarContainerNode) { | if ($node instanceof PhutilCalendarContainerNode) { | ||||
$node->appendChild($new_node); | $node->appendChild($new_node); | ||||
} else { | } else { | ||||
$this->raiseParseFailure( | $this->raiseParseFailure( | ||||
self::PARSE_UNEXPECTED_CHILD, | |||||
pht( | pht( | ||||
'Found unexpected node "%s" inside node "%s".', | 'Found unexpected node "%s" inside node "%s".', | ||||
$new_node->getAttribute('ics.type'), | $new_node->getAttribute('ics.type'), | ||||
$node->getAttribute('ics.type'))); | $node->getAttribute('ics.type'))); | ||||
} | } | ||||
} else { | |||||
if ($new_node instanceof PhutilCalendarDocumentNode) { | |||||
if ($this->document) { | |||||
$this->raiseParseFailure( | |||||
pht( | |||||
'Found multiple "VCALENDAR" nodes in ICS document, '. | |||||
'expected only one.')); | |||||
} else { | |||||
$this->document = $new_node; | |||||
} | |||||
} else { | |||||
$this->raiseParseFailure( | |||||
pht( | |||||
'Expected ICS document to begin "BEGIN:VCALENDAR".')); | |||||
} | |||||
} | |||||
$this->stack[] = $new_node; | $this->stack[] = $new_node; | ||||
$this->node = $new_node; | $this->node = $new_node; | ||||
return $this; | return $this; | ||||
} | } | ||||
private function newICSNode($type) { | private function newICSNode($type) { | ||||
switch ($type) { | switch ($type) { | ||||
case '<ROOT>': | |||||
$node = new PhutilCalendarRootNode(); | |||||
break; | |||||
case 'VCALENDAR': | case 'VCALENDAR': | ||||
$node = new PhutilCalendarDocumentNode(); | $node = new PhutilCalendarDocumentNode(); | ||||
break; | break; | ||||
case 'VEVENT': | case 'VEVENT': | ||||
$node = new PhutilCalendarEventNode(); | $node = new PhutilCalendarEventNode(); | ||||
break; | break; | ||||
default: | default: | ||||
$node = new PhutilCalendarRawNode(); | $node = new PhutilCalendarRawNode(); | ||||
break; | break; | ||||
} | } | ||||
$node->setAttribute('ics.type', $type); | $node->setAttribute('ics.type', $type); | ||||
return $node; | return $node; | ||||
} | } | ||||
private function endParsingNode($type) { | private function endParsingNode($type) { | ||||
$node = $this->getNode(); | $node = $this->getNode(); | ||||
if (!$node) { | if ($node instanceof PhutilCalendarRootNode) { | ||||
$this->raiseParseFailure( | $this->raiseParseFailure( | ||||
self::PARSE_EXTRA_END, | |||||
pht( | pht( | ||||
'Found unexpected "END" without a "BEGIN".')); | 'Found unexpected "END" without a "BEGIN".')); | ||||
} | } | ||||
$old_type = $node->getAttribute('ics.type'); | $old_type = $node->getAttribute('ics.type'); | ||||
if ($old_type != $type) { | if ($old_type != $type) { | ||||
$this->raiseParseFailure( | $this->raiseParseFailure( | ||||
self::PARSE_MISMATCHED_SECTIONS, | |||||
pht( | pht( | ||||
'Found mismatched "BEGIN" ("%s") and "END" ("%s") sections.', | 'Found mismatched "BEGIN" ("%s") and "END" ("%s") sections.', | ||||
$old_type, | $old_type, | ||||
$type)); | $type)); | ||||
} | } | ||||
array_pop($this->stack); | array_pop($this->stack); | ||||
if ($this->stack) { | |||||
$this->node = last($this->stack); | $this->node = last($this->stack); | ||||
} else { | |||||
$this->node = null; | |||||
} | |||||
return $this; | return $this; | ||||
} | } | ||||
private function parseICSProperty($line) { | private function parseICSProperty($line) { | ||||
$matches = null; | $matches = null; | ||||
// Properties begin with an alphanumeric name with no escaping, followed | // Properties begin with an alphanumeric name with no escaping, followed | ||||
// by either a ";" (to begin a list of parameters) or a ":" (to begin | // by either a ";" (to begin a list of parameters) or a ":" (to begin | ||||
// the actual field body). | // the actual field body). | ||||
$ok = preg_match('(^([^;:]+)([;:])(.*)\z)', $line, $matches); | $ok = preg_match('(^([A-Za-z0-9-]+)([;:])(.*)\z)', $line, $matches); | ||||
if (!$ok) { | if (!$ok) { | ||||
$this->raiseParseFailure( | $this->raiseParseFailure( | ||||
self::PARSE_MALFORMED_PROPERTY, | |||||
pht( | pht( | ||||
'Found malformed line in ICS document: %s', | 'Found malformed property in ICS document.')); | ||||
$line)); | |||||
} | } | ||||
$name = $matches[1]; | $name = $matches[1]; | ||||
$body = $matches[3]; | $body = $matches[3]; | ||||
$has_parameters = ($matches[2] == ';'); | $has_parameters = ($matches[2] == ';'); | ||||
$parameters = array(); | $parameters = array(); | ||||
if ($has_parameters) { | if ($has_parameters) { | ||||
// Parameters are a sensible name, a literal "=", a pile of magic, | // Parameters are a sensible name, a literal "=", a pile of magic, | ||||
// and then maybe a comma and another parameter. | // and then maybe a comma and another parameter. | ||||
while (true) { | while (true) { | ||||
// We're going to get the first couple of parts first. | // We're going to get the first couple of parts first. | ||||
$ok = preg_match('(^([^=]+)=)', $body, $matches); | $ok = preg_match('(^([^=]+)=)', $body, $matches); | ||||
if (!$ok) { | if (!$ok) { | ||||
$this->raiseParseFailure( | $this->raiseParseFailure( | ||||
self::PARSE_MALFORMED_PARAMETER_NAME, | |||||
pht( | pht( | ||||
'Found malformed property in ICS document: %s', | 'Found malformed property in ICS document: %s', | ||||
$body)); | $body)); | ||||
} | } | ||||
$param_name = $matches[1]; | $param_name = $matches[1]; | ||||
$body = substr($body, strlen($matches[0])); | $body = substr($body, strlen($matches[0])); | ||||
// Now we're going to match zero or more values. | // Now we're going to match zero or more values. | ||||
$param_values = array(); | $param_values = array(); | ||||
while (true) { | while (true) { | ||||
// The value can either be a double-quoted string or an unquoted | // The value can either be a double-quoted string or an unquoted | ||||
// string, with some characters forbidden. | // string, with some characters forbidden. | ||||
if (strlen($body) && $body[0] == '"') { | if (strlen($body) && $body[0] == '"') { | ||||
$is_quoted = true; | $is_quoted = true; | ||||
$ok = preg_match( | $ok = preg_match( | ||||
'(^"([^\x00-\x08\x10-\x19"]*)")', | '(^"([^\x00-\x08\x10-\x19"]*)")', | ||||
$body, | $body, | ||||
$matches); | $matches); | ||||
if (!$ok) { | if (!$ok) { | ||||
$this->raiseParseFailure( | $this->raiseParseFailure( | ||||
self::PARSE_MALFORMED_DOUBLE_QUOTE, | |||||
pht( | pht( | ||||
'Found malformed double-quoted string in ICS document '. | 'Found malformed double-quoted string in ICS document '. | ||||
'parameter value: %s', | 'parameter value.')); | ||||
$body)); | |||||
} | } | ||||
} else { | } else { | ||||
$is_quoted = false; | $is_quoted = false; | ||||
$ok = preg_match( | |||||
'(^([^\x00-\x08\x10-\x19";:,]*))', | // It's impossible for this not to match since it can match | ||||
$body, | // nothing, and it's valid for it to match nothing. | ||||
$matches); | preg_match('(^([^\x00-\x08\x10-\x19";:,]*))', $body, $matches); | ||||
if (!$ok) { | |||||
$this->raiseParseFailure( | |||||
pht( | |||||
'Found malformed unquoted string in ICS document '. | |||||
'parameter value: %s', | |||||
$body)); | |||||
} | |||||
} | } | ||||
// NOTE: RFC5545 says "Property parameter values that are not in | // NOTE: RFC5545 says "Property parameter values that are not in | ||||
// quoted-strings are case-insensitive." -- that is, the quoted and | // quoted-strings are case-insensitive." -- that is, the quoted and | ||||
// unquoted representations are not equivalent. Thus, preserve the | // unquoted representations are not equivalent. Thus, preserve the | ||||
// original formatting in case we ever need to respect this. | // original formatting in case we ever need to respect this. | ||||
$param_values[] = array( | $param_values[] = array( | ||||
'value' => $this->unescapeParameterValue($matches[1]), | 'value' => $this->unescapeParameterValue($matches[1]), | ||||
'quoted' => $is_quoted, | 'quoted' => $is_quoted, | ||||
); | ); | ||||
$body = substr($body, strlen($matches[0])); | $body = substr($body, strlen($matches[0])); | ||||
if (!strlen($body)) { | if (!strlen($body)) { | ||||
$this->raiseParseFailure( | $this->raiseParseFailure( | ||||
self::PARSE_MISSING_VALUE, | |||||
pht( | pht( | ||||
'Expected ":" after parameters in ICS document property.')); | 'Expected ":" after parameters in ICS document property.')); | ||||
} | } | ||||
// If we have a comma now, we're going to read another value. Strip | // If we have a comma now, we're going to read another value. Strip | ||||
// it off and keep going. | // it off and keep going. | ||||
if ($body[0] == ',') { | if ($body[0] == ',') { | ||||
$body = substr($body, 1); | $body = substr($body, 1); | ||||
continue; | continue; | ||||
} | } | ||||
// If we have a semicolon, we're going to read another parameter. | |||||
if ($body[0] == ';') { | |||||
break; | |||||
} | |||||
// If we have a colon, this is the last value and also the last | // If we have a colon, this is the last value and also the last | ||||
// property. Break, then handle the colon below. | // property. Break, then handle the colon below. | ||||
if ($body[0] == ':') { | if ($body[0] == ':') { | ||||
break; | break; | ||||
} | } | ||||
$short_body = id(new PhutilUTF8StringTruncator()) | |||||
->setMaximumGlyphs(32) | |||||
->truncateString($body); | |||||
// We aren't expecting anything else. | // We aren't expecting anything else. | ||||
$this->raiseParseFailure( | $this->raiseParseFailure( | ||||
self::PARSE_UNEXPECTED_TEXT, | |||||
pht( | pht( | ||||
'Found unexpected text after reading parameter value: %s', | 'Found unexpected text ("%s") after reading parameter value.', | ||||
$body)); | $short_body)); | ||||
} | } | ||||
$parameters[] = array( | $parameters[] = array( | ||||
'name' => $param_name, | 'name' => $param_name, | ||||
'values' => $param_values, | 'values' => $param_values, | ||||
); | ); | ||||
if ($body[0] == ';') { | |||||
$body = substr($body, 1); | |||||
continue; | |||||
} | |||||
if ($body[0] == ':') { | if ($body[0] == ':') { | ||||
$body = substr($body, 1); | $body = substr($body, 1); | ||||
break; | break; | ||||
} | } | ||||
} | } | ||||
} | } | ||||
$value = $this->unescapeFieldValue($name, $parameters, $body); | $value = $this->unescapeFieldValue($name, $parameters, $body); | ||||
$node = $this->getNode(); | $node = $this->getNode(); | ||||
$raw = $node->getAttribute('ics.properties', array()); | $raw = $node->getAttribute('ics.properties', array()); | ||||
$raw[] = array( | $raw[] = array( | ||||
'name' => $name, | 'name' => $name, | ||||
'parameters' => $parameters, | 'parameters' => $parameters, | ||||
'value' => $value, | 'value' => $value, | ||||
); | ); | ||||
$node->setAttribute('ics.properties', $raw); | $node->setAttribute('ics.properties', $raw); | ||||
} | } | ||||
▲ Show 20 Lines • Show All 115 Lines • ▼ Show 20 Lines | private function unescapeFieldValue($name, array $parameters, $data) { | ||||
foreach ($parameters as $parameter) { | foreach ($parameters as $parameter) { | ||||
if ($parameter['name'] == 'VALUE') { | if ($parameter['name'] == 'VALUE') { | ||||
$value_type = idx(head($parameter['values']), 'value'); | $value_type = idx(head($parameter['values']), 'value'); | ||||
} | } | ||||
} | } | ||||
switch ($value_type) { | switch ($value_type) { | ||||
case 'BINARY': | case 'BINARY': | ||||
$result = base64_decode($data); | $result = base64_decode($data, true); | ||||
if ($result === false) { | if ($result === false) { | ||||
$this->raiseParseFailure( | $this->raiseParseFailure( | ||||
self::PARSE_BAD_BASE64, | |||||
pht( | pht( | ||||
'Unable to decode base64 data: %s', | 'Unable to decode base64 data: %s', | ||||
$data)); | $data)); | ||||
} | } | ||||
break; | break; | ||||
case 'BOOLEAN': | case 'BOOLEAN': | ||||
$map = array( | $map = array( | ||||
'true' => true, | 'true' => true, | ||||
'false' => false, | 'false' => false, | ||||
); | ); | ||||
$result = phutil_utf8_strtolower($data); | $result = phutil_utf8_strtolower($data); | ||||
if (!isset($map[$result])) { | if (!isset($map[$result])) { | ||||
$this->raiseParseFailure( | $this->raiseParseFailure( | ||||
self::PARSE_BAD_BOOLEAN, | |||||
pht( | pht( | ||||
'Unexpected BOOLEAN value "%s".', | 'Unexpected BOOLEAN value "%s".', | ||||
$data)); | $data)); | ||||
} | } | ||||
$result = $map[$result]; | $result = $map[$result]; | ||||
break; | break; | ||||
case 'CAL-ADDRESS': | case 'CAL-ADDRESS': | ||||
$result = $data; | $result = $data; | ||||
▲ Show 20 Lines • Show All 76 Lines • ▼ Show 20 Lines | foreach (phutil_utf8v($data) as $c) { | ||||
default: | default: | ||||
$buf .= $c; | $buf .= $c; | ||||
break; | break; | ||||
} | } | ||||
} | } | ||||
} | } | ||||
if ($esc) { | if ($esc) { | ||||
$this->raiseParsFailure( | $this->raiseParseFailure( | ||||
self::PARSE_UNESCAPED_BACKSLASH, | |||||
pht( | pht( | ||||
'ICS document contains TEXT value ending with unescaped '. | 'ICS document contains TEXT value ending with unescaped '. | ||||
'backslash.')); | 'backslash.')); | ||||
} | } | ||||
$result[] = $buf; | $result[] = $buf; | ||||
return $result; | return $result; | ||||
} | } | ||||
private function raiseParseFailure($message) { | private function raiseParseFailure($code, $message) { | ||||
throw new Exception($message); | if ($this->lines && isset($this->lines[$this->cursor])) { | ||||
$message = pht( | |||||
"ICS Parse Error near line %s:\n\n>>> %s\n\n%s", | |||||
$this->cursor + 1, | |||||
$this->lines[$this->cursor], | |||||
$message); | |||||
} else { | |||||
$message = pht( | |||||
'ICS Parse Error: %s', | |||||
$message); | |||||
} | |||||
throw id(new PhutilICSParserException($message)) | |||||
->setParserFailureCode($code); | |||||
} | } | ||||
} | } |