Changeset View
Changeset View
Standalone View
Standalone View
src/aphront/multipartparser/AphrontMultipartParser.php
- This file was added.
<?php
/**
 * Incremental (streaming) parser for "multipart/form-data" request bodies.
 *
 * Typical use:
 *
 *   - call @{method:setContentType} with the raw "Content-Type" header value;
 *   - call @{method:beginParse} once to extract the boundary and reset state;
 *   - call @{method:continueParse} any number of times with successive chunks
 *     of the body, split at arbitrary byte offsets;
 *   - call @{method:endParse} to obtain the list of parsed parts.
 *
 * The parser is a small state machine. The states are:
 *
 *   - "bodynewline": at the start of the body, or just after a "\r\n" inside
 *     part data; looking for "--" followed by the boundary.
 *   - "endboundary": just after a boundary; looking for "--" (end of all
 *     parts) or "\r\n" (a new part follows).
 *   - "header": reading the "\r\n"-terminated header lines of a part.
 *   - "body": reading part data (or the preamble, which is discarded).
 *   - "epilogue": after the closing boundary; everything is discarded.
 */
final class AphrontMultipartParser extends Phobject {

  private $contentType;
  private $boundary;
  private $buffer;
  // NOTE: Declared but not referenced anywhere in this class.
  private $body;
  private $state;
  private $part;
  private $parts;

  /**
   * Set the raw "Content-Type" header value of the request being parsed.
   *
   * @param string Header value, including the "boundary" parameter.
   * @return this
   */
  public function setContentType($content_type) {
    $this->contentType = $content_type;
    return $this;
  }

  /**
   * Get the "Content-Type" header value previously provided.
   *
   * @return string|null Header value, or null if none has been set.
   */
  public function getContentType() {
    return $this->contentType;
  }

  /**
   * Prepare to parse a body: validate the content type, extract the
   * boundary, and reset all parser state.
   *
   * @return void
   * @throws PhutilInvalidStateException If no content type has been set.
   * @throws Exception If the content type is not "multipart/form-data" or
   *   has no usable "boundary" parameter.
   */
  public function beginParse() {
    $content_type = $this->getContentType();
    if ($content_type === null) {
      throw new PhutilInvalidStateException('setContentType');
    }

    // Media types are case-insensitive, so match without regard to case.
    if (!preg_match('(^multipart/form-data)i', $content_type)) {
      throw new Exception(
        pht(
          'Expected "multipart/form-data" content type when executing a '.
          'multipart body read.'));
    }

    $type_parts = preg_split('(\s*;\s*)', $content_type);
    $boundary = null;
    foreach ($type_parts as $type_part) {
      $matches = null;
      // Parameter names are case-insensitive too.
      if (preg_match('(^boundary=(.*))i', $type_part, $matches)) {
        $boundary = trim($matches[1]);

        // The value may be sent as a quoted string, like boundary="xyz".
        // The quotes are not part of the boundary that appears in the body.
        $is_quoted =
          (strlen($boundary) >= 2) &&
          ($boundary[0] === '"') &&
          (substr($boundary, -1) === '"');
        if ($is_quoted) {
          $boundary = (string)substr($boundary, 1, -1);
        }
        break;
      }
    }

    // An empty boundary would make every line starting with "--" look like
    // a delimiter, so treat it the same as a missing one.
    if ($boundary === null || !strlen($boundary)) {
      throw new Exception(
        pht('Received "multipart/form-data" request with no "boundary".'));
    }

    $this->parts = array();
    $this->part = null;
    $this->buffer = '';
    $this->boundary = $boundary;

    // We're looking for a (usually empty) body before the first boundary.
    $this->state = 'bodynewline';
  }

  /**
   * Feed the next chunk of the request body to the parser.
   *
   * Chunks may be split at any byte offset. Bytes which can not be
   * interpreted yet are kept in an internal buffer until more data arrives.
   *
   * @param string Next chunk of raw body bytes.
   * @return void
   * @throws Exception If the body is malformed, or the parser is in an
   *   unknown state (for example, because @{method:beginParse} was not
   *   called first).
   */
  public function continueParse($bytes) {
    $this->buffer .= $bytes;

    $continue = true;
    while ($continue) {
      switch ($this->state) {
        case 'endboundary':
          // We've just parsed a boundary. Next, we expect either "--" (which
          // indicates we've reached the end of the parts) or "\r\n" (which
          // indicates we should read the headers for the next part).

          if (strlen($this->buffer) < 2) {
            // We don't have enough bytes yet, so wait for more.
            $continue = false;
            break;
          }

          if (!strncmp($this->buffer, '--', 2)) {
            // This is "--" after a boundary, so we're done. We'll read the
            // rest of the body (the "epilogue") and discard it.
            $this->discardBytes(2);
            $this->state = 'epilogue';
            $this->part = null;
            break;
          }

          if (!strncmp($this->buffer, "\r\n", 2)) {
            // This is "\r\n" after a boundary, so we're going to read the
            // headers for a part.
            $this->discardBytes(2);
            $this->state = 'header';

            // Create the object to hold the part we're about to read.
            $part = new AphrontMultipartPart();
            $this->parts[] = $part;
            $this->part = $part;
            break;
          }

          throw new Exception(
            pht('Expected "\r\n" or "--" after multipart data boundary.'));

        case 'header':
          // We've just parsed a boundary, followed by "\r\n". We are going
          // to read the headers for this part. They are in the form of HTTP
          // headers and terminated by "\r\n". The section is terminated by
          // a line with no header on it.

          if (strlen($this->buffer) < 2) {
            // We don't have enough data to find a "\r\n", so wait for more.
            $continue = false;
            break;
          }

          if (!strncmp("\r\n", $this->buffer, 2)) {
            // This line immediately began "\r\n", so we're done with parsing
            // headers. Start parsing the body.
            $this->discardBytes(2);
            $this->state = 'body';
            break;
          }

          // This is an actual header, so look for the end of it.
          $header_len = strpos($this->buffer, "\r\n");
          if ($header_len === false) {
            // We don't have a full header yet, so wait for more data.
            $continue = false;
            break;
          }

          $header_buf = substr($this->buffer, 0, $header_len);
          $this->part->appendRawHeader($header_buf);

          // Skip the header line and the "\r\n" which terminated it.
          $this->discardBytes($header_len + 2);
          break;

        case 'body':
          // We've parsed a boundary and headers, and are parsing the data for
          // this part. The data is terminated by "\r\n--", then the boundary.
          // We'll look for "\r\n", then switch to the "bodynewline" state if
          // we find it.

          $marker = "\r";
          $marker_pos = strpos($this->buffer, $marker);

          if ($marker_pos === false) {
            // There's no "\r" anywhere in the buffer, so we can just read it
            // as provided. Then, since we read all the data, we're done until
            // we get more.

            // Note that if we're in the preamble, we won't have a "part"
            // object and will just discard the data.
            if ($this->part) {
              $this->part->appendData($this->buffer);
            }
            $this->buffer = '';
            $continue = false;
            break;
          }

          if ($marker_pos > 0) {
            // There are bytes before the "\r": they're plain data, so emit
            // them and leave the buffer starting at the "\r".
            if ($this->part) {
              $this->part->appendData(substr($this->buffer, 0, $marker_pos));
            }
            $this->discardBytes($marker_pos);
          }

          $expect = "\r\n";
          $expect_len = strlen($expect);
          if (strlen($this->buffer) < $expect_len) {
            // We don't have enough bytes yet to know if this is "\r\n"
            // or not.
            $continue = false;
            break;
          }

          if (strncmp($this->buffer, $expect, $expect_len)) {
            // The "\r" isn't followed by "\n", so it's ordinary data. Only
            // consume the "\r" itself: the byte after it may be another "\r"
            // which begins the real "\r\n" before a boundary, and eating it
            // here would make us miss that boundary.
            if ($this->part) {
              $this->part->appendData($marker);
            }
            $this->discardBytes(strlen($marker));
            break;
          }

          // Eat the "\r\n".
          $this->discardBytes($expect_len);
          $this->state = 'bodynewline';
          break;

        case 'bodynewline':
          // We've parsed a newline in a body, or we just started parsing the
          // request. In either case, we're looking for "--", then the boundary.
          // If we find it, this section is done. If we don't, we consume the
          // bytes and move on.

          $expect = '--'.$this->boundary;
          $expect_len = strlen($expect);

          if (strlen($this->buffer) < $expect_len) {
            // We don't have enough bytes yet, so wait for more.
            $continue = false;
            break;
          }

          if (strncmp($this->buffer, $expect, $expect_len)) {
            // This wasn't the boundary, so return to the "body" state and
            // consume it. (But first, we need to append the "\r\n" which we
            // ate earlier.)
            if ($this->part) {
              $this->part->appendData("\r\n");
            }
            $this->state = 'body';
            break;
          }

          // This is the boundary, so toss it and move on.
          $this->discardBytes($expect_len);
          $this->state = 'endboundary';
          break;

        case 'epilogue':
          // We just discard any epilogue.
          $this->buffer = '';
          $continue = false;
          break;

        default:
          throw new Exception(
            pht(
              'Unknown parser state "%s".',
              $this->state));
      }
    }
  }

  /**
   * Finish parsing and return the parts which were read.
   *
   * @return list<AphrontMultipartPart> Parts, in the order they appeared.
   * @throws Exception If the closing boundary was never seen.
   */
  public function endParse() {
    if ($this->state !== 'epilogue') {
      throw new Exception(
        pht(
          'Expected "multipart/form-data" parse to end '.
          'in state "epilogue".'));
    }

    return $this->parts;
  }

  /**
   * Drop bytes from the front of the internal buffer.
   *
   * The cast keeps the buffer a string even on PHP versions where substr()
   * returns false when asked for everything past the end of the string.
   *
   * @param int Number of leading bytes to drop.
   * @return void
   */
  private function discardBytes($length) {
    $this->buffer = (string)substr($this->buffer, $length);
  }

}