diff --git a/src/Fetch/Attachment.php b/src/Fetch/Attachment.php index 1353571..ed3c6ca 100644 --- a/src/Fetch/Attachment.php +++ b/src/Fetch/Attachment.php @@ -109,20 +109,28 @@ public function __construct(Message $message, $structure, $partIdentifier = null } /** - * This function returns the data of the attachment. Combined with getMimeType() it can be used to directly output - * data to a browser. + * This function returns the data of the attachment. Combined with + * getMimeType() it can be used to directly output data to a browser. + * + * If the attachment file is message/rfc822, skip processing/decoding the + * contents in order to avoid mangling the file. Otherwise, decode as + * normal to ensure other files are handled correctly. * * @return string */ public function getData() { if (!isset($this->data)) { - $messageBody = isset($this->partId) ? + $rawBody = isset($this->partId) ? imap_fetchbody($this->imapStream, $this->messageId, $this->partId, FT_UID) : imap_body($this->imapStream, $this->messageId, FT_UID); - $messageBody = Message::decode($messageBody, $this->encoding); - $this->data = $messageBody; + if (strpos(strtolower($this->mimeType), "rfc822") !== false) { + $this->data = $rawBody; + } else { + $decodedBody = Message::decode($rawBody, $this->encoding); + $this->data = $decodedBody; + } } return $this->data; diff --git a/src/Fetch/Message.php b/src/Fetch/Message.php index 6a0b8c6..6157684 100755 --- a/src/Fetch/Message.php +++ b/src/Fetch/Message.php @@ -20,6 +20,19 @@ */ class Message { + /** + * Primary Body Types + * According to http://www.php.net/manual/en/function.imap-fetchstructure.php + */ + const TYPE_TEXT = 0; + const TYPE_MULTIPART = 1; + const TYPE_MESSAGE = 2; + const TYPE_APPLICATION = 3; + const TYPE_AUDIO = 4; + const TYPE_IMAGE = 5; + const TYPE_VIDEO = 6; + const TYPE_OTHER = 7; + /** * This is the connection/mailbox class that the email came from. 
* @@ -167,25 +180,7 @@ class Message * * @var string */ - public static $charset = 'UTF-8'; - - /** - * This value defines the flag set for encoding if the mb_convert_encoding - * function can't be found, and in this case iconv encoding will be used. - * - * @var string - */ - public static $charsetFlag = '//TRANSLIT'; - - /** - * These constants can be used to easily access available flags - */ - const FLAG_RECENT = 'recent'; - const FLAG_FLAGGED = 'flagged'; - const FLAG_ANSWERED = 'answered'; - const FLAG_DELETED = 'deleted'; - const FLAG_SEEN = 'seen'; - const FLAG_DRAFT = 'draft'; + public static $charset = 'UTF-8//TRANSLIT'; /** * This constructor takes in the uid for the message and the Imap class representing the mailbox the @@ -212,7 +207,6 @@ public function __construct($messageUniqueId, Server $connection) */ protected function loadMessage() { - /* First load the message overview information */ if(!is_object($messageOverview = $this->getOverview())) @@ -251,8 +245,24 @@ protected function loadMessage() $this->processStructure($structure); } else { // multipart - foreach ($structure->parts as $id => $part) + foreach ($structure->parts as $id => $part) { + if (!empty($part->description)) { + $cleanFilename = $this->makeFilenameSafe($part->description); + $part->description = $cleanFilename; + foreach ($part->parameters as $key => $parameter) { + if (strtolower($parameter->attribute) === "name") { + $part->parameters[$key]->value = $cleanFilename; + } + } + foreach ((isset($part->dparameters) ? $part->dparameters : array()) as $key => $dparameter) { + if (strtolower($dparameter->attribute) === "filename") { + $part->dparameters[$key]->value = $cleanFilename; + } + } + } + $this->processStructure($part, $id + 1); + } } return true; @@ -434,35 +444,138 @@ public function getImapBox() } /** - * This function takes in a structure and identifier and processes that part of the message. If that portion of the - * message has its own subparts, those are recursively processed using this function. 
+ * Adds an attachment * - * @param \stdClass $structure - * @param string $partIdentifier + * If a filename is not provided and the attachment is a message/rfc822 + * email, parse the Subject line and use it as the filename. If the Subject + * line is blank or illegible, use a default filename (like Gmail and some + * desktop clients do) + * + * @param array $parameters + * @param \stdClass $structure + * @param string $partIdentifier + * @return boolean Successful attachment of file */ - protected function processStructure($structure, $partIdentifier = null) + protected function addAttachment($parameters, $structure, $partIdentifier) { - $parameters = self::getParametersFromStructure($structure); + if (!(isset($parameters["name"]) || isset($parameters["filename"])) && $structure->type == self::TYPE_MESSAGE) { + $body = isset($partIdentifier) ? + imap_fetchbody($this->imapStream, $this->uid, $partIdentifier, FT_UID) + : imap_body($this->imapStream, $this->uid, FT_UID); + + $headers = iconv_mime_decode_headers($body, 0, self::$charset); + $filename = !empty($headers["Subject"]) ? $this->makeFilenameSafe($headers["Subject"]) : "email"; - if (isset($parameters['name']) || isset($parameters['filename'])) { + $dpar = new \stdClass(); + $dpar->attribute = "filename"; + $dpar->value = str_replace(array("\r", "\n"), '', $filename) . ".eml"; + $structure->dparameters[] = $dpar; + } + + try { $attachment = new Attachment($this, $structure, $partIdentifier); $this->attachments[] = $attachment; - } elseif ($structure->type == 0 || $structure->type == 1) { - $messageBody = isset($partIdentifier) ? + + return true; + } catch (\Exception $e) { + return false; + } + } + + /** + * This function extracts the body of an email part, strips harmful + * Outlook-specific strings from it, processes any encoded one-liners, + * decodes it, converts it to the charset of the parent message, and + * returns the result. 
+ * + * @see Message::decode() + * @param \stdClass $structure + * @param string $partIdentifier + * @return string + */ + protected function processBody($structure, $partIdentifier) + { + $rawBody = isset($partIdentifier) ? imap_fetchbody($this->imapStream, $this->uid, $partIdentifier, FT_UID) : imap_body($this->imapStream, $this->uid, FT_UID); - $messageBody = self::decode($messageBody, $structure->encoding); + $bodyNoOutlook = $this->stripOutlookSpecificStrings($rawBody); + + $decodedBody = self::decode($bodyNoOutlook, $structure->encoding); + + $inCharset = mb_detect_encoding($decodedBody, array( + "US-ASCII", + "UTF-8", + "ISO-8859-1", + "UTF-7", + "ASCII", + "EUC-JP", + "SJIS", + "eucJP-win", + "SJIS-win", + "JIS", + "ISO-2022-JP", + "UTF-16", + "UTF-32", + "UCS2", + "UCS4"), + true); + + if ($inCharset && $inCharset !== self::$charset) { + $decodedBody = iconv($inCharset, self::$charset, $decodedBody); + } + + return $decodedBody; + } - if (!empty($parameters['charset']) && $parameters['charset'] !== self::$charset) { - if (function_exists('mb_convert_encoding')) { - $messageBody = mb_convert_encoding($messageBody, self::$charset, $parameters['charset']); - } else { - $messageBody = iconv($parameters['charset'], self::$charset . self::$charsetFlag, $messageBody); - } - } + /** + * Removes "Thread-Index:" line from the message body which is placed there + * by Outlook and messes up the other processing steps. 
+ * + * @param string $bodyBefore + * @return string + */ + protected function stripOutlookSpecificStrings($bodyBefore) + { + $bodyAfter = preg_replace('/Thread-Index:.*$/m', "", $bodyBefore); - if (strtolower($structure->subtype) === 'plain' || ($structure->type == 1 && strtolower($structure->subtype) !== 'alternative')) { + return $bodyAfter; + } + + /** + * This function takes in a string to be used as a filename and replaces + * any dangerous characters with underscores to ensure compatibility with + * various file systems + * + * @param string $oldName + * @return string + */ + protected function makeFilenameSafe($oldName) + { + return preg_replace('/[<>"{}|\\\^\[\]`;\/\?:@&=$,]/',"_", $oldName); + } + + /** + * This function takes in a structure and identifier and processes that part of the message. If that portion of the + * message has its own subparts, those are recursively processed using this function. + * + * @param \stdClass $structure + * @param string $partIdentifier + */ + protected function processStructure($structure, $partIdentifier = null) + { + $attached = false; + + // TODO: Get HTML attachments working, too! + if (isset($structure->disposition) && strtolower($structure->disposition) == "attachment") { + $parameters = self::getParametersFromStructure($structure); + $attached = $this->addAttachment($parameters, $structure, $partIdentifier); + } + + if (!$attached && ($structure->type == self::TYPE_TEXT || $structure->type == self::TYPE_MULTIPART)) { + $messageBody = $this->processBody($structure, $partIdentifier); + + if (strtolower($structure->subtype) === 'plain' || ($structure->type == self::TYPE_MULTIPART && strtolower($structure->subtype) !== 'alternative')) { if (isset($this->plaintextMessage)) { $this->plaintextMessage .= PHP_EOL . 
PHP_EOL; } else { @@ -479,17 +592,16 @@ protected function processStructure($structure, $partIdentifier = null) $this->htmlMessage .= $messageBody; } - } - if (isset($structure->parts)) { // multipart: iterate through each part + if (isset($structure->parts)) { // multipart: iterate through each part + foreach ($structure->parts as $partIndex => $part) { + $partId = $partIndex + 1; - foreach ($structure->parts as $partIndex => $part) { - $partId = $partIndex + 1; + if (isset($partIdentifier)) + $partId = $partIdentifier . '.' . $partId; - if (isset($partIdentifier)) - $partId = $partIdentifier . '.' . $partId; - - $this->processStructure($part, $partId); + $this->processStructure($part, $partId); + } } } } @@ -566,13 +678,17 @@ public static function typeIdToString($id) public static function getParametersFromStructure($structure) { $parameters = array(); - if (isset($structure->parameters)) - foreach ($structure->parameters as $parameter) + if (isset($structure->parameters)) { + foreach ($structure->parameters as $parameter) { $parameters[strtolower($parameter->attribute)] = $parameter->value; + } + } - if (isset($structure->dparameters)) - foreach ($structure->dparameters as $parameter) + if (isset($structure->dparameters)) { + foreach ($structure->dparameters as $parameter) { $parameters[strtolower($parameter->attribute)] = $parameter->value; + } + } return $parameters; } diff --git a/src/Fetch/Server.php b/src/Fetch/Server.php index 4fca523..71ccdd8 100644 --- a/src/Fetch/Server.php +++ b/src/Fetch/Server.php @@ -159,8 +159,6 @@ public function setMailBox($mailbox = '') return false; } - - $this->mailbox = $mailbox; if (isset($this->imapStream)) { $this->setImapStream();