cmc-sales/app/vendors/shells/vault.php
2013-04-07 08:44:37 +10:00

728 lines
18 KiB
PHP
Executable file

<?php
/**
* file: vault
*
* Major re-working of the vault idea.
*
*
* No more IMAP connections. These were way too slow.
*
* Reading these files from the local file system.
*
*/
App::import('Sanitize');
class VaultShell extends Shell {
var $uses = array('Enquiry', 'Contact', 'Invoice', 'PurchaseOrder', 'User', 'Email', 'EmailRecipient', 'Job');
/**
 * Shell entry point: file every not-yet-processed .eml message in the vault directory.
 *
 * For each new message the headers are parsed, the sender/recipients are compared
 * against the known users, and the subject is scanned for Enquiry / Invoice /
 * PurchaseOrder / Job identifiers. A message is saved (with its recipients,
 * attachments and associations) when it involves a known user or references a
 * known identifier. Every handled message is marked as processed by symlinking it
 * into the processed directory.
 *
 * A lock file guards against concurrent runs; it is released on normal completion
 * and also on any die()/exit after it has been acquired.
 *
 * @return void
 */
function main() {
    /******************************************************
    * Config Variables
    * *****************************************************/
    $testing = 0; //Whether to actually move the emails. 1=test, 0=production
    if ($testing == 1) {
        $ripmime_path = '/opt/local/bin/ripmime';
        $email_dir = '/Users/karlcordes/Sites/quotenik/app/emails';
        $vault_dir = '/Users/karlcordes/Sites/quotenik/app/vaultmsgs';
        $processed_dir = '/Users/karlcordes/Sites/quotenik/app/processed_vaultmsgs';
        $lockfile = '/Users/karlcordes/Sites/quotenik/app/vault.lock';
    }
    else { //Production
        $ripmime_path = '/usr/local/bin/ripmime';
        $email_dir = '/var/www/cakephp/app/emails';
        $vault_dir = '/var/www/cakephp/app/vaultmsgs';
        $processed_dir = '/var/www/cakephp/app/processed_vaultmsgs';
        // NOTE(review): the lock lives under /var/www/CMC-Sales while every other
        // production path is under /var/www/cakephp - confirm this is intentional.
        $lockfile = '/var/www/CMC-Sales/app/vault.lock';
    }

    if (file_exists($lockfile)) {
        die("Error: Another process is currently running. Terminating this one");
    }
    if (!touch($lockfile)) {
        die("FATAL ERROR: Unable to create vault.lock");
    }
    // We own the lock from here on. Make sure it is released even when the run
    // is aborted through die()/exit, otherwise a single failure blocks all later runs.
    register_shutdown_function(array($this, '_releaseLock'), $lockfile);

    /* Find the strings we want to look for in the subjects. Build arrays
     * using the string as the keys, IDs as the value
     */
    $enquiries = $this->Enquiry->find('all', array(
        'recursive' => 0,
        'fields' => array('Enquiry.title', 'Enquiry.id')
    ));
    $invoices = $this->Invoice->find('all', array(
        'recursive' => 0,
        'fields' => array('Invoice.id', 'Invoice.title')
    ));
    $purchaseOrders = $this->PurchaseOrder->find('all', array(
        'recursive' => 0,
        'fields' => array('PurchaseOrder.id', 'PurchaseOrder.title')
    ));
    $users = $this->User->find('all', array(
        'recursive' => 0,
        'fields' => array('User.id', 'User.email')
    ));
    $jobs = $this->Job->find('all', array(
        'recursive' => 0,
        'fields' => array('Job.id', 'Job.title')
    ));

    $enquiryMap = $this->makeMap($enquiries, 'Enquiry', 'title');
    $invoiceMap = $this->makeMap($invoices, 'Invoice', 'title');
    $poMap = $this->makeMap($purchaseOrders, 'PurchaseOrder', 'title');
    // Addresses parsed from headers are lower-cased, so the lookup keys must be too.
    $userMap = array_change_key_case($this->makeMap($users, 'User', 'email'), CASE_LOWER);
    $jobMap = $this->makeMap($jobs, 'Job', 'title');

    $emails = scandir($vault_dir);
    $processed = scandir($processed_dir);
    if ($emails === false || $processed === false) {
        die("FATAL ERROR: Unable to read the vault or processed directory");
    }
    $new = array_diff($emails, $processed);

    /**
     * Loop through the messages.
     *
     * Procedure:
     *
     * 1. Check to see if the recipients (CC, or to), or Sender is in the $userMap.
     * 1a. If one of these is true, we need to capture this email.
     *
     * 2. This email is either to/from/CC a user in the system.
     * We need to associate it with the right PO/Invoice/Enquiries etc.
     * Check the subject for patterns that match Identifiers.
     */
    foreach ($new as $email_filename) {
        echo "Handling $email_filename\n";
        if (($email_filename == '.') || ($email_filename == '..')) {
            continue;
        }
        if (!$this->isEmlFile($vault_dir, $email_filename)) {
            echo "Incorrect file extension. Skipping this file";
            continue;
        }

        $content = file_get_contents($vault_dir."/".$email_filename);
        if (!$content) {
            echo "No Content Found. Ignoring this email\n";
            if (!$this->makeSymlink($email_filename, $vault_dir, $processed_dir)) {
                echo "Unable to make symlink to process this email Critical error: {$email_filename}\n";
            }
            // Never fall through: there is nothing to parse in an empty message.
            continue;
        }

        $content = str_replace("\r", "", $content);
        $headers = imap_rfc822_parse_headers($content);

        $recipients = $this->getRecipients($headers);
        foreach ($recipients as $type => $addressList) {
            if (!is_array($addressList)) {
                $recipients[$type] = array();
            }
        }

        // Used for filing attachments into month-year folders.
        // Fall back to the file time when the Date header is missing or unparseable,
        // so such mail is not filed under 01-1970.
        $unix_time = isset($headers->date) ? strtotime($headers->date) : false;
        if ($unix_time === false) {
            $unix_time = filemtime($vault_dir."/".$email_filename);
            if ($unix_time === false) {
                $unix_time = time();
            }
        }

        // A single known address anywhere in From/To/CC is enough to keep the email.
        $fromKnownUser = false;
        foreach ($recipients['from'] as $email) {
            if ($this->userExists($email, $userMap)) {
                $fromKnownUser = true;
                break;
            }
        }
        $saveThis = $fromKnownUser; //Set to true, if To,From,CC is a Known User.
        if (!$saveThis) {
            foreach (array_merge($recipients['to'], $recipients['cc']) as $email) {
                if ($this->userExists($email, $userMap)) {
                    $saveThis = true;
                    break;
                }
            }
        }

        if (!isset($headers->subject)) { //Emails without a subject are not welcome. Skip it.
            echo "No Subject Found. Ignoring this email\n";
            if (!$this->makeSymlink($email_filename, $vault_dir, $processed_dir)) {
                die("Unable to make symlink to process this email Critical error: {$email_filename}");
            }
            continue;
        }

        // A MIME-encoded subject can decode into several parts; join them so that
        // identifiers and the stored subject cover the whole header.
        $subjDecoded = imap_mime_header_decode($headers->subject);
        if (!is_array($subjDecoded)) {
            $subjDecoded = array();
        }
        $subjectText = '';
        foreach ($subjDecoded as $sub) {
            $subjectText .= $sub->text;
        }

        $found_enquiries = $this->checkValidIdentifier($subjectText, $enquiryMap, 'enquiry');
        $found_invoices = $this->checkValidIdentifier($subjectText, $invoiceMap, 'invoice');
        $found_pos = $this->checkValidIdentifier($subjectText, $poMap, 'purchaseorder');
        $found_jobs = $this->checkValidIdentifier($subjectText, $jobMap, 'job');
        $foundIdent = $this->foundIdentifier($found_enquiries)
            || $this->foundIdentifier($found_invoices)
            || $this->foundIdentifier($found_pos)
            || $this->foundIdentifier($found_jobs);

        //We're going to save this Email.
        //Lets grab the attachments (if any) then create the appropriate Objects.
        if ($fromKnownUser == true || $saveThis == true || $foundIdent == true) {
            $recipientsIDs = array('to' => array(), 'from' => array(), 'cc' => array());

            /**
             * Loop over each recipient email found. Build an array containing the User IDs of that user.
             *
             * If that email doesn't have a corresponding User ID, create a new User for this email address.
             */
            foreach ($recipients as $type => $recipientArray) {
                foreach ($recipientArray as $recEmail) {
                    $recEmailLC = strtolower($recEmail);
                    if (isset($userMap[$recEmailLC])) {
                        $recipientsIDs[$type][] = $userMap[$recEmailLC]; //Existing User ID.
                        continue;
                    }

                    echo "Making a new User for: '{$recEmailLC}'\n";
                    $newUser = array('User' => array(
                        'type' => 'contact',
                        'email' => $recEmailLC,
                        'by_vault' => 1
                    ));
                    $this->User->create();
                    if ($this->User->save($newUser, false)) {
                        $newID = $this->User->id;
                        echo "New User '{$recEmail}' Added with ID: {$newID}\n";
                        $recipientsIDs[$type][] = $newID;
                        //Add it to the userMap
                        $userMap[$recEmailLC] = $newID;
                    }
                    else {
                        echo "Serious Error: Unable to create user for email '{$recEmailLC}'\n";
                    }
                }
            }

            echo "---------START MESSAGE -----------------\n";
            echo "Subject: ".$subjectText."\n";

            $newEmail = array();
            $newEmail['Email'] = array();
            if (isset($recipientsIDs['from'][0])) {
                $newEmail['Email']['user_id'] = $recipientsIDs['from'][0]; //We should never have more than one 'from' user ID.
            }
            else {
                print_r($recipientsIDs);
                echo "Email has no From Recipient ID. Ignoring this email\n";
                $this->makeSymlink($email_filename, $vault_dir, $processed_dir);
                continue;
            }

            $newEmail['Email']['udate'] = $unix_time;
            $allowedChars = array('@', ';', ':', '.', '$', '%', '*','#','!',',','[', ']',' ','{','}','|', '(', ')', '-');
            $newEmail['Email']['subject'] = Sanitize::paranoid($subjectText, $allowedChars);
            $newEmail['Email']['filename'] = $email_filename;

            // Only To and CC become EmailRecipient rows; the sender is Email.user_id.
            foreach (array('to', 'cc') as $type) {
                foreach ($recipientsIDs[$type] as $userID) {
                    $newEmail['EmailRecipient'][] = array('user_id' => $userID, 'type' => $type);
                }
            }
            /** End Email creation **/

            /** Start Attachments **/
            $relative_path = $this->getAttachmentDirectory($email_dir, $unix_time);
            $attachments = $this->fetchBodyAttachments($email_filename, $vault_dir, $email_dir, $relative_path, $ripmime_path);
            if (!is_array($attachments)) {
                // fetchBodyAttachments() signals a failed extraction with a non-array value.
                echo "Unable to extract attachments for {$email_filename}\n";
                $attachments = array();
            }
            foreach ($attachments as $attachment) {
                if (empty($attachment['type'])) {
                    continue;
                }
                $newEmail['EmailAttachment'][] = array(
                    'name' => $attachment['name'],
                    'type' => $attachment['type'],
                    'size' => $attachment['size'],
                    'filename' => $attachment['filename'],
                    'is_message_body' => $attachment['is_message_body']
                );
            }
            /** End attachments */

            /* If we found Identifiers for this Email. Create the HABTM associations. */
            foreach ($found_jobs as $jobID) {
                $newEmail['Job'][] = array('job_id' => $jobID);
            }
            foreach ($found_pos as $poID) {
                $newEmail['PurchaseOrder'][] = array('purchase_order_id' => $poID);
            }
            foreach ($found_enquiries as $enqID) {
                $newEmail['Enquiry'][] = array('enquiry_id' => $enqID);
            }
            foreach ($found_invoices as $invID) {
                $newEmail['Invoice'][] = array('invoice_id' => $invID);
            }

            $this->Email->create();
            $emailID = $this->Email->saveAll($newEmail);
            if (!empty($emailID)) {
                echo "Success. We made an email\n";
            }
            else {
                echo "Error! Didn't save the email\n";
            }
            echo "--------END MESSAGE ------\n";
        }
        else {
            echo "email will not be saved. Subject: ".$subjectText."\n";
        }

        if ($this->makeSymlink($email_filename, $vault_dir, $processed_dir)) {
            echo "Created Symlink Sucessfully. Email has been processed\n";
        }
        else {
            echo "Unable to create Symlink for {$email_filename}. Critical Error! \n";
        }
    }

    if (!$this->_releaseLock($lockfile)) {
        die("FATAL ERROR: Unable to remove vault.lock. No further Vault instances can run!");
    }
}

/**
 * Remove the lock file created by main(), at most once per process.
 *
 * Called explicitly at the end of main() and again as a shutdown function.
 * The second call is a no-op so it can never delete a lock that a later
 * instance has created in the meantime.
 *
 * @param string $lockfile Path of the lock file.
 * @return bool True if the lock was removed (or removal was already attempted), false if unlink failed.
 */
function _releaseLock($lockfile) {
    static $released = false;
    if ($released) {
        return true;
    }
    $released = true;
    return unlink($lockfile);
}
/**
 * Tell whether a file in the vault directory has the "eml" extension.
 *
 * The comparison is case-sensitive. Files without any extension are
 * reported as non-eml instead of raising an undefined-index notice.
 *
 * @param string $vault_dir Directory containing the file.
 * @param string $filename  File name relative to $vault_dir.
 * @return bool
 */
function isEmlFile($vault_dir, $filename) {
    // With PATHINFO_EXTENSION, pathinfo() returns '' when there is no extension.
    return pathinfo($vault_dir."/".$filename, PATHINFO_EXTENSION) === 'eml';
}
/**
 * Mark an email as processed by symlinking it into the processed directory.
 *
 * Failure is reported through the return value so that callers can decide how
 * to react; previously the function terminated the script itself, which made
 * every caller's error handling unreachable. If the link already exists and
 * points at the expected file, the call succeeds without touching it.
 *
 * @param string $email_filename File name of the email.
 * @param string $vault_dir      Directory holding the original email.
 * @param string $processed_dir  Directory in which the symlink is created.
 * @return bool True when the symlink exists after the call, false otherwise.
 */
function makeSymlink($email_filename, $vault_dir, $processed_dir) {
    $target = $vault_dir."/".$email_filename;
    $link = $processed_dir."/".$email_filename;

    if (is_link($link) && readlink($link) === $target) {
        return true; // already marked as processed
    }
    return symlink($target, $link);
}
/**
 * Build a lookup array ('HashMap') from a list of find() results:
 * $newMap[$obj[$modelName][$keyName]] = $obj[$modelName]['id'];
 *
 * Keys for the User model are lower-cased so that they can be matched against
 * the lower-cased addresses parsed from email headers.
 *
 * @param array $objects    Rows shaped like array($modelName => array(...)).
 * @param string $modelName Model alias used as the first-level key of each row.
 * @param string $keyName   Field whose value becomes the map key.
 * @return array Map of key => id.
 */
function makeMap($objects,$modelName, $keyName) {
    $newMap = array();
    foreach ($objects as $obj) {
        $key = $obj[$modelName][$keyName];
        if ($modelName == 'User') {
            // strtolower() returns the converted string; it does not modify in place.
            $key = strtolower($key);
        }
        $newMap[$key] = $obj[$modelName]['id'];
    }
    return $newMap;
}
/**
 * Check whether an email address is a key of the given address map.
 *
 * @param string $recipient Email address to look up.
 * @param array  $emailMap  Map keyed by email address (taken by reference, not modified).
 * @return bool True when the address is present with a non-null value.
 */
function userExists($recipient, &$emailMap) {
    return isset($emailMap[$recipient]);
}
/**
 * Find every identifier of the given type in a subject line and translate
 * the ones that exist in $list into their IDs.
 *
 * All occurrences in the subject are considered (not only the first), each
 * distinct identifier is looked up once, and only whole identifiers are
 * looked up - never the regex sub-groups.
 *
 * @param string $subject Decoded subject text.
 * @param array  $list    Map of identifier => ID (taken by reference, not modified).
 * @param string $type    One of 'enquiry', 'invoice', 'purchaseorder', 'job'.
 * @return array IDs of the known identifiers found; empty for no match or an unknown $type.
 */
function checkValidIdentifier($subject, &$list, $type) {
    $patterns = array(
        'enquiry' => '/CMC\d+([NVQWSOT]|ACT|NT)E\d+-\d+/',
        'invoice' => '/CMCIN\d+/',
        'purchaseorder' => '/CMCPO\d+/',
        'job' => '/(JAN|FEB|MAR|APR|MAY|JUN|JUL|AUG|SEP|OCT|NOV|DEC)\d+(N|V|W|S|T|NT|ACT)J\d+/'
    );

    $returnArray = array();
    if (!isset($patterns[$type])) {
        return $returnArray;
    }
    if (!preg_match_all($patterns[$type], (string)$subject, $output)) {
        return $returnArray;
    }

    // $output[0] holds the full matches; the other entries are capture groups.
    foreach (array_unique($output[0]) as $match) {
        if (isset($list[$match])) { //check if it actually exists.
            $returnArray[] = $list[$match];
        }
    }
    return $returnArray;
}
/**
 * Tell whether an identifier lookup produced at least one result.
 *
 * Always returns a boolean (the negative case used to return null).
 *
 * @param array $found_identifier Result of checkValidIdentifier() (taken by reference, not modified).
 * @return bool
 */
function foundIdentifier(&$found_identifier) {
    return count($found_identifier) > 0;
}
/**
 * Collect the To, From and CC addresses from a parsed header object.
 *
 * @param object $headers Header object exposing optional ->to, ->cc and ->from lists.
 * @return array Array with the keys 'to', 'from' and 'cc'; a key stays an empty
 *               array when the corresponding header is not set.
 */
function getRecipients($headers) {
    $recipients = array(
        'to' => array(),
        'from' => array(),
        'cc' => array()
    );
    foreach (array('to', 'cc', 'from') as $field) {
        if (isset($headers->$field)) {
            $recipients[$field] = $this->buildEmailArray($headers->$field);
        }
    }
    return $recipients;
}
/**
 * Build an array containing lower-cased email addresses from the list of
 * address objects passed to it.
 *
 * Entries lacking a host or a mailbox are ignored. An array is always
 * returned, including for an empty or non-iterable input, so callers can
 * iterate the result unconditionally.
 *
 * @param array $recipientObj List of objects exposing ->mailbox and ->host.
 * @return array List of "mailbox@host" strings.
 */
function buildEmailArray($recipientObj) {
    $recipientList = array();
    if (!is_array($recipientObj) && !($recipientObj instanceof Traversable)) {
        return $recipientList;
    }
    foreach ($recipientObj as $object) {
        if (!isset($object->host) || !isset($object->mailbox)) { //Incomplete address? Ignore it.
            continue;
        }
        $recipientList[] = strtolower($object->mailbox."@".$object->host);
    }
    return $recipientList;
}
/**
 * Fetches the body and attachments from a MIME encoded email. Uses ripmime to do the decoding.
 *
 * ripmime writes the decoded parts into $email_dir/$relative_path. Each part
 * reported in its output is renamed with a per-email UUID prefix and
 * described in the returned array. The largest text/html part - or, when there is
 * none, the largest text/plain part - is flagged as the message body.
 *
 * @param string $email_filename File name of the email inside $vault_dir.
 * @param string $vault_dir      Directory containing the raw email.
 * @param string $email_dir      Base directory for decoded parts.
 * @param string $relative_path  Sub-directory of $email_dir to decode into.
 * @param string $ripmime_path   Path of the ripmime executable.
 * @return array|int On success an array (indexed by ripmime output line number) of
 *                   arrays with the keys type, name, filename, size and is_message_body;
 *                   the integer 1 when ripmime exits with a non-zero status.
 */
function fetchBodyAttachments($email_filename, $vault_dir, $email_dir, $relative_path, $ripmime_path) {
    $uuid = String::uuid();
    $email_file = $vault_dir."/".$email_filename;
    $target_dir = $email_dir.'/'.$relative_path;

    // Quote every path: file names come from the file system and must not be
    // interpreted by the shell.
    // NOTE(review): escapeshellarg() is locale dependent for non-ASCII bytes - confirm LC_CTYPE if such names occur.
    $command = escapeshellarg($ripmime_path)
        .' -i '.escapeshellarg($email_file)
        .' -d '.escapeshellarg($target_dir)
        .' --prefix --paranoid -v --verbose-contenttype --recursion-max 30';
    $output = array();
    exec($command, $output, $status);
    if ($status != 0) {
        return 1;
    }

    $attachments = array();
    // Index and size of the largest HTML / plain part seen so far (null = none yet).
    $biggestHTML_num = null;
    $biggestHTML_size = 0;
    $biggestPlain_num = null;
    $biggestPlain_size = 0;

    /* Check the $output array and find the filenames of the attachments */
    foreach ($output as $j => $line) {
        // Expected shape: "<word> <key>=<content-type> <key>=<filename>".
        $words = explode(' ', $line);
        if (count($words) < 3) {
            continue;
        }
        $type = explode('=', $words[1]);
        $name = explode('=', $words[2]);
        if (count($type) != 2 || count($name) != 2) {
            continue;
        }

        $oldNamePath = $target_dir.'/'.$name[1];
        $newNamePath = $target_dir.'/'.$uuid.'-'.$name[1];
        $newName = $relative_path.'/'.$uuid.'-'.$name[1];
        if (!rename($oldNamePath, $newNamePath)) {
            // Do not record an attachment whose file is not where we say it is.
            echo "Unable to rename attachment '{$name[1]}'. Skipping it\n";
            continue;
        }
        $size = filesize($newNamePath);

        $attachments[$j] = array(
            'type' => $type[1],
            'name' => $newName,
            'filename' => $name[1],
            'size' => $size,
            'is_message_body' => 0
        );

        /** Record the array ID and size of the largest HTML & plain attachments **/
        if ($type[1] == 'text/html') {
            if ($size > $biggestHTML_size) {
                $biggestHTML_num = $j;
                $biggestHTML_size = $size;
            }
        }
        else if ($type[1] == 'text/plain') {
            if ($size > $biggestPlain_size) {
                $biggestPlain_num = $j;
                $biggestPlain_size = $size;
            }
        }
    }

    if ($biggestHTML_num !== null) { //We found an HTML attachment.
        $attachments[$biggestHTML_num]['is_message_body'] = 1;
    }
    else if ($biggestPlain_num !== null) {
        $attachments[$biggestPlain_num]['is_message_body'] = 1;
    }
    return $attachments;
}
/**
 * Resolve (and create if necessary) the MM-YYYY attachment folder for a message.
 *
 * The folder name is derived from $unix_time and lives directly below
 * $email_dir. The script terminates if the folder is missing and cannot be
 * created.
 *
 * @param string $email_dir Base directory for decoded emails.
 * @param int    $unix_time Unix timestamp of the message.
 * @return string The folder name relative to $email_dir, formatted as 'm-Y'.
 */
function getAttachmentDirectory($email_dir, $unix_time) {
    $month_year = date('m-Y', $unix_time);
    $directory = $email_dir."/".$month_year;

    // Only attempt creation when nothing exists at that path yet.
    if (!file_exists($directory) && !mkdir($directory)) {
        die("Unable to make directory $email_dir/$month_year");
    }
    return $month_year;
}
/**
* The attachments have been decoded and written to disk by ripmime.
*
* 1. Generate a uuid for this email
* 2. Is there a month-year folder for the current month? If not, make one.
* 3. Move the attachments to the correct month-year Folder. Prefix their new names with uuid.
* 4.
*
* @param <type> $attachments
function moveAttachments($attachments, $email_dir, $unix_time) {
$month_year = date('m-Y');
$uuid = String::uuid();
foreach($attachments as $attachment) {
echo $email_dir.'/'.$attachment['name']."\n";
}
die();
}*/
}
?>