Refactored vault hugely. Hopefully it's working properly now

This commit is contained in:
Karl Cordes 2011-03-15 18:28:17 +11:00
parent 2e563ce1f5
commit d12825247a

View file

@ -14,7 +14,7 @@
* *
* *
*/ */
App::import('Sanitize');
class VaultShell extends Shell { class VaultShell extends Shell {
@ -27,28 +27,25 @@ class VaultShell extends Shell {
/****************************************************** /******************************************************
* Config Variables * Config Variables
* *****************************************************/ * *****************************************************/
$testing = 1; //Whether to actually move the emails. 1=test, 0=production $testing = 0; //Whether to actually move the emails. 1=test, 0=production
if($testing == 1) { if($testing == 1) {
$ripmime_path = '/opt/local/bin/ripmime'; $ripmime_path = '/opt/local/bin/ripmime';
$email_dir = '/Users/karlcordes/Sites/quotenik/app/emails'; $email_dir = '/Users/karlcordes/Sites/quotenik/app/emails';
$vault_dir = '/Users/karlcordes/Sites/quotenik/app/vaultmsgs/#msgs'; $vault_dir = '/Users/karlcordes/Sites/quotenik/app/vaultmsgs';
$processed_dir = '/Users/karlcordes/Sites/quotenik/app/processed_vaultmsgs';
} }
else { else {
$ripmime_path = '/usr/local/bin/ripmime'; $ripmime_path = '/usr/local/bin/ripmime';
$email_dir = '/var/www/cakephp/app/emails'; $email_dir = '/var/www/cakephp/app/emails';
$vault_dir = '/var/www/cakephp/app/vaultmsgs/#msgs'; $vault_dir = '/var/www/cakephp/app/vaultmsgs';
$processed_dir = '/var/www/cakephp/app/processed_vaultmsgs';
} }
$emails = scandir($vault_dir); $emails = scandir($vault_dir);
/* Find the strings we want to look for in the subjects. Build arrays /* Find the strings we want to look for in the subjects. Build arrays
* using the string as the keys, IDs as the value * using the string as the keys, IDs as the value
*/ */
@ -76,11 +73,6 @@ class VaultShell extends Shell {
$jobMap = $this->makeMap($jobs, 'Job', 'title'); $jobMap = $this->makeMap($jobs, 'Job', 'title');
$store = array();
$discard = array();
/** /**
* Loop through the messages. * Loop through the messages.
* *
@ -94,49 +86,41 @@ class VaultShell extends Shell {
* Check the subject for patterns that match Identifiers. * Check the subject for patterns that match Identifiers.
* *
*/ */
foreach($emails as $email) { foreach($emails as $email_filename) {
echo "Handling $email_filename\n";
print $email."\n"; if( ($email_filename == '.') || ($email_filename == '..') ) {
continue;
}
if (!($content = fread(fopen(EML_FILE_PATH.$filename, 'r'), filesize(EML_FILE_PATH.$filename)))) if(file_exists($processed_dir."/".$email_filename)) {
die('File not found ('.EML_FILE_PATH.$filename.')'); continue;
}
$content = file_get_contents($vault_dir."/".$email_filename);
if(!$content) {
die("Unable to read ".$vault_dir."/".$email_filename);
}
//Handle files coming from windows (\r\n vs \n): //Handle files coming from windows (\r\n vs \n):
// Thanks to Dan Hulme (dhulme@gmail.com) // Thanks to Dan Hulme (dhulme@gmail.com)
$content = str_replace("\r", "", $content); $content = str_replace("\r", "", $content);
// Keep a copy of the original file // Keep a copy of the original file
$raw=$content; //$raw=$content;
$headers = imap_rfc822_parse_headers($content); $headers = imap_rfc822_parse_headers($content);
//print_r($headers);
//$message = $this->getMessage($mbox, $i, $this_header);
$recipients = $this->getRecipients($headers, true); $recipients = $this->getRecipients($headers, true);
print_r($recipients);
die();
continue; $unix_time = strtotime($headers->date); //Used for filing attachments into month-year folders.
$this_header = imap_headerinfo($mbox, $i);
$this_uid = imap_uid($mbox, $i);
$unix_time = $this_header->udate; //Used for filing attachments into month-year folders.
//MM-YYYY used For lack of a better option really. //MM-YYYY used For lack of a better option really.
//$message = $this->getMessage($mbox, $i, $this_header);
$recipients = $this->getRecipients($this_header, true);
// continue;
//die();
$saveThis = false; //Set to true, if To,From,CC is a Known User. $saveThis = false; //Set to true, if To,From,CC is a Known User.
$fromKnownUser = false; $fromKnownUser = false;
@ -154,14 +138,12 @@ class VaultShell extends Shell {
} }
//print_r($recipients); if(!isset($headers->subject)) { //Emails without a subject are not welcome. Skip it.
if(!isset($this_header->subject)) { //Emails without a subject are not welcome. Skip it.
$discard[] = $this_uid; $discard[] = $this_uid;
continue; continue;
} }
$subjDecoded = imap_mime_header_decode($this_header->subject); $subjDecoded = imap_mime_header_decode($headers->subject);
$foundIdent = false; $foundIdent = false;
@ -182,8 +164,10 @@ class VaultShell extends Shell {
$found_jobs = $this->checkValidIdentifier($sub->text, $jobMap, 'job'); $found_jobs = $this->checkValidIdentifier($sub->text, $jobMap, 'job');
$foundIdent = $this->foundIdentifier($found_jobs); $foundIdent = $this->foundIdentifier($found_jobs);
//echo "["$sub->.$sub->text; echo $sub->text;
} }
print_r($recipients);
//We're going to save this Email. //We're going to save this Email.
@ -239,29 +223,11 @@ class VaultShell extends Shell {
} }
echo "---------START MESSAGE -----------------\n"; echo "---------START MESSAGE -----------------\n";
echo "Email No: $i\n";
echo "Subject: ".$subjDecoded[0]->text."\n"; echo "Subject: ".$subjDecoded[0]->text."\n";
$newEmail = array(); $newEmail = array();
$structure = imap_fetchstructure($mbox, $i);
//print_r($recipients);
//print_r($recipientsIDs);
//print_r($structure);
/** Testing creating the Email, Recipient and Attachments */ /** Testing creating the Email, Recipient and Attachments */
$newEmail['Email'] = array(); $newEmail['Email'] = array();
@ -278,9 +244,12 @@ class VaultShell extends Shell {
//die("Email has no from Recipient!! Critical Failure"); //Change this to logging for deployment. //die("Email has no from Recipient!! Critical Failure"); //Change this to logging for deployment.
// $newEmail['Email']['date'] = $this_header->date;
$newEmail['Email']['udate'] = $this_header->udate; $newEmail['Email']['udate'] = $unix_time;
$newEmail['Email']['subject'] = $subjDecoded[0]->text; //Hacky. Will probably fix this quickly.
$allowedChars = array('@', ';', ':', '.', '$', '%', '*','#','!',',','[', ']',' ','{','}','|', '(', ')');
$newEmail['Email']['subject'] = Sanitize::paranoid($subjDecoded[0]->text, $allowedChars); //Hacky. Will probably fix this quickly.
$newEmail['Email']['filename'] = $email_filename;
$recipientCount = 0; $recipientCount = 0;
foreach($recipientsIDs as $type => $typeEmails) { foreach($recipientsIDs as $type => $typeEmails) {
@ -308,7 +277,8 @@ class VaultShell extends Shell {
/** Start Attachments **/ /** Start Attachments **/
$relative_path = $this->getAttachmentDirectory($email_dir, $unix_time); $relative_path = $this->getAttachmentDirectory($email_dir, $unix_time);
$attachments = $this->fetchBodyAttachments($mbox, $i, $email_dir, $relative_path, $ripmime_path);
$attachments = $this->fetchBodyAttachments($email_filename, $vault_dir, $email_dir, $relative_path, $ripmime_path);
$attachmentCount = 0; $attachmentCount = 0;
@ -371,48 +341,34 @@ class VaultShell extends Shell {
} }
print_r($newEmail); //print_r($newEmail);
$this->Email->create(); $this->Email->create();
$emailID = $this->Email->saveAll($newEmail); $emailID = $this->Email->saveAll($newEmail);
if(!empty($emailID)) { if(!empty($emailID)) {
echo "Success. We made an email. Storing Email UID $this_uid\n"; echo "Success. We made an email ID: $emailID\n";
} }
else { else {
echo "Error! Didn't save the email\n"; echo "Error! Didn't save the email\n";
} }
$store[] = $this_uid;
echo "--------END MESSAGE ------\n"; echo "--------END MESSAGE ------\n";
} }
else { else {
$discard[] = $this_uid; echo "email will not be saved. Subject: ".$subjDecoded[0]->text."\n";
echo "email {$i} will not be saved. Subject: ".$subjDecoded[0]->text."\n";
} }
// print_r($recipients);
// echo "----------------------\n";
if(symlink($vault_dir."/".$email_filename, $processed_dir."/".$email_filename)) {
echo "Created Symlink sucessfully. Email processed\n";
}
else {
die("Could not create symlink to record processing of this email. Critical failure");
}
} }
reset($discard);
reset($store);
$numberToStore = count($store);
$numberToDiscard= count($discard);
if($numberToStore > 0) {
echo "Got $numberToStore messages to store\n";
$storeSet = implode(",", $store);
}
if($numberToDiscard > 0) {
echo "Going to discard $numberToDiscard messages\n";
}
} }
@ -510,24 +466,6 @@ class VaultShell extends Shell {
/**
 * Build a small summary array (subject and date) from an email header object.
 *
 * @param mixed  $mbox      Unused; kept so existing callers keep working.
 * @param mixed  $msgnumber Unused; kept so existing callers keep working.
 * @param object $headers   Header object with optional `subject` and `date` properties.
 * @return array Array with keys 'subject' and 'date'; a value is null when the
 *               corresponding property is not set on $headers.
 */
function getMessage($mbox, $msgnumber, $headers) {
    // Every assignment to $subject had been commented out, so it was undefined
    // when read below. Take the header value as-is, with no charset decoding.
    $subject = isset($headers->subject) ? $headers->subject : null;

    // Guard the date the same way so a header object without one does not raise a notice.
    $date = isset($headers->date) ? $headers->date : null;

    $message = array();
    $message['subject'] = $subject;
    $message['date'] = $date;

    return $message;
}
/** /**
* get Recipients from the headers of an email * get Recipients from the headers of an email
* *
@ -562,8 +500,13 @@ class VaultShell extends Shell {
$recipientList = array(); $recipientList = array();
if(count($recipientObj) > 0) { if(count($recipientObj) > 0) {
foreach($recipientObj as $id=> $object) { foreach($recipientObj as $id=> $object) {
if(!isset($object->host)) { //No hostname? Ignore it.
continue;
}
else {
$recipientList[] = strtolower($object->mailbox."@".$object->host); $recipientList[] = strtolower($object->mailbox."@".$object->host);
} }
}
return $recipientList; return $recipientList;
} }
} }
@ -576,16 +519,25 @@ class VaultShell extends Shell {
* *
*/ */
function fetchBodyAttachments($mailbox, $msg_number, $email_dir, $relative_path, $ripmime_path) { function fetchBodyAttachments($email_filename, $vault_dir, $email_dir, $relative_path, $ripmime_path) {
$uuid = String::uuid(); $uuid = String::uuid();
$email_file = $email_dir.'/'.$uuid.'.eml'; $email_file = $vault_dir."/".$email_filename;
imap_savebody($mailbox, $email_file, $msg_number);
$command = "$ripmime_path -i $email_file -d $email_dir/$relative_path --prefix --paranoid -v --verbose-contenttype --recursion-max 30"; $command = "$ripmime_path -i $email_file -d $email_dir/$relative_path --prefix --paranoid -v --verbose-contenttype --recursion-max 30";
$output = array(); $output = array();
exec($command, $output, $status); exec($command, $output, $status);
echo $email_file ."\n";
echo $command ."\n";
/* Check the $output array and find the filenames of the attachments */ /* Check the $output array and find the filenames of the attachments */
if($status == 0) { if($status == 0) {
@ -632,7 +584,7 @@ class VaultShell extends Shell {
$attachments[$j]['filename'] = $name[1]; $attachments[$j]['filename'] = $name[1];
$attachments[$j]['size'] = $size; $attachments[$j]['size'] = $size;
$attachments[$j]['is_message_body'] = 0; $attachments[$j]['is_message_body'] = 0;
echo "in message number $msg_number: found attachment ".$attachments[$j]['name'].' '.$attachments[$j]['type']."\n"; echo "found attachment ".$attachments[$j]['name'].' '.$attachments[$j]['type']."\n";
/** Record the array ID and size of the largest HTML & plain attachments **/ /** Record the array ID and size of the largest HTML & plain attachments **/
@ -655,12 +607,9 @@ class VaultShell extends Shell {
else if($biggestPlain_num != 999) { else if($biggestPlain_num != 999) {
$attachments[$biggestPlain_num]['is_message_body'] = 1; $attachments[$biggestPlain_num]['is_message_body'] = 1;
} }
unlink($email_file);
return $attachments; return $attachments;
} }
else { else {
unlink($email_file);
return 1; return 1;
} }
} }