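"""
Django management command: ingest email files dropped into a Maildir-style
vault directory (new/), link them to known CMC records (enquiries, invoices,
purchase orders, jobs) and users, extract attachments with ripmime, and
archive processed files (cur/).

Invoke it via manage.py; the command name comes from this file's name under
<app>/management/commands/.
"""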
import os
import re
import shutil
import subprocess
import traceback
import uuid
from email.header import decode_header
from email.parser import HeaderParser
from email.utils import parsedate_to_datetime, getaddresses

from django.conf import settings
from django.core.management.base import BaseCommand
from django.db import transaction
from django.utils import timezone

# Models live in the 'cmc' app; adjust the import path if yours differs.
from cmc.models import (
    Enquiry, Invoice, PurchaseOrder, User, Email, EmailRecipient,
    Job, EmailAttachment
)

# --- Configuration ---
# Consider moving these to Django settings (settings.py) for easier deployment
RIPMIME_PATH = getattr(settings, 'VAULT_RIPMIME_PATH', '/usr/bin/ripmime')  # Path to the ripmime binary
EMAIL_DIR = getattr(settings, 'VAULT_EMAIL_DIR', '/var/www/emails')  # Attachment storage base
VAULT_DIR = getattr(settings, 'VAULT_NEW_DIR', '/var/www/vaultmsgs/new')  # Incoming emails (Maildir "new")
PROCESSED_DIR = getattr(settings, 'VAULT_PROCESSED_DIR', '/var/www/vaultmsgs/cur')  # Processed emails (Maildir "cur")

# --- Regex Patterns for Identifiers (adjust if needed) ---
# Use non-capturing groups (?:...) so that findall()/finditer() yield the
# full identifier rather than just the group contents.
ENQUIRY_REGEX = re.compile(r"CMC\d+(?:[NVQWSOT]|ACT|NT)E\d+-\d+")
INVOICE_REGEX = re.compile(r"CMCIN\d+")
PO_REGEX = re.compile(r"CMCPO\d+")
JOB_REGEX = re.compile(r"(?:JAN|FEB|MAR|APR|MAY|JUN|JUL|AUG|SEP|OCT|NOV|DEC)\d+(?:N|V|W|S|T|NT|ACT|Q|O)J\d+")
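
# Hypothetical identifier strings these patterns would match (illustrative
# only; the real formats come from the CMC system):
#   ENQUIRY_REGEX: "CMC2024NE12-3"
#   INVOICE_REGEX: "CMCIN10042"
#   PO_REGEX:      "CMCPO5501"
#   JOB_REGEX:     "JAN24NJ7"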

class Command(BaseCommand):
    help = 'Processes email files from the vault directory, extracts info, saves to DB, and archives.'

    def _make_map(self, model_class, key_field, value_field='id'):
        """Helper to create a dictionary map from model data."""
        # Use .values() for efficiency
        qs = model_class.objects.all().values(value_field, key_field)
        # Handle potential lowercase requirement (like for user emails)
        is_user_email = model_class == User and key_field == 'email'
        return {
            (item[key_field].lower() if is_user_email and item[key_field] else item[key_field]): item[value_field]
            for item in qs if item[key_field]  # Ensure the key is not None or empty
        }
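
    # Hypothetical shape of the maps produced above (illustrative values only):
    #   _make_map(Invoice, 'title') -> {'CMCIN10042': 17, 'CMCIN10043': 18, ...}
    #   _make_map(User, 'email')    -> {'alice@example.com': 3, ...}  (keys lowercased)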

    def _decode_mime_header(self, header_value):
        """Decodes MIME encoded headers (like Subject) into a readable string."""
        if not header_value:
            return ""
        try:
            parts = decode_header(str(header_value))
            decoded_parts = []
            for part, encoding in parts:
                if isinstance(part, bytes):
                    # Use errors='ignore' or 'replace' for robustness
                    decoded_parts.append(part.decode(encoding or 'utf-8', errors='replace'))
                else:
                    decoded_parts.append(part)
            return "".join(decoded_parts)
        except Exception as e:
            self.stderr.write(f"Warning: Could not decode header '{header_value}': {e}")
            # Return the original or a placeholder if decoding fails
            return str(header_value) if isinstance(header_value, str) else repr(header_value)
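
    # Example of what decode_header() handles (hypothetical header value):
    #   "=?iso-8859-1?q?Caf=E9?=" decodes to "Café"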

    def _get_recipients(self, headers):
        """Extracts To, From, Cc recipients from parsed headers."""
        recipients = {'to': [], 'from': [], 'cc': []}
        address_fields = {'to': 'To', 'from': 'From', 'cc': 'Cc'}

        for key, header_name in address_fields.items():
            header_val = headers.get(header_name)
            if header_val:
                # getaddresses handles parsing "Name <email@example.com>" formats
                for _, email_addr in getaddresses([str(header_val)]):
                    if email_addr and '@' in email_addr:  # Basic validation
                        recipients[key].append(email_addr.lower())  # Normalize to lowercase
        return recipients
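
    # getaddresses() parsing example (hypothetical header value):
    #   'Alice <alice@example.com>, bob@example.com'
    #   -> [('Alice', 'alice@example.com'), ('', 'bob@example.com')]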

    def _check_valid_identifier(self, subject, identifier_map, regex):
        """Finds identifiers in the subject using regex and returns corresponding DB IDs."""
        found_ids = set()  # Use a set to avoid duplicates
        if not subject:
            return list(found_ids)
        # finditer() always yields the full match via group(0); findall() would
        # return only the group contents if the pattern contained groups.
        for match in regex.finditer(subject):
            identifier = match.group(0)
            if identifier in identifier_map:
                found_ids.add(identifier_map[identifier])
        return list(found_ids)

    def _get_attachment_directory(self, base_dir, dt_object):
        """Ensures the YYYY/MM directory exists and returns the relative path."""
        if not dt_object:  # Handle cases where date parsing failed
            dt_object = timezone.now()  # Fallback to the current time
        # Use a YYYY/MM structure, which is common and sorts well
        relative_path = dt_object.strftime('%Y/%m')
        full_path = os.path.join(base_dir, relative_path)
        try:
            # exist_ok=True prevents an error if the directory already exists
            os.makedirs(full_path, exist_ok=True)
            return relative_path
        except OSError as e:
            self.stderr.write(f"ERROR: Could not create directory {full_path}: {e}")
            # Re-raise rather than falling back to the base directory, so files
            # don't end up scattered outside the dated structure.
            raise
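
    # For example, an email dated 2024-07-15 with base '/var/www/emails' would
    # ensure '/var/www/emails/2024/07' exists and return '2024/07'.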

    def _fetch_body_attachments(self, email_filename, vault_path, attachment_base_dir, relative_path):
        """Uses ripmime to extract attachments and identifies the most likely body."""
        email_file_path = os.path.join(vault_path, email_filename)
        output_dir = os.path.join(attachment_base_dir, relative_path)

        # ripmime extracts into output_dir. We parse its stdout for the file
        # list, then rename each extracted file with a UUID prefix to prevent
        # collisions (mirroring the original script, which renamed after
        # extraction rather than using ripmime's own prefix option).

        command = [
            RIPMIME_PATH,
            '-i', email_file_path,
            '-d', output_dir,
            '--stdout',  # Get the output list on stdout
            '--no-nameless',  # Ignore parts without filenames
            '--paranoid',  # Use safe filenames
            '-v',  # Verbose output is needed for the content type
            '--verbose-contenttype',
            '--recursion-max', '5'  # Limit recursion depth (original had 30)
        ]

        try:
            self.stdout.write(f"Running ripmime: {' '.join(command)}")
            # check=True raises CalledProcessError on a non-zero exit status
            result = subprocess.run(command, capture_output=True, text=True, check=True, encoding='utf-8', errors='replace')
            output_lines = result.stdout.splitlines()
            self.stdout.write(f"ripmime output:\n{result.stdout}")  # Log output for debugging

        except FileNotFoundError:
            self.stderr.write(f"ERROR: ripmime command not found at {RIPMIME_PATH}. Please check the path.")
            return []  # Cannot proceed without ripmime
        except subprocess.CalledProcessError as e:
            self.stderr.write(f"ERROR: ripmime failed for {email_filename} with status {e.returncode}.")
            self.stderr.write(f"ripmime stderr:\n{e.stderr}")
            self.stderr.write(f"ripmime stdout:\n{e.stdout}")
            return []  # Failed to extract
        except Exception as e:
            self.stderr.write(f"ERROR: Unexpected error running ripmime for {email_filename}: {e}")
            return []

        # --- Process ripmime output ---
        # Example ripmime verbose output line (may vary slightly by version):
        #   Extracted 'text_plain_plain_1.txt' (text/plain) Size: 1234
        # Or, with --verbose-contenttype:
        #   file=text_plain_plain_1.txt content-type=text/plain size=12345
        # Try the key=value format first, then fall back to the other form.

        raw_attachments = {}  # Store temp data before renaming and the size check
        for line in output_lines:
            line = line.strip()
            # Try the key=value format first. Note: splitting on whitespace means
            # filenames containing spaces will not parse here; they fall through
            # to the regex below.
            parts = {p.split('=', 1)[0]: p.split('=', 1)[1] for p in line.split() if '=' in p}
            if 'file' in parts and 'content-type' in parts:
                original_filename = parts['file'].strip("'\"")  # Remove potential quotes
                mime_type = parts['content-type'].strip("'\"")
                # Size may be absent; it is obtained via os.path.getsize below
                raw_attachments[original_filename] = {'type': mime_type, 'size': None}
            else:
                # Fallback parsing (less reliable; adjust to the actual ripmime output)
                match = re.match(r"Extracted\s+'?([^']+)'?\s+\(([^)]+)\)", line)
                if match:
                    original_filename = match.group(1)
                    mime_type = match.group(2)
                    raw_attachments[original_filename] = {'type': mime_type, 'size': None}

        # --- Rename files with a UUID, get sizes, and determine the body ---
        processed_attachments = []
        file_uuid = uuid.uuid4()

        biggest_html_idx = -1
        biggest_html_size = -1
        biggest_plain_idx = -1
        biggest_plain_size = -1

        idx = 0
        for original_filename, data in raw_attachments.items():
            old_path = os.path.join(output_dir, original_filename)
            # Sanitize the filename slightly (replace problematic chars) before adding the UUID
            safe_part = re.sub(r'[^\w\.\-]', '_', original_filename)
            new_filename = f"{file_uuid}-{safe_part}"
            new_path = os.path.join(output_dir, new_filename)
            relative_new_path = os.path.join(relative_path, new_filename)  # Path to store in the DB

            try:
                if os.path.exists(old_path):
                    shutil.move(old_path, new_path)  # shutil.move is safe across filesystems
                    size = os.path.getsize(new_path)
                    data['size'] = size
                    data['new_name'] = relative_new_path  # Path relative to EMAIL_DIR
                    data['original_filename'] = original_filename  # Keep the original name for the DB
                    data['is_message_body'] = False  # Default
                    processed_attachments.append(data)

                    # Track the largest text/html and text/plain parts
                    if data['type'].lower() == 'text/html':
                        if size > biggest_html_size:
                            biggest_html_size = size
                            biggest_html_idx = idx
                    elif data['type'].lower() == 'text/plain':
                        if size > biggest_plain_size:
                            biggest_plain_size = size
                            biggest_plain_idx = idx

                    idx += 1
                else:
                    self.stderr.write(f"Warning: Ripped file '{original_filename}' not found at '{old_path}'. Skipping.")

            except OSError as e:
                self.stderr.write(f"ERROR: Could not rename/get size for '{original_filename}': {e}")
            except Exception as e:  # Catch any other unexpected errors
                self.stderr.write(f"ERROR: Unexpected error processing attachment '{original_filename}': {e}")

        # Mark the likely message body: prefer the largest text/html part, then
        # fall back to the largest text/plain part.
        if biggest_html_idx != -1:
            processed_attachments[biggest_html_idx]['is_message_body'] = True
        elif biggest_plain_idx != -1:
            processed_attachments[biggest_plain_idx]['is_message_body'] = True
        else:
            self.stdout.write(f"Warning: No clear text/html or text/plain body found for {email_filename}")

        # Prepare the final list for DB insertion
        final_attachments = [
            {
                'name': att['new_name'],  # Path relative to EMAIL_DIR
                'filename': att['original_filename'],
                'type': att['type'],
                'size': att['size'],
                'is_message_body': att['is_message_body']
            }
            for att in processed_attachments if att.get('size') is not None  # Ensure the size was obtained
        ]

        return final_attachments
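
    # Hypothetical shape of the returned list (illustrative values only):
    #   [{'name': '2024/07/<uuid>-report.pdf', 'filename': 'report.pdf',
    #     'type': 'application/pdf', 'size': 48211, 'is_message_body': False},
    #    ...]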

    def _move_processed_file(self, email_filename):
        """Moves the processed email file from the vault to the processed directory."""
        source_path = os.path.join(VAULT_DIR, email_filename)
        # The original script appended an ":S" suffix, mimicking the Maildir
        # convention of flagging messages as Seen (":2,S") when moving them
        # from new/ to cur/. We omit it for simplicity unless required.
        # target_filename = f"{email_filename}:S"
        target_filename = email_filename
        target_path = os.path.join(PROCESSED_DIR, target_filename)

        try:
            # Ensure the target directory exists
            os.makedirs(PROCESSED_DIR, exist_ok=True)
            shutil.move(source_path, target_path)
            self.stdout.write(f"Moved '{source_path}' to '{target_path}'")
            return True
        except OSError as e:
            self.stderr.write(f"ERROR: Unable to move '{source_path}' to '{target_path}': {e}")
            # Report the error and continue; the file stays in place and will be
            # picked up again on the next run.
            return False
        except Exception as e:
            self.stderr.write(f"ERROR: Unexpected error moving '{source_path}': {e}")
            return False

    # --- Main Handler ---
    def handle(self, *args, **options):
        self.stdout.write("Starting Vault email processing...")

        # --- Pre-load data into maps for efficiency ---
        try:
            self.stdout.write("Loading data maps...")
            # Map each record's title (its identifier string) to its primary key
            enquiry_map = self._make_map(Enquiry, 'title')
            invoice_map = self._make_map(Invoice, 'title')
            po_map = self._make_map(PurchaseOrder, 'title')
            job_map = self._make_map(Job, 'title')
            # Load user emails into a map {email.lower(): user_id}
            user_map = self._make_map(User, 'email')
            self.stdout.write(f"Loaded {len(enquiry_map)} enquiries, {len(invoice_map)} invoices, "
                              f"{len(po_map)} POs, {len(job_map)} jobs, {len(user_map)} users.")
        except Exception as e:
            self.stderr.write(f"FATAL: Could not load initial data maps: {e}")
            return  # Cannot proceed without the maps

        # --- Ensure directories exist ---
        for dir_path in [EMAIL_DIR, VAULT_DIR, PROCESSED_DIR]:
            if not os.path.isdir(dir_path):
                try:
                    self.stdout.write(f"Creating required directory: {dir_path}")
                    os.makedirs(dir_path, exist_ok=True)
                except OSError as e:
                    self.stderr.write(f"FATAL: Could not create required directory {dir_path}: {e}")
                    return  # Cannot proceed

        # --- Process new emails ---
        try:
            new_email_files = [f for f in os.listdir(VAULT_DIR)
                               if os.path.isfile(os.path.join(VAULT_DIR, f)) and not f.startswith('.')]
        except OSError as e:
            self.stderr.write(f"FATAL: Could not list files in vault directory {VAULT_DIR}: {e}")
            return  # Cannot proceed

        self.stdout.write(f"Found {len(new_email_files)} new email files to process.")

        processed_count = 0
        skipped_count = 0
        error_count = 0

        for email_filename in new_email_files:
            self.stdout.write(f"\n--- Handling '{email_filename}' ---")
            file_path = os.path.join(VAULT_DIR, email_filename)

            try:
                with open(file_path, 'r', encoding='utf-8', errors='replace') as f:
                    # The original script stripped "\r" manually; reading in text
                    # mode normalizes line endings for us.
                    content = f.read()
            except FileNotFoundError:
                self.stderr.write(f"Warning: File '{email_filename}' disappeared before processing. Skipping.")
                skipped_count += 1
                continue  # Already gone, or was never a file
            except OSError as e:
                self.stderr.write(f"ERROR: Could not read file '{email_filename}': {e}. Skipping.")
                error_count += 1
                # The unreadable file is left in place; consider moving it to a
                # dedicated error directory instead.
                # self._move_processed_file(email_filename)
                continue
            except Exception as e:  # Catch other potential read errors
                self.stderr.write(f"ERROR: Unexpected error reading file '{email_filename}': {e}. Skipping.")
                error_count += 1
                continue

            if not content:
                self.stdout.write("No content found. Moving and skipping.")
                self._move_processed_file(email_filename)
                skipped_count += 1
                continue

            # --- Parse Headers ---
            try:
                parser = HeaderParser()
                headers = parser.parsestr(content)
            except Exception as e:
                self.stderr.write(f"ERROR: Could not parse headers for '{email_filename}': {e}. Skipping.")
                error_count += 1
                self._move_processed_file(email_filename)  # Move the problematic file
                continue

            # --- Extract Core Info ---
            subject_decoded = self._decode_mime_header(headers.get('Subject'))
            date_str = headers.get('Date')
            email_datetime = None
            if date_str:
                try:
                    # parsedate_to_datetime handles timezone info properly
                    dt = parsedate_to_datetime(date_str)
                    # Make naive datetimes aware using the default timezone
                    if timezone.is_naive(dt):
                        email_datetime = timezone.make_aware(dt, timezone.get_default_timezone())
                    else:
                        # Convert to the default timezone for consistency
                        email_datetime = dt.astimezone(timezone.get_default_timezone())

                except Exception as e:  # Covers TypeError, ValueError, and other parsing errors
                    self.stderr.write(f"Warning: Could not parse date '{date_str}' for {email_filename}: {e}. Using current time.")
                    email_datetime = timezone.now()  # Fallback
            else:
                self.stdout.write(f"Warning: No date header found for {email_filename}. Using current time.")
                email_datetime = timezone.now()  # Fallback

            recipients = self._get_recipients(headers)
            all_recipient_emails = set(recipients['to'] + recipients['cc'] + recipients['from'])

            # --- Determine whether this email should be saved ---
            # Save if any To/From/Cc address belongs to a known user, or if the
            # subject contains a known identifier.
            known_user_involved = any(email_addr in user_map for email_addr in all_recipient_emails)

            if not subject_decoded:
                self.stdout.write("No subject found. Moving and skipping.")
                self._move_processed_file(email_filename)
                skipped_count += 1
                continue

            # Check for identifiers in the subject
            found_enquiry_ids = self._check_valid_identifier(subject_decoded, enquiry_map, ENQUIRY_REGEX)
            found_invoice_ids = self._check_valid_identifier(subject_decoded, invoice_map, INVOICE_REGEX)
            found_po_ids = self._check_valid_identifier(subject_decoded, po_map, PO_REGEX)
            found_job_ids = self._check_valid_identifier(subject_decoded, job_map, JOB_REGEX)

            found_any_identifier = bool(found_enquiry_ids or found_invoice_ids or found_po_ids or found_job_ids)

            should_save_email = known_user_involved or found_any_identifier

            if not should_save_email:
                self.stdout.write(f"Email does not involve known users and has no known identifiers in subject '{subject_decoded}'. Moving and skipping.")
                self._move_processed_file(email_filename)
                skipped_count += 1
                continue

            # --- Save the Email and related data (inside a transaction) ---
            self.stdout.write("Saving email and related data...")
            try:
                with transaction.atomic():  # All DB operations succeed or fail together
                    # --- Get or create User IDs for all recipients ---
                    recipient_user_ids = {'to': [], 'from': [], 'cc': []}
                    sender_user_id = None

                    for recipient_type, email_list in recipients.items():
                        for email_addr in email_list:
                            user_id = user_map.get(email_addr)
                            if not user_id:
                                # User not found; create one. get_or_create guards
                                # against duplicates if the address was added after
                                # the map was loaded.
                                self.stdout.write(f"Creating new user for email: {email_addr}")
                                try:
                                    new_user, created = User.objects.get_or_create(
                                        email=email_addr,
                                        defaults={'type': 'contact', 'by_vault': True}
                                    )
                                    user_id = new_user.id
                                    if created:
                                        self.stdout.write(f"New user '{email_addr}' created with ID: {user_id}")
                                    else:
                                        self.stdout.write(f"User '{email_addr}' already existed (ID: {user_id}), linking.")
                                    # Update the map for subsequent lookups in this run
                                    user_map[email_addr] = user_id

                                except Exception as db_err:
                                    self.stderr.write(f"ERROR: Failed to create or get user for '{email_addr}': {db_err}. This recipient may be skipped.")
                                    continue  # Skip this recipient if user creation failed

                            # Store the user ID for linking later
                            if user_id:  # Only if we successfully got/created the user
                                recipient_user_ids[recipient_type].append(user_id)
                                if recipient_type == 'from':
                                    # Treat the first 'From' address as the primary sender
                                    if sender_user_id is None:
                                        sender_user_id = user_id
                                    else:
                                        self.stdout.write(f"Warning: Multiple 'From' addresses found for {email_filename}. Using first: ID {sender_user_id}")

                    if sender_user_id is None:
                        # Rare, since we only save when known users or identifiers are
                        # involved, but handle it defensively. Raising rolls back the
                        # transaction; the file is moved in the ValueError handler below.
                        self.stderr.write(f"ERROR: Could not determine sender User ID for {email_filename}. Skipping save.")
                        raise ValueError("Sender User ID could not be determined.")

                    # --- Create Email Record ---
                    new_email_obj = Email.objects.create(
                        user_id=sender_user_id,  # Link to the sender User object
                        subject=subject_decoded[:500],  # Truncate to the field's max length
                        udate=email_datetime,
                        filename=email_filename  # Store the original vault filename
                    )
                    self.stdout.write(f"Created Email object with ID: {new_email_obj.id}")

                    # --- Create EmailRecipient Records ---
                    recipients_to_create = []
                    unique_recipients = set()  # Track (user_id, type) pairs to avoid duplicates

                    for recipient_type, user_id_list in recipient_user_ids.items():
                        # Map recipient_type ('to', 'cc', 'from') to model choices if needed
                        model_recipient_type = recipient_type  # Assuming the model's choices match these keys
                        for user_id in user_id_list:
                            if (user_id, model_recipient_type) not in unique_recipients:
                                recipients_to_create.append(
                                    EmailRecipient(email=new_email_obj, user_id=user_id, type=model_recipient_type)
                                )
                                unique_recipients.add((user_id, model_recipient_type))

                    if recipients_to_create:
                        EmailRecipient.objects.bulk_create(recipients_to_create)
                        self.stdout.write(f"Created {len(recipients_to_create)} EmailRecipient links.")

                    # --- Process and Save Attachments ---
                    attachment_dir_relative = self._get_attachment_directory(EMAIL_DIR, email_datetime)
                    attachments = self._fetch_body_attachments(
                        email_filename,
                        VAULT_DIR,
                        EMAIL_DIR,
                        attachment_dir_relative
                    )

                    attachments_to_create = [
                        EmailAttachment(
                            email=new_email_obj,
                            name=att_data['name'][:500],  # Relative path; truncate to field length
                            filename=att_data['filename'][:255],  # Original name; truncate to field length
                            type=att_data['type'][:100],  # MIME type; truncate to field length
                            size=att_data['size'],
                            is_message_body=att_data['is_message_body']
                        )
                        for att_data in attachments
                    ]
                    if attachments_to_create:
                        EmailAttachment.objects.bulk_create(attachments_to_create)
                        self.stdout.write(f"Created {len(attachments_to_create)} EmailAttachment records.")
                    else:
                        self.stdout.write("No attachments found or processed by ripmime.")

                    # --- Link to Found Identifiers (ManyToMany) ---
                    if found_enquiry_ids:
                        new_email_obj.enquiries.add(*found_enquiry_ids)
                        self.stdout.write(f"Linked email to Enquiries: {found_enquiry_ids}")
                    if found_invoice_ids:
                        new_email_obj.invoices.add(*found_invoice_ids)
                        self.stdout.write(f"Linked email to Invoices: {found_invoice_ids}")
                    if found_po_ids:
                        new_email_obj.purchase_orders.add(*found_po_ids)
                        self.stdout.write(f"Linked email to Purchase Orders: {found_po_ids}")
                    if found_job_ids:
                        new_email_obj.jobs.add(*found_job_ids)
                        self.stdout.write(f"Linked email to Jobs: {found_job_ids}")

                # The transaction committed; archive the file.
                if self._move_processed_file(email_filename):
                    processed_count += 1
                else:
                    # DB changes are saved but the file move failed, so the file
                    # will be re-processed (and duplicated) on the next run.
                    self.stderr.write(f"CRITICAL WARNING: DB record created for {email_filename} (Email ID: {new_email_obj.id}), but FAILED TO MOVE file.")
                    error_count += 1  # Count as an error due to incomplete processing

            except ValueError as ve:  # Raised above when the sender could not be determined
                self.stderr.write(f"Skipping save for {email_filename} due to error: {ve}")
                # Move the file even though saving failed
                self._move_processed_file(email_filename)
                error_count += 1
            except Exception as e:
                # Any other error during DB operations or attachment processing.
                # The transaction rolls back automatically; do NOT move the file,
                # so it will be retried on the next run.
                self.stderr.write(f"ERROR: Failed to save email data for '{email_filename}' during transaction: {e}")
                self.stderr.write(traceback.format_exc())
                error_count += 1
self.stdout.write("\n--- Processing Summary ---")
|
|
self.stdout.write(f"Successfully processed and saved: {processed_count}")
|
|
self.stdout.write(f"Skipped (no subject/content/not relevant): {skipped_count}")
|
|
self.stdout.write(f"Errors during processing (file may remain in '{VAULT_DIR}'): {error_count}")
|
|
self.stdout.write("Vault email processing finished.") |